You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tez.apache.org by ss...@apache.org on 2015/05/09 03:42:39 UTC

[01/43] tez git commit: TEZ-2406. Tez UI: Display per-io counter columns in task and attempt pages under vertex (Sreenath Somarajapuram via pramachandran)

Repository: tez
Updated Branches:
  refs/heads/TEZ-2003 2562c59f0 -> d03e330fa (forced update)


TEZ-2406. Tez UI: Display per-io counter columns in task and attempt pages under vertex (Sreenath Somarajapuram via pramachandran)


Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/12ef073c
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/12ef073c
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/12ef073c

Branch: refs/heads/TEZ-2003
Commit: 12ef073c205ce7b09827a7ecbeea457d589c1c5f
Parents: 5679b28
Author: Prakash Ramachandran <pr...@hortonworks.com>
Authored: Wed May 6 15:36:49 2015 +0530
Committer: Prakash Ramachandran <pr...@hortonworks.com>
Committed: Wed May 6 15:36:49 2015 +0530

----------------------------------------------------------------------
 CHANGES.txt                                     |  1 +
 .../components/basic-table/search-view.js       |  2 +-
 .../task_task_attempts_controller.js            | 16 +---
 .../vertex_task_attempts_controller.js          | 16 +---
 .../controllers/vertex_tasks_controller.js      | 16 +---
 .../src/main/webapp/app/scripts/helpers/misc.js | 94 +++++++++++++++++++-
 .../scripts/mixins/auto-counter-column-mixin.js | 58 ++++++++++++
 .../app/scripts/mixins/column-selector-mixin.js | 85 ++++++++++++++----
 .../main/webapp/app/scripts/views/checkbox.js   | 29 ++++++
 .../app/scripts/views/multi-select-view.js      | 72 +++++++++++++++
 tez-ui/src/main/webapp/app/styles/colors.less   |  1 +
 tez-ui/src/main/webapp/app/styles/main.less     | 59 ++++++++++--
 tez-ui/src/main/webapp/app/styles/shared.less   |  4 +
 .../main/webapp/app/templates/common/table.hbs  | 42 ++++-----
 .../app/templates/components/basic-table.hbs    | 41 +++++----
 .../components/basic-table/header-cell.hbs      |  2 +-
 .../components/basic-table/search-view.hbs      |  2 +-
 tez-ui/src/main/webapp/app/templates/dags.hbs   |  3 -
 .../webapp/app/templates/views/multi-select.hbs | 55 ++++++++++++
 19 files changed, 491 insertions(+), 107 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tez/blob/12ef073c/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 2446f6a..f060a8c 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -15,6 +15,7 @@ INCOMPATIBLE CHANGES
   TEZ-1993. Implement a pluggable InputSizeEstimator for grouping fairly
 
 ALL CHANGES:
+  TEZ-2406. Tez UI: Display per-io counter columns in task and attempt pages under vertex
   TEZ-2384. Add warning message in the case of prewarn under non-session mode.
   TEZ-2415. PMC RDF needs to use asfext:pmc, not asfext:PMC.
   TEZ-1752. Inputs / Outputs in the Runtime library should be interruptable.

http://git-wip-us.apache.org/repos/asf/tez/blob/12ef073c/tez-ui/src/main/webapp/app/scripts/components/basic-table/search-view.js
----------------------------------------------------------------------
diff --git a/tez-ui/src/main/webapp/app/scripts/components/basic-table/search-view.js b/tez-ui/src/main/webapp/app/scripts/components/basic-table/search-view.js
index 4dd41a1..36a2d4f 100644
--- a/tez-ui/src/main/webapp/app/scripts/components/basic-table/search-view.js
+++ b/tez-ui/src/main/webapp/app/scripts/components/basic-table/search-view.js
@@ -23,7 +23,7 @@ App.BasicTableComponent.SearchView = Ember.View.extend({
 
   text: '',
   _boundText: function () {
-    return this.get('text');
+    return this.get('text') || '';
   }.property(),
 
   _validRegEx: function () {

http://git-wip-us.apache.org/repos/asf/tez/blob/12ef073c/tez-ui/src/main/webapp/app/scripts/controllers/task_task_attempts_controller.js
----------------------------------------------------------------------
diff --git a/tez-ui/src/main/webapp/app/scripts/controllers/task_task_attempts_controller.js b/tez-ui/src/main/webapp/app/scripts/controllers/task_task_attempts_controller.js
index 558a740..c5c9eea 100644
--- a/tez-ui/src/main/webapp/app/scripts/controllers/task_task_attempts_controller.js
+++ b/tez-ui/src/main/webapp/app/scripts/controllers/task_task_attempts_controller.js
@@ -16,12 +16,13 @@
  * limitations under the License.
  */
 
-App.TaskAttemptsController = App.TablePageController.extend({
+App.TaskAttemptsController = App.TablePageController.extend(App.AutoCounterColumnMixin, {
 
   controllerName: 'TaskAttemptsController',
   needs: "task",
 
   entityType: 'taskTaskAttempt',
+  baseEntityType: 'taskAttempt',
   filterEntityType: 'task',
   filterEntityId: Ember.computed.alias('controllers.task.id'),
 
@@ -193,18 +194,7 @@ App.TaskAttemptsController = App.TablePageController.extend({
         }
       }
     ];
-  }.property(),
-
-  columnConfigs: function() {
-    return this.get('defaultColumnConfigs').concat(
-      App.Helpers.misc.normalizeCounterConfigs(
-        App.get('Configs.defaultCounters').concat(
-          App.get('Configs.tables.entity.taskAttempt') || [],
-          App.get('Configs.tables.sharedColumns') || []
-        )
-      )
-    );
-  }.property(),
+  }.property('filterEntityId'),
 
 });
 

http://git-wip-us.apache.org/repos/asf/tez/blob/12ef073c/tez-ui/src/main/webapp/app/scripts/controllers/vertex_task_attempts_controller.js
----------------------------------------------------------------------
diff --git a/tez-ui/src/main/webapp/app/scripts/controllers/vertex_task_attempts_controller.js b/tez-ui/src/main/webapp/app/scripts/controllers/vertex_task_attempts_controller.js
index f395b40..b4ed89a 100644
--- a/tez-ui/src/main/webapp/app/scripts/controllers/vertex_task_attempts_controller.js
+++ b/tez-ui/src/main/webapp/app/scripts/controllers/vertex_task_attempts_controller.js
@@ -16,12 +16,13 @@
  * limitations under the License.
  */
 
-App.VertexTaskAttemptsController = App.TablePageController.extend({
+App.VertexTaskAttemptsController = App.TablePageController.extend(App.AutoCounterColumnMixin, {
 
   controllerName: 'VertexTaskAttemptsController',
   needs: "vertex",
 
   entityType: 'vertexTaskAttempt',
+  baseEntityType: 'taskAttempt',
   filterEntityType: 'vertex',
   filterEntityId: Ember.computed.alias('controllers.vertex.id'),
 
@@ -196,17 +197,6 @@ App.VertexTaskAttemptsController = App.TablePageController.extend({
         }
       }
     ];
-  }.property(),
-
-  columnConfigs: function() {
-    return this.get('defaultColumnConfigs').concat(
-      App.Helpers.misc.normalizeCounterConfigs(
-        App.get('Configs.defaultCounters').concat(
-          App.get('Configs.tables.entity.taskAttempt') || [],
-          App.get('Configs.tables.sharedColumns') || []
-        )
-      )
-    );
-  }.property(),
+  }.property('filterEntityId'),
 
 });

http://git-wip-us.apache.org/repos/asf/tez/blob/12ef073c/tez-ui/src/main/webapp/app/scripts/controllers/vertex_tasks_controller.js
----------------------------------------------------------------------
diff --git a/tez-ui/src/main/webapp/app/scripts/controllers/vertex_tasks_controller.js b/tez-ui/src/main/webapp/app/scripts/controllers/vertex_tasks_controller.js
index 953ffcd..2cc0518 100644
--- a/tez-ui/src/main/webapp/app/scripts/controllers/vertex_tasks_controller.js
+++ b/tez-ui/src/main/webapp/app/scripts/controllers/vertex_tasks_controller.js
@@ -16,12 +16,13 @@
  * limitations under the License.
  */
 
-App.VertexTasksController = App.TablePageController.extend({
+App.VertexTasksController = App.TablePageController.extend(App.AutoCounterColumnMixin, {
 
   controllerName: 'VertexTasksController',
   needs: "vertex",
 
   entityType: 'vertexTask',
+  baseEntityType: 'task',
   filterEntityType: 'vertex',
   filterEntityId: Ember.computed.alias('controllers.vertex.id'),
 
@@ -172,17 +173,6 @@ App.VertexTasksController = App.TablePageController.extend({
         }
       }
     ];
-  }.property('id'),
-
-  columnConfigs: function() {
-    return this.get('defaultColumnConfigs').concat(
-      App.Helpers.misc.normalizeCounterConfigs(
-        App.get('Configs.defaultCounters').concat(
-          App.get('Configs.tables.entity.task') || [],
-          App.get('Configs.tables.sharedColumns') || []
-        )
-      )
-    );
-  }.property(),
+  }.property('filterEntityId')
 
 });
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tez/blob/12ef073c/tez-ui/src/main/webapp/app/scripts/helpers/misc.js
----------------------------------------------------------------------
diff --git a/tez-ui/src/main/webapp/app/scripts/helpers/misc.js b/tez-ui/src/main/webapp/app/scripts/helpers/misc.js
index 6794e95..81a7693 100644
--- a/tez-ui/src/main/webapp/app/scripts/helpers/misc.js
+++ b/tez-ui/src/main/webapp/app/scripts/helpers/misc.js
@@ -109,6 +109,50 @@ App.Helpers.misc = {
     return classPath.substr(classPath.lastIndexOf('.') + 1);
   },
 
+  /**
+   * Return a normalized group name for a counter name
+   * @param groupName {String}
+   * @return Normlaized name
+   */
+  getCounterGroupDisplayName: function (groupName) {
+    var displayName = App.Helpers.misc.getClassName(groupName), // Remove path
+        ioParts,
+        toText;
+
+    function removeCounterFromEnd(text) {
+      if(text.substr(-7) == 'Counter') {
+        text = text.substr(0, text.length - 7);
+      }
+      return text;
+    }
+
+    displayName = removeCounterFromEnd(displayName);
+
+    // Reformat per-io counters
+    switch(App.Helpers.misc.checkIOCounterGroup(displayName)) {
+      case 'in':
+        ioParts = displayName.split('_INPUT_');
+        toText = 'to %@ Input'.fmt(ioParts[1]);
+      break;
+      case 'out':
+        ioParts = displayName.split('_OUTPUT_');
+        toText = 'to %@ Output'.fmt(ioParts[1]);
+      break;
+    }
+    if(ioParts) {
+      ioParts = ioParts[0].split('_');
+      if(ioParts.length > 1) {
+        displayName = '%@ - %@ %@'.fmt(
+          removeCounterFromEnd(ioParts.shift()),
+          ioParts.join('_'),
+          toText
+        );
+      }
+    }
+
+    return displayName;
+  },
+
   /*
    * Normalizes counter style configurations
    * @param counterConfigs Array
@@ -116,9 +160,14 @@ App.Helpers.misc = {
    */
   normalizeCounterConfigs: function (counterConfigs) {
     return counterConfigs.map(function (configuration) {
-      configuration.headerCellName = configuration.counterName || configuration.counterId;
-      configuration.id = '%@/%@'.fmt(configuration.counterGroupName || configuration.groupId,
-          configuration.counterName || configuration.counterId),
+      var groupName = configuration.counterGroupName || configuration.groupId,
+          counterName = configuration.counterName || configuration.counterId;
+
+      configuration.headerCellName = '%@ - %@'.fmt(
+        App.Helpers.misc.getCounterGroupDisplayName(groupName),
+        counterName
+      );
+      configuration.id = '%@/%@'.fmt(groupName, counterName),
 
       configuration.getSortValue = App.Helpers.misc.getCounterCellContent;
       configuration.getCellContent =
@@ -413,6 +462,45 @@ App.Helpers.misc = {
     }
   },
 
+  /**
+   * Returns in/out/empty string based counter group type
+   * @param counterGroupName {String}
+   * @return in/out/empty string
+   */
+  checkIOCounterGroup: function (counterGroupName) {
+    if(counterGroupName == undefined){
+      debugger;
+    }
+    var relationPart = counterGroupName.substr(counterGroupName.indexOf('_') + 1);
+    if(relationPart.match('_INPUT_')) {
+      return 'in';
+    }
+    else if(relationPart.match('_OUTPUT_')) {
+      return 'out';
+    }
+    return '';
+  },
+
+  /**
+   * Return unique values form array based on a property
+   * @param array {Array}
+   * @param property {String}
+   * @return uniqueArray {Array}
+   */
+  getUniqueByProperty: function (array, property) {
+    var propHash = {},
+        uniqueArray = [];
+
+    array.forEach(function (item) {
+      if(item && !propHash[item[property]]) {
+        uniqueArray.push(item);
+        propHash[item[property]] = true;
+      }
+    });
+
+    return uniqueArray;
+  },
+
   timelinePathForType: (function () {
     var typeToPathMap = {
       dag: 'TEZ_DAG_ID',

http://git-wip-us.apache.org/repos/asf/tez/blob/12ef073c/tez-ui/src/main/webapp/app/scripts/mixins/auto-counter-column-mixin.js
----------------------------------------------------------------------
diff --git a/tez-ui/src/main/webapp/app/scripts/mixins/auto-counter-column-mixin.js b/tez-ui/src/main/webapp/app/scripts/mixins/auto-counter-column-mixin.js
new file mode 100644
index 0000000..2c6b531
--- /dev/null
+++ b/tez-ui/src/main/webapp/app/scripts/mixins/auto-counter-column-mixin.js
@@ -0,0 +1,58 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+App.AutoCounterColumnMixin = Em.Mixin.create({
+
+  baseEntityType: null, // Must be set in the controller that uses this Mixin
+
+  columnSelectorMessage: function () {
+    return "<span class='per-io'>Per-IO counter</span> selection wouldn't persist across %@.".fmt(
+      this.get('filterEntityType').pluralize()
+    );
+  }.property('filterEntityType'),
+
+  columnConfigs: function() {
+    var counterConfigs = App.Helpers.misc.normalizeCounterConfigs(
+      App.get('Configs.defaultCounters').concat(
+        App.get('Configs.tables.entity.' + this.get('baseEntityType')) || [],
+        App.get('Configs.tables.sharedColumns') || []
+      )
+    ), dynamicCounterConfigs = [];
+
+    this.get('data').forEach(function (row) {
+      var counterGroups = row.get('counterGroups');
+      if(counterGroups) {
+        counterGroups.forEach(function (group) {
+          group.counters.forEach(function (counter) {
+            dynamicCounterConfigs.push({
+              counterName: counter.counterName,
+              counterGroupName: group.counterGroupName
+            });
+          });
+        });
+      }
+    });
+
+    return this.get('defaultColumnConfigs').concat(
+      App.Helpers.misc.getUniqueByProperty(counterConfigs.concat(
+        App.Helpers.misc.normalizeCounterConfigs(dynamicCounterConfigs)
+      ), 'id')
+    );
+  }.property('data', 'defaultColumnConfigs', 'baseEntityType')
+
+});
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tez/blob/12ef073c/tez-ui/src/main/webapp/app/scripts/mixins/column-selector-mixin.js
----------------------------------------------------------------------
diff --git a/tez-ui/src/main/webapp/app/scripts/mixins/column-selector-mixin.js b/tez-ui/src/main/webapp/app/scripts/mixins/column-selector-mixin.js
index d3b3bbd..3a76d61 100644
--- a/tez-ui/src/main/webapp/app/scripts/mixins/column-selector-mixin.js
+++ b/tez-ui/src/main/webapp/app/scripts/mixins/column-selector-mixin.js
@@ -38,8 +38,10 @@ App.ColumnSelectorMixin = Em.Mixin.create({
   _storeKey: '',
   visibleColumnIds: {},
   columnConfigs: [],
+  selectOptions: [],
 
   columnSelectorTitle: 'Column Selector',
+  columnSelectorMessage: '',
 
   init: function(){
     var visibleColumnIds;
@@ -59,7 +61,7 @@ App.ColumnSelectorMixin = Em.Mixin.create({
 
     this._super();
     this.set('visibleColumnIds', visibleColumnIds);
-  },
+  }.observes('defaultColumnConfigs'), //To reset on entity change
 
   columns: function() {
     var visibleColumnConfigs = this.get('columnConfigs').filter(function (column) {
@@ -67,23 +69,76 @@ App.ColumnSelectorMixin = Em.Mixin.create({
     }, this);
 
     return App.Helpers.misc.createColumnDescription(visibleColumnConfigs);
-  }.property('visibleColumnIds'),
+  }.property('visibleColumnIds', 'columnConfigs'),
+
+  _getSelectOptions: function () {
+    var group = null,
+        highlight = false,
+        visibleColumnIds = this.get('visibleColumnIds');
+
+    return this.get('columnConfigs').map(function (config) {
+      var css = '';
+
+      highlight = highlight ^ (config.counterGroupName != group),
+      group = config.counterGroupName;
+
+      if(highlight) {
+        css += ' highlight';
+      }
+      if(group && App.Helpers.misc.checkIOCounterGroup(group)) {
+        css += ' per-io';
+      }
+
+      return Em.Object.create({
+        id: config.id,
+        displayText: config.headerCellName,
+        css: css,
+        selected: visibleColumnIds[config.id]
+      });
+    });
+  },
 
   actions: {
     selectColumns: function () {
-      var that = this;
-
-      App.Dialogs.displayMultiSelect(this.get('columnSelectorTitle'), this.get('columnConfigs'), this.visibleColumnIds, {
-        displayText: 'headerCellName'
-      }).then(function (data) {
-        if(isObjectsDifferent(data, that.visibleColumnIds)) {
-          try {
-            localStorage.setItem(that._storeKey , JSON.stringify(data));
-          }catch(e){}
-          that.set('visibleColumnIds', data);
-        }
-      });
+      this.set('selectOptions', this._getSelectOptions());
+
+      Bootstrap.ModalManager.open(
+        'columnSelector',
+        this.get('columnSelectorTitle'),
+        App.MultiSelectView.extend({
+          options: this.get('selectOptions'),
+          message: this.get('columnSelectorMessage')
+        }),
+        [Ember.Object.create({
+          title: 'Ok',
+          dismiss: 'modal',
+          clicked: 'selectionChange'
+        })],
+        this
+      );
+    },
+
+    selectionChange: function () {
+      var visibleColumnIds = this.get('selectOptions').reduce(function (obj, option) {
+            if(option.get('selected')) {
+              obj[option.get('id')] = true;
+            }
+            return obj;
+          }, {}),
+          selectionToSave = this.get('selectOptions').reduce(function (obj, option) {
+            var id = option.id;
+            if(!id.match('_INPUT_') && !id.match('_OUTPUT_') && visibleColumnIds[id]) {
+              obj[id] = true;
+            }
+            return obj;
+          }, {});
+
+      if(isObjectsDifferent(visibleColumnIds, this.get('visibleColumnIds'))) {
+        try {
+          localStorage.setItem(this._storeKey , JSON.stringify(selectionToSave));
+        }catch(e){}
+        this.set('visibleColumnIds', visibleColumnIds);
+      }
     }
   }
-
 });
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tez/blob/12ef073c/tez-ui/src/main/webapp/app/scripts/views/checkbox.js
----------------------------------------------------------------------
diff --git a/tez-ui/src/main/webapp/app/scripts/views/checkbox.js b/tez-ui/src/main/webapp/app/scripts/views/checkbox.js
new file mode 100644
index 0000000..da2de14
--- /dev/null
+++ b/tez-ui/src/main/webapp/app/scripts/views/checkbox.js
@@ -0,0 +1,29 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+App.Checkbox = Em.Checkbox.extend({
+  change: function() {
+    var value = this.get('checked'),
+        target = this.get('target') || this.get('context');
+
+    if(target) {
+      Em.run.later(target.send.bind(target, this.get('action'), value), 100);
+    }
+    return true;
+  }
+});

http://git-wip-us.apache.org/repos/asf/tez/blob/12ef073c/tez-ui/src/main/webapp/app/scripts/views/multi-select-view.js
----------------------------------------------------------------------
diff --git a/tez-ui/src/main/webapp/app/scripts/views/multi-select-view.js b/tez-ui/src/main/webapp/app/scripts/views/multi-select-view.js
new file mode 100644
index 0000000..ace7b94
--- /dev/null
+++ b/tez-ui/src/main/webapp/app/scripts/views/multi-select-view.js
@@ -0,0 +1,72 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+App.MultiSelectView = Ember.View.extend({
+  templateName: 'views/multi-select',
+  classNames: ['multi-select'],
+
+  selectAll: false,
+  searchRegex: '',
+
+  options: null, //Must be set by sub-classes or instances
+
+  _validRegEx: function () {
+    var regExText = this.get('searchRegex');
+    regExText = regExText.substr(regExText.indexOf(':') + 1);
+    try {
+      new RegExp(regExText, 'im');
+    }
+    catch(e) {
+      return false;
+    }
+    return true;
+  }.property('searchRegex'),
+
+  visibleOptions: function () {
+    var options = this.get('options'),
+        regExText = this.get('searchRegex'),
+        regEx;
+
+    if (Em.isEmpty(regExText) || !this.get('_validRegEx')) {
+      return options;
+    }
+
+    regEx = new RegExp(regExText, 'i');
+    return options.filter(function (option) {
+      return regEx.test(option.get('displayText'));
+    });
+  }.property('options', 'searchRegex'),
+
+  _selectObserver: function () {
+    var selectedCount = 0;
+    this.get('visibleOptions').forEach(function (option) {
+      if(option.get('selected')) {
+        selectedCount++;
+      }
+    });
+    this.set('selectAll', selectedCount > 0 && selectedCount == this.get('visibleOptions.length'));
+  }.observes('visibleOptions.@each.selected'),
+
+  actions: {
+    selectAll: function (checked) {
+      this.get('visibleOptions').forEach(function (option) {
+        option.set('selected', checked);
+      });
+    }
+  }
+});
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tez/blob/12ef073c/tez-ui/src/main/webapp/app/styles/colors.less
----------------------------------------------------------------------
diff --git a/tez-ui/src/main/webapp/app/styles/colors.less b/tez-ui/src/main/webapp/app/styles/colors.less
index aa0d96a..a426893 100644
--- a/tez-ui/src/main/webapp/app/styles/colors.less
+++ b/tez-ui/src/main/webapp/app/styles/colors.less
@@ -33,6 +33,7 @@
 @text-color: #666666;
 @text-red: red;
 @text-light: #BBBBBB;
+@text-green: green;
 
 @top-nav-bg-color-from: #d5d5d5;
 @top-nav-bg-color-to: #f0f0f0;

http://git-wip-us.apache.org/repos/asf/tez/blob/12ef073c/tez-ui/src/main/webapp/app/styles/main.less
----------------------------------------------------------------------
diff --git a/tez-ui/src/main/webapp/app/styles/main.less b/tez-ui/src/main/webapp/app/styles/main.less
index 5da7e06..ad3f132 100644
--- a/tez-ui/src/main/webapp/app/styles/main.less
+++ b/tez-ui/src/main/webapp/app/styles/main.less
@@ -657,6 +657,10 @@ body, html {
 
       overflow: hidden;
       transform: translateZ(0);
+
+      i {
+        position: static;
+      }
     }
 
     .horizontal-half {
@@ -715,6 +719,11 @@ body, html {
 
     overflow: auto;
 
+    .data-availability-message {
+      font-size: 24px;
+      text-align: center;
+    }
+
     .table-body {
       .noise-background;
       white-space: nowrap;
@@ -788,11 +797,6 @@ body, html {
 
           padding-right: 15px;
 
-          -webkit-user-select: none;
-          -moz-user-select: none;
-          -ms-user-select: none;
-          user-select: none;
-
           background-color: @bg-grey;
           border-bottom: 1px solid @border-color;
         }
@@ -849,3 +853,48 @@ body, html {
   margin-left: -1px;
   margin-top: -1px;
 }
+
+.multi-select {
+  .message {
+    text-align: right;
+    font-size: 10px;
+  }
+
+  .selection-list {
+    border: 1px solid @border-color;
+
+    .highlight {
+      background-color: @bg-lite;
+    }
+    .select-option, .search-option {
+      border-top: 1px dotted @border-color;
+      padding: 5px;
+
+      .checkbox {
+        margin-right: 10px;
+        float: left;
+        vertical-align: middle;
+      }
+    }
+    .search-option {
+      border: none;
+
+      .form-group {
+        .inline-block;
+        .align-top;
+
+        width: 470px;
+        margin: 4px 0px 0px 20px;
+      }
+    }
+  }
+}
+
+.modal-body {
+  max-height:500px;
+  overflow-y: auto;
+}
+
+.per-io {
+  color: @text-green;
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tez/blob/12ef073c/tez-ui/src/main/webapp/app/styles/shared.less
----------------------------------------------------------------------
diff --git a/tez-ui/src/main/webapp/app/styles/shared.less b/tez-ui/src/main/webapp/app/styles/shared.less
index 7cece16..a3cdf2a 100644
--- a/tez-ui/src/main/webapp/app/styles/shared.less
+++ b/tez-ui/src/main/webapp/app/styles/shared.less
@@ -49,6 +49,10 @@
   white-space: nowrap;
 }
 
+.no-border {
+  border: none !important;
+}
+
 .align-top {
   vertical-align: top;
 }

http://git-wip-us.apache.org/repos/asf/tez/blob/12ef073c/tez-ui/src/main/webapp/app/templates/common/table.hbs
----------------------------------------------------------------------
diff --git a/tez-ui/src/main/webapp/app/templates/common/table.hbs b/tez-ui/src/main/webapp/app/templates/common/table.hbs
index 631d08d..462089d 100644
--- a/tez-ui/src/main/webapp/app/templates/common/table.hbs
+++ b/tez-ui/src/main/webapp/app/templates/common/table.hbs
@@ -17,35 +17,31 @@
 }}
 
 {{#unless loading}}
-  {{#if data.length}}
-    {{load-time-component
-      isRefreshable=isRefreshable
-      time=data.content.0.timeStamp
-      refresh='refresh'
-    }}
+  {{load-time-component
+    isRefreshable=isRefreshable
+    time=data.content.0.timeStamp
+    refresh='refresh'
+  }}
 
-    {{basic-table-component
-      columns=columns
-      rows=data.content
+  {{basic-table-component
+    columns=columns
+    rows=data.content
 
-      extraHeaderItem=App.ExtraTableButtonsView
-      statusMessage=statusMessage
+    extraHeaderItem=App.ExtraTableButtonsView
+    statusMessage=statusMessage
 
-      enableSearch=true
-      enablePagination=true
-      enableSort=true
+    enableSearch=true
+    enablePagination=true
+    enableSort=true
 
-      pageNumBinding='pageNum'
-      rowCountBinding='rowCount'
+    pageNumBinding='pageNum'
+    rowCountBinding='rowCount'
 
-      searchTextBinding='searchText'
+    searchTextBinding='searchText'
 
-      sortColumnIdBinding='sortColumnId'
-      sortOrderBinding='sortOrder'
-    }}
-  {{else}}
-    <h1>No records available!</n1>
-  {{/if}}
+    sortColumnIdBinding='sortColumnId'
+    sortOrderBinding='sortOrder'
+  }}
 {{else}}
   {{partial 'partials/loading-spinner'}}
   <div class="text-align-center">

http://git-wip-us.apache.org/repos/asf/tez/blob/12ef073c/tez-ui/src/main/webapp/app/templates/components/basic-table.hbs
----------------------------------------------------------------------
diff --git a/tez-ui/src/main/webapp/app/templates/components/basic-table.hbs b/tez-ui/src/main/webapp/app/templates/components/basic-table.hbs
index 175a3b9..d1e48b2 100644
--- a/tez-ui/src/main/webapp/app/templates/components/basic-table.hbs
+++ b/tez-ui/src/main/webapp/app/templates/components/basic-table.hbs
@@ -21,7 +21,10 @@
     <div class='table-header'>
       <div class="horizontal-half align-top">
         {{#if enableSearch}}
-          {{view App.BasicTableComponent.SearchView text=searchText}}
+          {{view App.BasicTableComponent.SearchView
+            text=searchText
+            placeholder="RegEx or Column1, Column2... :RegEx"
+          }}
         {{/if}}
         <div class="table-message">
           {{#if _statusMessage}}
@@ -40,21 +43,27 @@
     </div>
   {{/if}}
   <div class='table-body-container'>
-    <div class='table-body'>
-      {{#each column in _columns}}
-        <div {{bind-attr
-            style=column.customStyle
-            class=":table-column _view.contentIndex::first-item"
-        }}>
-          {{view column.headerCellView}}
-          {{#each row in _rows}}
-            <div class='table-cell {{unbound firstItemCSS _view}}'>
-              {{view column.cellView row=row}}
-            </div>
-          {{/each}}
-        </div>
-      {{/each}}
-    </div>
+    {{#unless _columns.length}}
+      <div class="data-availability-message">No columns available!</div>
+    {{else}}{{#unless _rows.length}}
+      <div class="data-availability-message">No records available!</div>
+    {{else}}
+      <div class='table-body'>
+        {{#each column in _columns}}
+          <div {{bind-attr
+              style=column.customStyle
+              class=":table-column _view.contentIndex::first-item"
+          }}>
+            {{view column.headerCellView}}
+            {{#each row in _rows}}
+              <div class='table-cell {{unbound firstItemCSS _view}}'>
+                {{view column.cellView row=row}}
+              </div>
+            {{/each}}
+          </div>
+        {{/each}}
+      </div>
+    {{/unless}}{{/unless}}
   </div>
   <div class='table-footer'>
     <div class="horizontal-half align-top">

http://git-wip-us.apache.org/repos/asf/tez/blob/12ef073c/tez-ui/src/main/webapp/app/templates/components/basic-table/header-cell.hbs
----------------------------------------------------------------------
diff --git a/tez-ui/src/main/webapp/app/templates/components/basic-table/header-cell.hbs b/tez-ui/src/main/webapp/app/templates/components/basic-table/header-cell.hbs
index c975d32..394752c 100644
--- a/tez-ui/src/main/webapp/app/templates/components/basic-table/header-cell.hbs
+++ b/tez-ui/src/main/webapp/app/templates/components/basic-table/header-cell.hbs
@@ -16,7 +16,7 @@
 * limitations under the License.
 }}
 
-<div class='table-header-cell'>
+<div class='table-header-cell' title='{{unbound column.headerCellName}}'>
   <div class='cell-content'>
     {{unbound column.headerCellName}}
   </div>

http://git-wip-us.apache.org/repos/asf/tez/blob/12ef073c/tez-ui/src/main/webapp/app/templates/components/basic-table/search-view.hbs
----------------------------------------------------------------------
diff --git a/tez-ui/src/main/webapp/app/templates/components/basic-table/search-view.hbs b/tez-ui/src/main/webapp/app/templates/components/basic-table/search-view.hbs
index 949fc24..4795bbb 100644
--- a/tez-ui/src/main/webapp/app/templates/components/basic-table/search-view.hbs
+++ b/tez-ui/src/main/webapp/app/templates/components/basic-table/search-view.hbs
@@ -19,7 +19,7 @@
 <div {{bind-attr class=":input-group view._validRegEx::has-error"}}>
   {{input
     class="form-control"
-    placeholder="RegEx or Column1, Column2... :RegEx"
+    placeholder=view.placeholder
     action="search"
     value=view._boundText
     targetObject=view

http://git-wip-us.apache.org/repos/asf/tez/blob/12ef073c/tez-ui/src/main/webapp/app/templates/dags.hbs
----------------------------------------------------------------------
diff --git a/tez-ui/src/main/webapp/app/templates/dags.hbs b/tez-ui/src/main/webapp/app/templates/dags.hbs
index 727e28a..0f272c5 100644
--- a/tez-ui/src/main/webapp/app/templates/dags.hbs
+++ b/tez-ui/src/main/webapp/app/templates/dags.hbs
@@ -89,9 +89,6 @@
 
       statusMessage=statusMessage
     }}
-    {{#unless sortedContent.length}}
-      <h1 class="no-margin">No records available!</h1>
-    {{/unless}}
   {{else}}
     {{partial 'partials/loading-spinner'}}
     <div class="text-align-center">

http://git-wip-us.apache.org/repos/asf/tez/blob/12ef073c/tez-ui/src/main/webapp/app/templates/views/multi-select.hbs
----------------------------------------------------------------------
diff --git a/tez-ui/src/main/webapp/app/templates/views/multi-select.hbs b/tez-ui/src/main/webapp/app/templates/views/multi-select.hbs
new file mode 100644
index 0000000..ebab317
--- /dev/null
+++ b/tez-ui/src/main/webapp/app/templates/views/multi-select.hbs
@@ -0,0 +1,55 @@
+{{!
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+}}
+
+<div class="message">
+  {{{view.message}}}
+</div>
+<div class="selection-list">
+  <div class="search-option highlight">
+    <div class="inline-block">
+      Select All<br/>
+      {{view App.Checkbox
+        classNames='inline-display checkbox'
+        checked=view.selectAll
+        action='selectAll'
+        target=view
+      }}
+    </div>
+    <div {{bind-attr class=":form-group view._validRegEx::has-error"}}>
+      {{input
+        class="form-control"
+        placeholder="Filter options"
+        value=view.searchRegex
+      }}
+    </div>
+  </div>
+  {{#if view.visibleOptions.length}}
+    {{#each option in view.visibleOptions}}
+      <div class="select-option {{unbound option.css}}"}}>
+        {{input
+          type="checkbox"
+          classNames='inline-display checkbox'
+          checked=option.selected
+        }}
+        {{option.displayText}}
+      </div>
+    {{/each}}
+  {{else}}
+    <h4>&nbsp;No options available...</h4>
+  {{/if}}
+</div>
\ No newline at end of file


[43/43] tez git commit: TEZ-2420. TaskRunner returning before executing the task. (sseth)

Posted by ss...@apache.org.
TEZ-2420. TaskRunner returning before executing the task. (sseth)


Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/d03e330f
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/d03e330f
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/d03e330f

Branch: refs/heads/TEZ-2003
Commit: d03e330fa05d67512f826a8507e3cf8564ea3fa5
Parents: ea972ac
Author: Siddharth Seth <ss...@apache.org>
Authored: Wed May 6 17:14:56 2015 -0700
Committer: Siddharth Seth <ss...@apache.org>
Committed: Fri May 8 14:46:11 2015 -0700

----------------------------------------------------------------------
 TEZ-2003-CHANGES.txt                                                | 1 +
 .../src/main/java/org/apache/tez/runtime/task/TezTaskRunner.java    | 1 +
 2 files changed, 2 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tez/blob/d03e330f/TEZ-2003-CHANGES.txt
----------------------------------------------------------------------
diff --git a/TEZ-2003-CHANGES.txt b/TEZ-2003-CHANGES.txt
index f8a71e8..9b2339f 100644
--- a/TEZ-2003-CHANGES.txt
+++ b/TEZ-2003-CHANGES.txt
@@ -21,5 +21,6 @@ ALL CHANGES:
   TEZ-2381. Fixes after rebase 04/28.
   TEZ-2388. Send dag identifier as part of the fetcher request string.
   TEZ-2414. LogicalIOProcessorRuntimeTask, RuntimeTask, TezTaskRunner should handle interrupts & carry out necessary cleanups.
+  TEZ-2420. TaskRunner returning before executing the task.
 
 INCOMPATIBLE CHANGES:

http://git-wip-us.apache.org/repos/asf/tez/blob/d03e330f/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TezTaskRunner.java
----------------------------------------------------------------------
diff --git a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TezTaskRunner.java b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TezTaskRunner.java
index 7238d5e..dd4620a 100644
--- a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TezTaskRunner.java
+++ b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TezTaskRunner.java
@@ -105,6 +105,7 @@ public class TezTaskRunner implements TezUmbilical, ErrorReporter {
     Throwable failureCause = null;
     if (!Thread.currentThread().isInterrupted()) {
       taskFuture = executor.submit(callable);
+    } else {
       return isShutdownRequested();
     }
     try {


[27/43] tez git commit: TEZ-2138. Fix minor bugs in adding default scheduler, getting launchers. (sseth)

Posted by ss...@apache.org.
TEZ-2138. Fix minor bugs in adding default scheduler, getting launchers.  (sseth)


Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/39295ca2
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/39295ca2
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/39295ca2

Branch: refs/heads/TEZ-2003
Commit: 39295ca2203f1c42cdbb4086df54fca92654d904
Parents: 67e1643
Author: Siddharth Seth <ss...@apache.org>
Authored: Mon Feb 23 20:53:24 2015 -0800
Committer: Siddharth Seth <ss...@apache.org>
Committed: Fri May 8 14:41:30 2015 -0700

----------------------------------------------------------------------
 TEZ-2003-CHANGES.txt                                         | 1 +
 .../src/main/java/org/apache/tez/dag/app/DAGAppMaster.java   | 2 +-
 .../java/org/apache/tez/dag/app/dag/impl/VertexImpl.java     | 4 ++++
 .../org/apache/tez/dag/app/rm/TaskSchedulerEventHandler.java | 1 +
 .../dag/app/launcher/TezTestServiceContainerLauncher.java    | 8 +++++---
 5 files changed, 12 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tez/blob/39295ca2/TEZ-2003-CHANGES.txt
----------------------------------------------------------------------
diff --git a/TEZ-2003-CHANGES.txt b/TEZ-2003-CHANGES.txt
index 4b0a12b..4377f57 100644
--- a/TEZ-2003-CHANGES.txt
+++ b/TEZ-2003-CHANGES.txt
@@ -7,5 +7,6 @@ ALL CHANGES:
   TEZ-2123. Fix component managers to use pluggable components. (Enable hybrid mode)
   TEZ-2125. Create a task communicator for local mode. Allow tasks to run in the AM.
   TEZ-2131. Add additional tests for tasks running in the AM.
+  TEZ-2138. Fix minor bugs in adding default scheduler, getting launchers.
 
 INCOMPATIBLE CHANGES:

http://git-wip-us.apache.org/repos/asf/tez/blob/39295ca2/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java b/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java
index 701eca8..1ea369e 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java
@@ -1507,7 +1507,7 @@ public class DAGAppMaster extends AbstractService {
 
     @Override
     public Integer getContainerLauncherIdentifier(String name) {
-      return taskCommunicators.get(name);
+      return containerLaunchers.get(name);
     }
 
     @Override

http://git-wip-us.apache.org/repos/asf/tez/blob/39295ca2/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexImpl.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexImpl.java
index 18d3368..0385e64 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexImpl.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexImpl.java
@@ -976,6 +976,10 @@ public class VertexImpl implements org.apache.tez.dag.app.dag.Vertex, EventHandl
         .get(TezConfiguration.TEZ_AM_VERTEX_TASK_COMMUNICATOR_NAME, tezDefaultComponentName);
     String containerLauncherName = vertexConf
         .get(TezConfiguration.TEZ_AM_VERTEX_CONTAINER_LAUNCHER_NAME, tezDefaultComponentName);
+
+    LOG.info("Vertex: " + logIdentifier + " configured with TaskScheduler=" + taskSchedulerName +
+        ", ContainerLauncher=" + containerLauncherName + ", TaskComm=" + taskCommName);
+
     taskSchedulerIdentifier = appContext.getTaskScheduerIdentifier(taskSchedulerName);
     taskCommunicatorIdentifier = appContext.getTaskCommunicatorIdentifier(taskCommName);
     containerLauncherIdentifier = appContext.getContainerLauncherIdentifier(containerLauncherName);

http://git-wip-us.apache.org/repos/asf/tez/blob/39295ca2/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerEventHandler.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerEventHandler.java b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerEventHandler.java
index 5a8e9fe..8e5fc71 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerEventHandler.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerEventHandler.java
@@ -174,6 +174,7 @@ public class TaskSchedulerEventHandler extends AbstractService
           for (int i = 0 ; i < schedulerClasses.length ; i++) { // Copy over the rest.
             this.taskSchedulerClasses[i] = schedulerClasses[i];
           }
+          this.taskSchedulerClasses[foundYarnTaskSchedulerIndex] = TezConstants.TEZ_AM_SERVICE_PLUGINS_NAME_DEFAULT;
         } else {
           this.taskSchedulerClasses = schedulerClasses;
         }

http://git-wip-us.apache.org/repos/asf/tez/blob/39295ca2/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/launcher/TezTestServiceContainerLauncher.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/launcher/TezTestServiceContainerLauncher.java b/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/launcher/TezTestServiceContainerLauncher.java
index 27356bc..cb6308c 100644
--- a/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/launcher/TezTestServiceContainerLauncher.java
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/launcher/TezTestServiceContainerLauncher.java
@@ -23,6 +23,7 @@ import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.service.AbstractService;
 import org.apache.hadoop.yarn.api.ApplicationConstants;
+import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
 import org.apache.hadoop.yarn.api.records.ContainerId;
 import org.apache.hadoop.yarn.util.Clock;
 import org.apache.tez.dag.app.AppContext;
@@ -54,6 +55,7 @@ public class TezTestServiceContainerLauncher extends AbstractService implements
   private final int servicePort;
   private final TezTestServiceCommunicator communicator;
   private final Clock clock;
+  private final ApplicationAttemptId appAttemptId;
 
 
   // Configuration passed in here to set up final parameters
@@ -70,6 +72,7 @@ public class TezTestServiceContainerLauncher extends AbstractService implements
     this.communicator = new TezTestServiceCommunicator(numThreads);
     this.context = appContext;
     this.tokenIdentifier = context.getApplicationID().toString();
+    this.appAttemptId = appContext.getApplicationAttemptId();
     this.tal = tal;
   }
 
@@ -128,9 +131,8 @@ public class TezTestServiceContainerLauncher extends AbstractService implements
     RunContainerRequestProto.Builder builder = RunContainerRequestProto.newBuilder();
     InetSocketAddress address = tal.getTaskCommunicator(event.getTaskCommId()).getAddress();
     builder.setAmHost(address.getHostName()).setAmPort(address.getPort());
-    builder.setAppAttemptNumber(event.getContainer().getId().getApplicationAttemptId().getAttemptId());
-    builder.setApplicationIdString(
-        event.getContainer().getId().getApplicationAttemptId().getApplicationId().toString());
+    builder.setAppAttemptNumber(appAttemptId.getAttemptId());
+    builder.setApplicationIdString(appAttemptId.getApplicationId().toString());
     builder.setTokenIdentifier(tokenIdentifier);
     builder.setContainerIdString(event.getContainer().getId().toString());
     builder.setCredentialsBinary(


[17/43] tez git commit: TEZ-2426. Ensure the eventRouter thread completes before switching to a new task and thread safety fixes in IPOContexts. (sseth)

Posted by ss...@apache.org.
TEZ-2426. Ensure the eventRouter thread completes before switching to a
new task and thread safety fixes in IPOContexts. (sseth)


Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/ce69aa1e
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/ce69aa1e
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/ce69aa1e

Branch: refs/heads/TEZ-2003
Commit: ce69aa1e2ca3320d33c833a96a158f94bfd73f52
Parents: 6e6ad70
Author: Siddharth Seth <ss...@apache.org>
Authored: Fri May 8 11:08:14 2015 -0700
Committer: Siddharth Seth <ss...@apache.org>
Committed: Fri May 8 11:08:14 2015 -0700

----------------------------------------------------------------------
 CHANGES.txt                                     |   1 +
 .../runtime/LogicalIOProcessorRuntimeTask.java  | 110 +++++++++++++------
 .../runtime/api/impl/TezInputContextImpl.java   |  11 +-
 .../runtime/api/impl/TezOutputContextImpl.java  |   2 +-
 .../api/impl/TezProcessorContextImpl.java       |   4 +-
 .../runtime/api/impl/TezTaskContextImpl.java    |   9 +-
 .../apache/tez/runtime/task/TaskReporter.java   |  47 +++++---
 .../TestLogicalIOProcessorRuntimeTask.java      |  48 +++++---
 8 files changed, 160 insertions(+), 72 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tez/blob/ce69aa1e/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 185e1b0..efb19b2 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -20,6 +20,7 @@ INCOMPATIBLE CHANGES
     Default max limit increased. Should not affect existing users.
 
 ALL CHANGES:
+  TEZ-2426. Ensure the eventRouter thread completes before switching to a new task and thread safety fixes in IPOContexts.
   TEZ-2412. Should kill vertex in DAGImpl#VertexRerunWhileCommitting
   TEZ-2410. VertexGroupCommitFinishedEvent & VertexCommitStartedEvent is not logged correctly
   TEZ-776. Reduce AM mem usage caused by storing TezEvents

http://git-wip-us.apache.org/repos/asf/tez/blob/ce69aa1e/tez-runtime-internals/src/main/java/org/apache/tez/runtime/LogicalIOProcessorRuntimeTask.java
----------------------------------------------------------------------
diff --git a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/LogicalIOProcessorRuntimeTask.java b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/LogicalIOProcessorRuntimeTask.java
index f465d3c..1cfe538 100644
--- a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/LogicalIOProcessorRuntimeTask.java
+++ b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/LogicalIOProcessorRuntimeTask.java
@@ -30,6 +30,7 @@ import java.util.Map;
 import java.util.Set;
 import java.util.concurrent.CompletionService;
 import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ConcurrentMap;
 import java.util.concurrent.ExecutionException;
 import java.util.concurrent.ExecutorCompletionService;
 import java.util.concurrent.ExecutorService;
@@ -96,45 +97,46 @@ public class LogicalIOProcessorRuntimeTask extends RuntimeTask {
   private static final Logger LOG = LoggerFactory
       .getLogger(LogicalIOProcessorRuntimeTask.class);
 
+  @VisibleForTesting // All fields non private for testing.
   private final String[] localDirs;
   /** Responsible for maintaining order of Inputs */
-  private final List<InputSpec> inputSpecs;
-  private final Map<String, LogicalInput> inputsMap;
-  private final Map<String, InputContext> inputContextMap;
+  final List<InputSpec> inputSpecs;
+  final ConcurrentMap<String, LogicalInput> inputsMap;
+  final ConcurrentMap<String, InputContext> inputContextMap;
   /** Responsible for maintaining order of Outputs */
-  private final List<OutputSpec> outputSpecs;
-  private final Map<String, LogicalOutput> outputsMap;
-  private final Map<String, OutputContext> outputContextMap;
+  final List<OutputSpec> outputSpecs;
+  final ConcurrentMap<String, LogicalOutput> outputsMap;
+  final ConcurrentMap<String, OutputContext> outputContextMap;
 
-  private final List<GroupInputSpec> groupInputSpecs;
-  private ConcurrentHashMap<String, MergedLogicalInput> groupInputsMap;
+  final List<GroupInputSpec> groupInputSpecs;
+  ConcurrentHashMap<String, MergedLogicalInput> groupInputsMap;
 
-  private final ProcessorDescriptor processorDescriptor;
-  private AbstractLogicalIOProcessor processor;
-  private ProcessorContext processorContext;
+  final ProcessorDescriptor processorDescriptor;
+  AbstractLogicalIOProcessor processor;
+  ProcessorContext processorContext;
 
   private final MemoryDistributor initialMemoryDistributor;
 
   /** Maps which will be provided to the processor run method */
-  private final LinkedHashMap<String, LogicalInput> runInputMap;
-  private final LinkedHashMap<String, LogicalOutput> runOutputMap;
+  final LinkedHashMap<String, LogicalInput> runInputMap;
+  final LinkedHashMap<String, LogicalOutput> runOutputMap;
   
   private final Map<String, ByteBuffer> serviceConsumerMetadata;
   private final Map<String, String> envMap;
 
-  private final ExecutorService initializerExecutor;
+  final ExecutorService initializerExecutor;
   private final CompletionService<Void> initializerCompletionService;
 
   private final Multimap<String, String> startedInputsMap;
 
-  private LinkedBlockingQueue<TezEvent> eventsToBeProcessed;
-  private Thread eventRouterThread = null;
+  LinkedBlockingQueue<TezEvent> eventsToBeProcessed;
+  Thread eventRouterThread = null;
 
   private final int appAttemptNumber;
 
-  private final InputReadyTracker inputReadyTracker;
+  private volatile InputReadyTracker inputReadyTracker;
   
-  private final ObjectRegistry objectRegistry;
+  private volatile ObjectRegistry objectRegistry;
   private final ExecutionContext ExecutionContext;
   private final long memAvailable;
 
@@ -143,6 +145,7 @@ public class LogicalIOProcessorRuntimeTask extends RuntimeTask {
       Map<String, ByteBuffer> serviceConsumerMetadata, Map<String, String> envMap,
       Multimap<String, String> startedInputsMap, ObjectRegistry objectRegistry,
       String pid, ExecutionContext ExecutionContext, long memAvailable) throws IOException {
+    // Note: If adding any fields here, make sure they're cleaned up in the cleanupContext method.
     // TODO Remove jobToken from here post TEZ-421
     super(taskSpec, tezConf, tezUmbilical, pid);
     LOG.info("Initializing LogicalIOProcessorRuntimeTask with TaskSpec: "
@@ -361,6 +364,14 @@ public class LogicalIOProcessorRuntimeTask extends RuntimeTask {
       setTaskDone();
       if (eventRouterThread != null) {
         eventRouterThread.interrupt();
+        LOG.info("Joining on EventRouter");
+        try {
+          eventRouterThread.join();
+        } catch (InterruptedException e) {
+          LOG.info("Ignoring interrupt while waiting for the router thread to die");
+          Thread.currentThread().interrupt();
+        }
+        eventRouterThread = null;
       }
     }
   }
@@ -694,14 +705,6 @@ public class LogicalIOProcessorRuntimeTask extends RuntimeTask {
     eventRouterThread.start();
   }
 
-  private void cleanupInputOutputs() {
-    if (groupInputsMap != null) {
-      groupInputsMap.clear();
-    }
-    inputsMap.clear();
-    outputsMap.clear();
-  }
-
   private void closeContexts() throws IOException {
     closeContext(inputContextMap);
     closeContext(outputContextMap);
@@ -725,19 +728,62 @@ public class LogicalIOProcessorRuntimeTask extends RuntimeTask {
     }
   }
 
-  public synchronized void cleanup() {
+  public void cleanup() throws InterruptedException {
+    LOG.info("Final Counters : " + getCounters().toShortString());
+    setTaskDone();
+    if (eventRouterThread != null) {
+      eventRouterThread.interrupt();
+      LOG.info("Joining on EventRouter");
+      try {
+        eventRouterThread.join();
+      } catch (InterruptedException e) {
+        LOG.info("Ignoring interrupt while waiting for the router thread to die");
+        Thread.currentThread().interrupt();
+      }
+      eventRouterThread = null;
+    }
     try {
-      cleanupInputOutputs();
       closeContexts();
+      // Cleanup references which may be held by misbehaved tasks.
+      cleanupStructures();
     } catch (IOException e) {
       LOG.info("Error while cleaning up contexts ", e);
     }
+  }
 
-    LOG.info("Final Counters : " + getCounters().toShortString());
-    setTaskDone();
-    if (eventRouterThread != null) {
-      eventRouterThread.interrupt();
+  private void cleanupStructures() {
+    if (initializerExecutor != null && !initializerExecutor.isShutdown()) {
+      initializerExecutor.shutdownNow();
+    }
+    inputsMap.clear();
+    outputsMap.clear();
+
+    inputSpecs.clear();
+    outputSpecs.clear();
+
+    inputsMap.clear();
+    outputsMap.clear();
+
+    inputContextMap.clear();
+    outputContextMap.clear();
+
+    if (groupInputSpecs != null) {
+      groupInputSpecs.clear();
     }
+    if (groupInputsMap != null) {
+      groupInputsMap.clear();
+      groupInputsMap = null;
+    }
+
+    processor = null;
+    processorContext = null;
+
+    runInputMap.clear();
+    runOutputMap.clear();
+
+    eventsToBeProcessed.clear();
+    inputReadyTracker = null;
+    objectRegistry = null;
   }
   
   @Private

http://git-wip-us.apache.org/repos/asf/tez/blob/ce69aa1e/tez-runtime-internals/src/main/java/org/apache/tez/runtime/api/impl/TezInputContextImpl.java
----------------------------------------------------------------------
diff --git a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/api/impl/TezInputContextImpl.java b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/api/impl/TezInputContextImpl.java
index 101aeb9..8d6466a 100644
--- a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/api/impl/TezInputContextImpl.java
+++ b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/api/impl/TezInputContextImpl.java
@@ -55,12 +55,12 @@ public class TezInputContextImpl extends TezTaskContextImpl
 
   private static final Logger LOG = LoggerFactory.getLogger(TezInputContextImpl.class);
 
-  private UserPayload userPayload;
+  private volatile UserPayload userPayload;
   private final String sourceVertexName;
   private final EventMetaData sourceInfo;
   private final int inputIndex;
   private final Map<String, LogicalInput> inputs;
-  private InputReadyTracker inputReadyTracker;
+  private volatile InputReadyTracker inputReadyTracker;
   private final InputStatisticsReporterImpl statsReporter;
   
   class InputStatisticsReporterImpl implements InputStatisticsReporter {
@@ -159,7 +159,11 @@ public class TezInputContextImpl extends TezTaskContextImpl
 
   @Override
   public void inputIsReady() {
-    inputReadyTracker.setInputIsReady(inputs.get(sourceVertexName));
+    if (inputReadyTracker != null) {
+      inputReadyTracker.setInputIsReady(inputs.get(sourceVertexName));
+    } else {
+      LOG.warn("Ignoring Input Ready notification since the Task has already been closed");
+    }
   }
 
   @Override
@@ -172,7 +176,6 @@ public class TezInputContextImpl extends TezTaskContextImpl
     super.close();
     this.userPayload = null;
     this.inputReadyTracker = null;
-    inputs.clear();
     LOG.info("Cleared TezInputContextImpl related information");
   }
 }

http://git-wip-us.apache.org/repos/asf/tez/blob/ce69aa1e/tez-runtime-internals/src/main/java/org/apache/tez/runtime/api/impl/TezOutputContextImpl.java
----------------------------------------------------------------------
diff --git a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/api/impl/TezOutputContextImpl.java b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/api/impl/TezOutputContextImpl.java
index b46cfd2..71e96db 100644
--- a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/api/impl/TezOutputContextImpl.java
+++ b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/api/impl/TezOutputContextImpl.java
@@ -53,7 +53,7 @@ public class TezOutputContextImpl extends TezTaskContextImpl
 
   private static final Logger LOG = LoggerFactory.getLogger(TezOutputContextImpl.class);
 
-  private UserPayload userPayload;
+  private volatile UserPayload userPayload;
   private final String destinationVertexName;
   private final EventMetaData sourceInfo;
   private final int outputIndex;

http://git-wip-us.apache.org/repos/asf/tez/blob/ce69aa1e/tez-runtime-internals/src/main/java/org/apache/tez/runtime/api/impl/TezProcessorContextImpl.java
----------------------------------------------------------------------
diff --git a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/api/impl/TezProcessorContextImpl.java b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/api/impl/TezProcessorContextImpl.java
index d6b3ec5..a191ae8 100644
--- a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/api/impl/TezProcessorContextImpl.java
+++ b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/api/impl/TezProcessorContextImpl.java
@@ -53,8 +53,8 @@ public class TezProcessorContextImpl extends TezTaskContextImpl implements Proce
 
   private static final Logger LOG = LoggerFactory.getLogger(TezProcessorContextImpl.class);
 
-  private UserPayload userPayload;
-  private InputReadyTracker inputReadyTracker;
+  private volatile UserPayload userPayload;
+  private volatile InputReadyTracker inputReadyTracker;
   private final EventMetaData sourceInfo;
 
   public TezProcessorContextImpl(Configuration conf, String[] workDirs, int appAttemptNumber,

http://git-wip-us.apache.org/repos/asf/tez/blob/ce69aa1e/tez-runtime-internals/src/main/java/org/apache/tez/runtime/api/impl/TezTaskContextImpl.java
----------------------------------------------------------------------
diff --git a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/api/impl/TezTaskContextImpl.java b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/api/impl/TezTaskContextImpl.java
index 170741a..5f04c80 100644
--- a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/api/impl/TezTaskContextImpl.java
+++ b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/api/impl/TezTaskContextImpl.java
@@ -54,15 +54,15 @@ public abstract class TezTaskContextImpl implements TaskContext, Closeable {
   private final TezCounters counters;
   private String[] workDirs;
   private String uniqueIdentifier;
-  protected LogicalIOProcessorRuntimeTask runtimeTask;
+  protected final LogicalIOProcessorRuntimeTask runtimeTask;
   protected final TezUmbilical tezUmbilical;
   private final Map<String, ByteBuffer> serviceConsumerMetadata;
   private final int appAttemptNumber;
   private final Map<String, String> auxServiceEnv;
-  protected MemoryDistributor initialMemoryDistributor;
+  protected volatile MemoryDistributor initialMemoryDistributor;
   protected final EntityDescriptor<?> descriptor;
   private final String dagName;
-  private ObjectRegistry objectRegistry;
+  private volatile ObjectRegistry objectRegistry;
   private final int vertexParallelism;
   private final ExecutionContext ExecutionContext;
   private final long memAvailable;
@@ -225,7 +225,8 @@ public abstract class TezTaskContextImpl implements TaskContext, Closeable {
 
   @Override
   public void close() throws IOException {
-    this.runtimeTask = null;
+    Preconditions.checkState(runtimeTask.isTaskDone(),
+        "Runtime task must be complete before calling cleanup");
     this.objectRegistry = null;
     this.initialMemoryDistributor = null;
   }

http://git-wip-us.apache.org/repos/asf/tez/blob/ce69aa1e/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TaskReporter.java
----------------------------------------------------------------------
diff --git a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TaskReporter.java b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TaskReporter.java
index 3d1d1a2..8b9db16 100644
--- a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TaskReporter.java
+++ b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TaskReporter.java
@@ -27,6 +27,7 @@ import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
 import java.util.concurrent.LinkedBlockingQueue;
 import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.concurrent.atomic.AtomicLong;
 import java.util.concurrent.locks.Condition;
@@ -112,6 +113,7 @@ public class TaskReporter {
   public synchronized void unregisterTask(TezTaskAttemptID taskAttemptID) {
     currentCallable.markComplete();
     currentCallable = null;
+    // KKK Make sure the callable completes before proceeding
   }
   
   public void shutdown() {
@@ -125,7 +127,7 @@ public class TaskReporter {
     private static final float LOG_COUNTER_BACKOFF = 1.3f;
 
     private final RuntimeTask task;
-    private EventMetaData updateEventMetadata;
+    private final EventMetaData updateEventMetadata;
 
     private final TezTaskUmbilicalProtocol umbilical;
 
@@ -136,6 +138,9 @@ public class TaskReporter {
 
     private final AtomicLong requestCounter;
 
+    private final AtomicBoolean finalEventQueued = new AtomicBoolean(false);
+    private final AtomicBoolean askedToDie = new AtomicBoolean(false);
+
     private LinkedBlockingQueue<TezEvent> eventsToSend = new LinkedBlockingQueue<TezEvent>();
 
     private final ReentrantLock lock = new ReentrantLock();
@@ -199,6 +204,9 @@ public class TaskReporter {
       }
       int pendingEventCount = eventsToSend.size();
       if (pendingEventCount > 0) {
+        // This is OK because the pending events will be sent via the succeeded/failed messages.
+        // TaskDone is set before taskSucceeded / taskFailed are sent out - which is what causes the
+        // thread to exit.
         LOG.warn("Exiting TaskReporter thread with pending queue size=" + pendingEventCount);
       }
       return true;
@@ -256,6 +264,7 @@ public class TaskReporter {
 
       if (response.shouldDie()) {
         LOG.info("Received should die response from AM");
+        askedToDie.set(true);
         return new ResponseWrapper(true, 1);
       }
       if (response.getLastRequestId() != requestId) {
@@ -268,7 +277,7 @@ public class TaskReporter {
       int numEventsReceived = 0;
       if (task.isTaskDone() || task.hadFatalError()) {
         if (response.getEvents() != null && !response.getEvents().isEmpty()) {
-          LOG.warn("Current task already complete, Ignoring all event in"
+          LOG.info("Current task already complete, Ignoring all event in"
               + " heartbeat response, eventCount=" + response.getEvents().size());
         }
       } else {
@@ -315,10 +324,16 @@ public class TaskReporter {
      *           indicates an exception somewhere in the AM.
      */
     private boolean taskSucceeded(TezTaskAttemptID taskAttemptID) throws IOException, TezException {
-      TezEvent statusUpdateEvent = new TezEvent(getStatusUpdateEvent(true), updateEventMetadata);
-      TezEvent taskCompletedEvent = new TezEvent(new TaskAttemptCompletedEvent(),
-          updateEventMetadata);
-      return !heartbeat(Lists.newArrayList(statusUpdateEvent, taskCompletedEvent)).shouldDie;
+      // Ensure only one final event is ever sent.
+      if (!finalEventQueued.getAndSet(true)) {
+        TezEvent statusUpdateEvent = new TezEvent(getStatusUpdateEvent(true), updateEventMetadata);
+        TezEvent taskCompletedEvent = new TezEvent(new TaskAttemptCompletedEvent(),
+            updateEventMetadata);
+        return !heartbeat(Lists.newArrayList(statusUpdateEvent, taskCompletedEvent)).shouldDie;
+      } else {
+        LOG.warn("A final task state event has already been sent. Not sending again");
+        return askedToDie.get();
+      }
     }
     
     @VisibleForTesting
@@ -351,15 +366,21 @@ public class TaskReporter {
      */
     private boolean taskFailed(TezTaskAttemptID taskAttemptID, Throwable t, String diagnostics,
         EventMetaData srcMeta) throws IOException, TezException {
-      TezEvent statusUpdateEvent = new TezEvent(getStatusUpdateEvent(true), updateEventMetadata);
-      if (diagnostics == null) {
-        diagnostics = ExceptionUtils.getStackTrace(t);
+      // Ensure only one final event is ever sent.
+      if (!finalEventQueued.getAndSet(true)) {
+        TezEvent statusUpdateEvent = new TezEvent(getStatusUpdateEvent(true), updateEventMetadata);
+        if (diagnostics == null) {
+          diagnostics = ExceptionUtils.getStackTrace(t);
+        } else {
+          diagnostics = diagnostics + ":" + ExceptionUtils.getStackTrace(t);
+        }
+        TezEvent taskAttemptFailedEvent = new TezEvent(new TaskAttemptFailedEvent(diagnostics),
+            srcMeta == null ? updateEventMetadata : srcMeta);
+        return !heartbeat(Lists.newArrayList(statusUpdateEvent, taskAttemptFailedEvent)).shouldDie;
       } else {
-        diagnostics = diagnostics + ":" + ExceptionUtils.getStackTrace(t);
+        LOG.warn("A final task state event has already been sent. Not sending again");
+        return askedToDie.get();
       }
-      TezEvent taskAttemptFailedEvent = new TezEvent(new TaskAttemptFailedEvent(diagnostics),
-          srcMeta == null ? updateEventMetadata : srcMeta);
-      return !heartbeat(Lists.newArrayList(statusUpdateEvent, taskAttemptFailedEvent)).shouldDie;
     }
 
     private void addEvents(TezTaskAttemptID taskAttemptID, Collection<TezEvent> events) {

http://git-wip-us.apache.org/repos/asf/tez/blob/ce69aa1e/tez-runtime-internals/src/test/java/org/apache/tez/runtime/TestLogicalIOProcessorRuntimeTask.java
----------------------------------------------------------------------
diff --git a/tez-runtime-internals/src/test/java/org/apache/tez/runtime/TestLogicalIOProcessorRuntimeTask.java b/tez-runtime-internals/src/test/java/org/apache/tez/runtime/TestLogicalIOProcessorRuntimeTask.java
index df932cf..b337bc7 100644
--- a/tez-runtime-internals/src/test/java/org/apache/tez/runtime/TestLogicalIOProcessorRuntimeTask.java
+++ b/tez-runtime-internals/src/test/java/org/apache/tez/runtime/TestLogicalIOProcessorRuntimeTask.java
@@ -19,6 +19,7 @@
 package org.apache.tez.runtime;
 
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
 import static org.mockito.Mockito.mock;
@@ -26,6 +27,7 @@ import static org.mockito.Mockito.mock;
 import java.nio.ByteBuffer;
 import java.util.Collections;
 import java.util.HashMap;
+import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
 
@@ -33,6 +35,7 @@ import org.apache.tez.dag.api.InputDescriptor;
 import org.apache.tez.dag.api.OutputDescriptor;
 import org.apache.tez.dag.api.ProcessorDescriptor;
 import org.apache.tez.dag.api.TezConfiguration;
+import org.apache.tez.dag.api.UserPayload;
 import org.apache.tez.dag.records.TezDAGID;
 import org.apache.tez.dag.records.TezTaskAttemptID;
 import org.apache.tez.dag.records.TezTaskID;
@@ -133,29 +136,43 @@ public class TestLogicalIOProcessorRuntimeTask {
 
   }
 
-  private void cleanupAndTest(LogicalIOProcessorRuntimeTask lio) {
+  private void cleanupAndTest(LogicalIOProcessorRuntimeTask lio) throws InterruptedException {
+
+    ProcessorContext procContext = lio.getProcessorContext();
+    List<InputContext> inputContexts = new LinkedList<InputContext>();
+    inputContexts.addAll(lio.getInputContexts());
+    List<OutputContext> outputContexts = new LinkedList<OutputContext>();
+    outputContexts.addAll(lio.getOutputContexts());
 
     lio.cleanup();
 
-    assertTrue(lio.getProcessorContext().getUserPayload() == null);
-    assertTrue(lio.getProcessorContext().getObjectRegistry() == null);
+    assertTrue(procContext.getUserPayload() == null);
+    assertTrue(procContext.getObjectRegistry() == null);
 
-    try {
-      lio.getProcessorContext().waitForAnyInputReady(Collections.<Input>emptyList());
-      fail("Processor context should have been already cleanup");
-    } catch (Throwable t) {
-      assertTrue(t instanceof NullPointerException);
+    for (InputContext inputContext : inputContexts) {
+      assertTrue(inputContext.getUserPayload() == null);
+      assertTrue(inputContext.getObjectRegistry() == null);
     }
 
-    try {
-      lio.getProcessorContext().requestInitialMemory(0, null);
-      fail("Processor context should have been already cleanup");
-    } catch (Throwable t) {
-      assertTrue(t instanceof NullPointerException);
+    for (OutputContext outputContext : outputContexts) {
+      assertTrue(outputContext.getUserPayload() == null);
+      assertTrue(outputContext.getObjectRegistry() == null);
     }
 
-    assertTrue(lio.getInputContexts().size() == 0);
-    assertTrue(lio.getOutputContexts().size() == 0);
+    assertEquals(0, lio.inputSpecs.size());
+    assertEquals(0, lio.inputsMap.size());
+    assertEquals(0, lio.inputContextMap.size());
+    assertEquals(0, lio.outputSpecs.size());
+    assertEquals(0, lio.outputsMap.size());
+    assertEquals(0, lio.outputContextMap.size());
+    assertTrue(lio.groupInputSpecs == null || lio.groupInputSpecs.size() == 0);
+    assertNull(lio.groupInputsMap);
+    assertNull(lio.processor);
+    assertNull(lio.processorContext);
+    assertEquals(0, lio.runInputMap.size());
+    assertEquals(0, lio.runOutputMap.size());
+    assertEquals(0, lio.eventsToBeProcessed.size());
+    assertNull(lio.eventRouterThread);
   }
 
   private TaskSpec createTaskSpec(TezTaskAttemptID taskAttemptID,
@@ -248,7 +265,6 @@ public class TestLogicalIOProcessorRuntimeTask {
     public void start() throws Exception {
       startCount++;
       this.vertexParallelism = getContext().getVertexParallelism();
-      System.err.println("In started");
     }
 
     @Override


[11/43] tez git commit: TEZ-2423. Tez UI: Remove Attempt Index column from task->attempts page (Sreenath Somarajapuram via pramachandran)

Posted by ss...@apache.org.
TEZ-2423. Tez UI: Remove Attempt Index column from task->attempts page (Sreenath Somarajapuram via pramachandran)


Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/a382324c
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/a382324c
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/a382324c

Branch: refs/heads/TEZ-2003
Commit: a382324c24a30ef7b632ca1f003a90873ac802e3
Parents: 70cd396
Author: Prakash Ramachandran <pr...@hortonworks.com>
Authored: Thu May 7 17:52:00 2015 +0530
Committer: Prakash Ramachandran <pr...@hortonworks.com>
Committed: Thu May 7 17:52:00 2015 +0530

----------------------------------------------------------------------
 CHANGES.txt                                          |  1 +
 .../controllers/task_task_attempts_controller.js     | 15 ---------------
 2 files changed, 1 insertion(+), 15 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tez/blob/a382324c/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index c3d48b6..8de61b0 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -20,6 +20,7 @@ INCOMPATIBLE CHANGES
     Default max limit increased. Should not affect existing users.
 
 ALL CHANGES:
+  TEZ-2423. Tez UI: Remove Attempt Index column from task->attempts page
   TEZ-2416. Tez UI: Make tooltips display faster.
   TEZ-2404. Handle DataMovementEvent before its TaskAttemptCompletedEvent
   TEZ-2424. Bump up max counter group name length limit to account for per_io counters.

http://git-wip-us.apache.org/repos/asf/tez/blob/a382324c/tez-ui/src/main/webapp/app/scripts/controllers/task_task_attempts_controller.js
----------------------------------------------------------------------
diff --git a/tez-ui/src/main/webapp/app/scripts/controllers/task_task_attempts_controller.js b/tez-ui/src/main/webapp/app/scripts/controllers/task_task_attempts_controller.js
index c5c9eea..d211479 100644
--- a/tez-ui/src/main/webapp/app/scripts/controllers/task_task_attempts_controller.js
+++ b/tez-ui/src/main/webapp/app/scripts/controllers/task_task_attempts_controller.js
@@ -62,21 +62,6 @@ App.TaskAttemptsController = App.TablePageController.extend(App.AutoCounterColum
     var that = this;
     return [
       {
-        id: 'id',
-        headerCellName: 'Attempt Index',
-        templateName: 'components/basic-table/linked-cell',
-        contentPath: 'id',
-        getCellContent: function (row) {
-          var id = row.get('id'),
-              idPrefix = 'attempt_%@_'.fmt(row.get('dagID').substr(4));
-          return {
-            linkTo: 'taskAttempt',
-            entityId: id,
-            displayText: id.indexOf(idPrefix) == 0 ? id.substr(idPrefix.length) : id
-          };
-        }
-      },
-      {
         id: 'attemptNo',
         headerCellName: 'Attempt No',
         templateName: 'components/basic-table/linked-cell',


[33/43] tez git commit: TEZ-2241. Miscellaneous fixes after last reabse. (sseth)

Posted by ss...@apache.org.
TEZ-2241. Miscellaneous fixes after last reabse. (sseth)


Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/5a46aa59
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/5a46aa59
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/5a46aa59

Branch: refs/heads/TEZ-2003
Commit: 5a46aa598a3455ae48354863f91ebb40ee793157
Parents: e1ab191
Author: Siddharth Seth <ss...@apache.org>
Authored: Thu Mar 26 17:57:55 2015 -0700
Committer: Siddharth Seth <ss...@apache.org>
Committed: Fri May 8 14:41:30 2015 -0700

----------------------------------------------------------------------
 TEZ-2003-CHANGES.txt                                          | 1 +
 .../org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java  | 2 ++
 .../org/apache/tez/dag/app/TezLocalTaskCommunicatorImpl.java  | 6 +++---
 .../java/org/apache/tez/dag/app/TezTaskCommunicatorImpl.java  | 6 +++---
 .../apache/tez/dag/app/launcher/ContainerLauncherRouter.java  | 6 +++---
 .../test/java/org/apache/tez/dag/app/MockDAGAppMaster.java    | 2 +-
 tez-ext-service-tests/pom.xml                                 | 4 ++--
 .../tez/dag/app/launcher/TezTestServiceContainerLauncher.java | 6 +++---
 .../dag/app/launcher/TezTestServiceNoOpContainerLauncher.java | 6 +++---
 .../tez/dag/app/rm/TezTestServiceTaskSchedulerService.java    | 7 ++++---
 .../dag/app/taskcomm/TezTestServiceTaskCommunicatorImpl.java  | 7 ++++---
 .../org/apache/tez/service/MiniTezTestServiceCluster.java     | 6 +++---
 .../java/org/apache/tez/service/impl/ContainerRunnerImpl.java | 5 +++--
 .../test/java/org/apache/tez/service/impl/TezTestService.java | 4 ++--
 .../tez/service/impl/TezTestServiceProtocolServerImpl.java    | 6 +++---
 .../org/apache/tez/shufflehandler/FadvisedChunkedFile.java    | 6 +++---
 .../org/apache/tez/shufflehandler/FadvisedFileRegion.java     | 6 +++---
 .../test/java/org/apache/tez/shufflehandler/IndexCache.java   | 6 +++---
 .../java/org/apache/tez/shufflehandler/ShuffleHandler.java    | 6 +++---
 .../java/org/apache/tez/tests/TestExternalTezServices.java    | 6 +++---
 20 files changed, 55 insertions(+), 49 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tez/blob/5a46aa59/TEZ-2003-CHANGES.txt
----------------------------------------------------------------------
diff --git a/TEZ-2003-CHANGES.txt b/TEZ-2003-CHANGES.txt
index 774a685..d51686d 100644
--- a/TEZ-2003-CHANGES.txt
+++ b/TEZ-2003-CHANGES.txt
@@ -11,5 +11,6 @@ ALL CHANGES:
   TEZ-2139. Update tez version to 0.7.0-TEZ-2003-SNAPSHOT.
   TEZ-2175. Task priority should be available to the TaskCommunicator plugin.
   TEZ-2187. Allow TaskCommunicators to report failed / killed attempts.
+  TEZ-2241. Miscellaneous fixes after last reabse.
 
 INCOMPATIBLE CHANGES:

http://git-wip-us.apache.org/repos/asf/tez/blob/5a46aa59/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java b/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
index 94f6cae..0ee448b 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
@@ -67,6 +67,8 @@ import org.apache.tez.dag.records.TezTaskAttemptID;
 import org.apache.tez.dag.records.TezVertexID;
 import org.apache.tez.runtime.api.impl.TezEvent;
 import org.apache.tez.common.security.JobTokenSecretManager;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 
 @SuppressWarnings("unchecked")

http://git-wip-us.apache.org/repos/asf/tez/blob/5a46aa59/tez-dag/src/main/java/org/apache/tez/dag/app/TezLocalTaskCommunicatorImpl.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/TezLocalTaskCommunicatorImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/TezLocalTaskCommunicatorImpl.java
index 3704cc4..47688d1 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/TezLocalTaskCommunicatorImpl.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/TezLocalTaskCommunicatorImpl.java
@@ -18,14 +18,14 @@ import java.net.InetAddress;
 import java.net.InetSocketAddress;
 import java.net.UnknownHostException;
 
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
 import org.apache.tez.dag.api.TaskCommunicatorContext;
 import org.apache.tez.dag.api.TezUncheckedException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 public class TezLocalTaskCommunicatorImpl extends TezTaskCommunicatorImpl {
 
-  private static final Log LOG = LogFactory.getLog(TezLocalTaskCommunicatorImpl.class);
+  private static final Logger LOG = LoggerFactory.getLogger(TezLocalTaskCommunicatorImpl.class);
 
   public TezLocalTaskCommunicatorImpl(
       TaskCommunicatorContext taskCommunicatorContext) {

http://git-wip-us.apache.org/repos/asf/tez/blob/5a46aa59/tez-dag/src/main/java/org/apache/tez/dag/app/TezTaskCommunicatorImpl.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/TezTaskCommunicatorImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/TezTaskCommunicatorImpl.java
index f288748..9d0c031 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/TezTaskCommunicatorImpl.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/TezTaskCommunicatorImpl.java
@@ -23,8 +23,6 @@ import java.util.concurrent.ConcurrentMap;
 
 import com.google.common.base.Preconditions;
 import com.google.common.collect.Maps;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
@@ -55,11 +53,13 @@ import org.apache.tez.dag.records.TezTaskAttemptID;
 import org.apache.tez.runtime.api.impl.TaskSpec;
 import org.apache.tez.runtime.api.impl.TezHeartbeatRequest;
 import org.apache.tez.runtime.api.impl.TezHeartbeatResponse;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 @InterfaceAudience.Private
 public class TezTaskCommunicatorImpl extends TaskCommunicator {
 
-  private static final Log LOG = LogFactory.getLog(TezTaskCommunicatorImpl.class);
+  private static final Logger LOG = LoggerFactory.getLogger(TezTaskCommunicatorImpl.class);
 
   private static final ContainerTask TASK_FOR_INVALID_JVM = new ContainerTask(
       null, true, null, null, false);

http://git-wip-us.apache.org/repos/asf/tez/blob/5a46aa59/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncherRouter.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncherRouter.java b/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncherRouter.java
index 70b0cbc..dd3571e 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncherRouter.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncherRouter.java
@@ -19,8 +19,6 @@ import java.lang.reflect.InvocationTargetException;
 import java.net.UnknownHostException;
 
 import com.google.common.annotations.VisibleForTesting;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.service.AbstractService;
 import org.apache.hadoop.yarn.event.EventHandler;
@@ -30,11 +28,13 @@ import org.apache.tez.dag.api.TezUncheckedException;
 import org.apache.tez.dag.app.AppContext;
 import org.apache.tez.dag.app.TaskAttemptListener;
 import org.apache.tez.dag.app.rm.NMCommunicatorEvent;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 public class ContainerLauncherRouter extends AbstractService
     implements EventHandler<NMCommunicatorEvent> {
 
-  static final Log LOG = LogFactory.getLog(ContainerLauncherImpl.class);
+  static final Logger LOG = LoggerFactory.getLogger(ContainerLauncherImpl.class);
 
   private final ContainerLauncher containerLaunchers[];
 

http://git-wip-us.apache.org/repos/asf/tez/blob/5a46aa59/tez-dag/src/test/java/org/apache/tez/dag/app/MockDAGAppMaster.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/MockDAGAppMaster.java b/tez-dag/src/test/java/org/apache/tez/dag/app/MockDAGAppMaster.java
index 4f014a4..051497b 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/MockDAGAppMaster.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/MockDAGAppMaster.java
@@ -340,7 +340,7 @@ public class MockDAGAppMaster extends DAGAppMaster {
       long startTime = System.nanoTime();
       long startCpuTime = threadMxBean.getCurrentThreadCpuTime();
       TaskHeartbeatResponse response = taListener.heartbeat(request);
-      if (response.shouldDie()) {
+      if (response.isShouldDie()) {
         cData.remove();
       } else {
         cData.nextFromEventId += response.getNextFromEventId();

http://git-wip-us.apache.org/repos/asf/tez/blob/5a46aa59/tez-ext-service-tests/pom.xml
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/pom.xml b/tez-ext-service-tests/pom.xml
index 1113341..907e129 100644
--- a/tez-ext-service-tests/pom.xml
+++ b/tez-ext-service-tests/pom.xml
@@ -28,8 +28,8 @@
 
   <dependencies>
     <dependency>
-      <groupId>log4j</groupId>
-      <artifactId>log4j</artifactId>
+      <groupId>org.slf4j</groupId>
+      <artifactId>slf4j-log4j12</artifactId>
     </dependency>
     <dependency>
       <groupId>com.google.guava</groupId>

http://git-wip-us.apache.org/repos/asf/tez/blob/5a46aa59/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/launcher/TezTestServiceContainerLauncher.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/launcher/TezTestServiceContainerLauncher.java b/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/launcher/TezTestServiceContainerLauncher.java
index cb6308c..9d22196 100644
--- a/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/launcher/TezTestServiceContainerLauncher.java
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/launcher/TezTestServiceContainerLauncher.java
@@ -18,8 +18,6 @@ import java.net.InetSocketAddress;
 
 import com.google.common.base.Preconditions;
 import com.google.protobuf.ByteString;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.service.AbstractService;
 import org.apache.hadoop.yarn.api.ApplicationConstants;
@@ -40,6 +38,8 @@ import org.apache.tez.dag.history.events.ContainerLaunchedEvent;
 import org.apache.tez.service.TezTestServiceConfConstants;
 import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos;
 import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos.RunContainerRequestProto;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 public class TezTestServiceContainerLauncher extends AbstractService implements ContainerLauncher {
 
@@ -47,7 +47,7 @@ public class TezTestServiceContainerLauncher extends AbstractService implements
 
   // TODO May need multiple connections per target machine, depending upon how synchronization is handled in the RPC layer
 
-  static final Log LOG = LogFactory.getLog(TezTestServiceContainerLauncher.class);
+  static final Logger LOG = LoggerFactory.getLogger(TezTestServiceContainerLauncher.class);
 
   private final AppContext context;
   private final String tokenIdentifier;

http://git-wip-us.apache.org/repos/asf/tez/blob/5a46aa59/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/launcher/TezTestServiceNoOpContainerLauncher.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/launcher/TezTestServiceNoOpContainerLauncher.java b/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/launcher/TezTestServiceNoOpContainerLauncher.java
index 8c8e486..977d0d3 100644
--- a/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/launcher/TezTestServiceNoOpContainerLauncher.java
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/launcher/TezTestServiceNoOpContainerLauncher.java
@@ -14,8 +14,6 @@
 
 package org.apache.tez.dag.app.launcher;
 
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.service.AbstractService;
 import org.apache.hadoop.yarn.util.Clock;
@@ -28,10 +26,12 @@ import org.apache.tez.dag.app.rm.container.AMContainerEventLaunched;
 import org.apache.tez.dag.app.rm.container.AMContainerEventType;
 import org.apache.tez.dag.history.DAGHistoryEvent;
 import org.apache.tez.dag.history.events.ContainerLaunchedEvent;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 public class TezTestServiceNoOpContainerLauncher extends AbstractService implements ContainerLauncher {
 
-  static final Log LOG = LogFactory.getLog(TezTestServiceNoOpContainerLauncher.class);
+  static final Logger LOG = LoggerFactory.getLogger(TezTestServiceNoOpContainerLauncher.class);
 
   private final AppContext context;
   private final Clock clock;

http://git-wip-us.apache.org/repos/asf/tez/blob/5a46aa59/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/rm/TezTestServiceTaskSchedulerService.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/rm/TezTestServiceTaskSchedulerService.java b/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/rm/TezTestServiceTaskSchedulerService.java
index 872d592..50dfb24 100644
--- a/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/rm/TezTestServiceTaskSchedulerService.java
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/rm/TezTestServiceTaskSchedulerService.java
@@ -28,8 +28,6 @@ import java.util.concurrent.atomic.AtomicInteger;
 import com.google.common.base.Preconditions;
 import com.google.common.primitives.Ints;
 import com.google.common.util.concurrent.ThreadFactoryBuilder;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
 import org.apache.hadoop.yarn.api.records.ApplicationId;
@@ -41,11 +39,14 @@ import org.apache.hadoop.yarn.api.records.Resource;
 import org.apache.tez.dag.api.TaskAttemptEndReason;
 import org.apache.tez.dag.app.AppContext;
 import org.apache.tez.service.TezTestServiceConfConstants;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 
 public class TezTestServiceTaskSchedulerService extends TaskSchedulerService {
 
-  private static final Log LOG = LogFactory.getLog(TezTestServiceTaskSchedulerService.class);
+  private static final Logger
+      LOG = LoggerFactory.getLogger(TezTestServiceTaskSchedulerService.class);
 
   private final ExecutorService appCallbackExecutor;
   private final TaskSchedulerAppCallback appClientDelegate;

http://git-wip-us.apache.org/repos/asf/tez/blob/5a46aa59/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/taskcomm/TezTestServiceTaskCommunicatorImpl.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/taskcomm/TezTestServiceTaskCommunicatorImpl.java b/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/taskcomm/TezTestServiceTaskCommunicatorImpl.java
index e3385a2..ef983c2 100644
--- a/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/taskcomm/TezTestServiceTaskCommunicatorImpl.java
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/taskcomm/TezTestServiceTaskCommunicatorImpl.java
@@ -23,8 +23,6 @@ import java.util.concurrent.RejectedExecutionException;
 
 import com.google.protobuf.ByteString;
 import com.google.protobuf.ServiceException;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.io.DataOutputBuffer;
 import org.apache.hadoop.ipc.RemoteException;
@@ -41,11 +39,14 @@ import org.apache.tez.runtime.api.impl.TaskSpec;
 import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos.SubmitWorkRequestProto;
 import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos.SubmitWorkResponseProto;
 import org.apache.tez.util.ProtoConverters;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 
 public class TezTestServiceTaskCommunicatorImpl extends TezTaskCommunicatorImpl {
 
-  private static final Log LOG = LogFactory.getLog(TezTestServiceTaskCommunicatorImpl.class);
+  private static final Logger
+      LOG = LoggerFactory.getLogger(TezTestServiceTaskCommunicatorImpl.class);
 
   private final TezTestServiceCommunicator communicator;
   private final SubmitWorkRequestProto BASE_SUBMIT_WORK_REQUEST;

http://git-wip-us.apache.org/repos/asf/tez/blob/5a46aa59/tez-ext-service-tests/src/test/java/org/apache/tez/service/MiniTezTestServiceCluster.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/service/MiniTezTestServiceCluster.java b/tez-ext-service-tests/src/test/java/org/apache/tez/service/MiniTezTestServiceCluster.java
index 0ac0b33..c5ff02d 100644
--- a/tez-ext-service-tests/src/test/java/org/apache/tez/service/MiniTezTestServiceCluster.java
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/service/MiniTezTestServiceCluster.java
@@ -19,8 +19,6 @@ import java.io.IOException;
 import java.net.InetSocketAddress;
 
 import com.google.common.base.Preconditions;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileContext;
 import org.apache.hadoop.fs.Path;
@@ -28,10 +26,12 @@ import org.apache.hadoop.service.AbstractService;
 import org.apache.hadoop.util.Shell;
 import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
 import org.apache.tez.service.impl.TezTestService;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 public class MiniTezTestServiceCluster extends AbstractService {
 
-  private static final Log LOG = LogFactory.getLog(MiniTezTestServiceCluster.class);
+  private static final Logger LOG = LoggerFactory.getLogger(MiniTezTestServiceCluster.class);
 
   private final File testWorkDir;
   private final long availableMemory;

http://git-wip-us.apache.org/repos/asf/tez/blob/5a46aa59/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/ContainerRunnerImpl.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/ContainerRunnerImpl.java b/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/ContainerRunnerImpl.java
index 379d952..e7c02c8 100644
--- a/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/ContainerRunnerImpl.java
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/ContainerRunnerImpl.java
@@ -52,7 +52,6 @@ import org.apache.hadoop.security.token.Token;
 import org.apache.hadoop.service.AbstractService;
 import org.apache.hadoop.yarn.api.ApplicationConstants;
 import org.apache.hadoop.yarn.util.AuxiliaryServiceHelper;
-import org.apache.log4j.Logger;
 import org.apache.tez.common.TezCommonUtils;
 import org.apache.tez.common.TezTaskUmbilicalProtocol;
 import org.apache.tez.common.security.JobTokenIdentifier;
@@ -75,10 +74,12 @@ import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos.RunContainer
 import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos.SubmitWorkRequestProto;
 import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos.TaskSpecProto;
 import org.apache.tez.util.ProtoConverters;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 public class ContainerRunnerImpl extends AbstractService implements ContainerRunner {
 
-  private static final Logger LOG = Logger.getLogger(ContainerRunnerImpl.class);
+  private static final Logger LOG = LoggerFactory.getLogger(ContainerRunnerImpl.class);
 
   public static final String DAG_NAME_INSTRUMENTED_FAILURES = "InstrumentedFailures";
 

http://git-wip-us.apache.org/repos/asf/tez/blob/5a46aa59/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/TezTestService.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/TezTestService.java b/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/TezTestService.java
index 855f1b0..322be00 100644
--- a/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/TezTestService.java
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/TezTestService.java
@@ -23,16 +23,16 @@ import com.google.common.base.Preconditions;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.service.AbstractService;
 import org.apache.hadoop.util.StringUtils;
-import org.apache.log4j.Logger;
 import org.apache.tez.dag.api.TezException;
 import org.apache.tez.service.ContainerRunner;
 import org.apache.tez.shufflehandler.ShuffleHandler;
 import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos;
 import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos.RunContainerRequestProto;
+import org.slf4j.LoggerFactory;
 
 public class TezTestService extends AbstractService implements ContainerRunner {
 
-  private static final Logger LOG = Logger.getLogger(TezTestService.class);
+  private static final org.slf4j.Logger LOG = LoggerFactory.getLogger(TezTestService.class);
 
   private final Configuration shuffleHandlerConf;
   private final int numExecutors;

http://git-wip-us.apache.org/repos/asf/tez/blob/5a46aa59/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/TezTestServiceProtocolServerImpl.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/TezTestServiceProtocolServerImpl.java b/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/TezTestServiceProtocolServerImpl.java
index 39d7156..b5d3f83 100644
--- a/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/TezTestServiceProtocolServerImpl.java
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/TezTestServiceProtocolServerImpl.java
@@ -22,8 +22,6 @@ import com.google.common.annotations.VisibleForTesting;
 import com.google.protobuf.BlockingService;
 import com.google.protobuf.RpcController;
 import com.google.protobuf.ServiceException;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.ipc.ProtobufRpcEngine;
@@ -38,11 +36,13 @@ import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos.RunContainer
 import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos.RunContainerResponseProto;
 import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos.SubmitWorkRequestProto;
 import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos.SubmitWorkResponseProto;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 public class TezTestServiceProtocolServerImpl extends AbstractService
     implements TezTestServiceProtocolBlockingPB {
 
-  private static final Log LOG = LogFactory.getLog(TezTestServiceProtocolServerImpl.class);
+  private static final Logger LOG = LoggerFactory.getLogger(TezTestServiceProtocolServerImpl.class);
 
   private final ContainerRunner containerRunner;
   private RPC.Server server;

http://git-wip-us.apache.org/repos/asf/tez/blob/5a46aa59/tez-ext-service-tests/src/test/java/org/apache/tez/shufflehandler/FadvisedChunkedFile.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/shufflehandler/FadvisedChunkedFile.java b/tez-ext-service-tests/src/test/java/org/apache/tez/shufflehandler/FadvisedChunkedFile.java
index 65588fe..294add6 100644
--- a/tez-ext-service-tests/src/test/java/org/apache/tez/shufflehandler/FadvisedChunkedFile.java
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/shufflehandler/FadvisedChunkedFile.java
@@ -18,16 +18,16 @@ import java.io.FileDescriptor;
 import java.io.IOException;
 import java.io.RandomAccessFile;
 
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.io.ReadaheadPool;
 import org.apache.hadoop.io.ReadaheadPool.ReadaheadRequest;
 import org.apache.hadoop.io.nativeio.NativeIO;
 import org.jboss.netty.handler.stream.ChunkedFile;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 public class FadvisedChunkedFile extends ChunkedFile {
 
-  private static final Log LOG = LogFactory.getLog(FadvisedChunkedFile.class);
+  private static final Logger LOG = LoggerFactory.getLogger(FadvisedChunkedFile.class);
 
   private final boolean manageOsCache;
   private final int readaheadLength;

http://git-wip-us.apache.org/repos/asf/tez/blob/5a46aa59/tez-ext-service-tests/src/test/java/org/apache/tez/shufflehandler/FadvisedFileRegion.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/shufflehandler/FadvisedFileRegion.java b/tez-ext-service-tests/src/test/java/org/apache/tez/shufflehandler/FadvisedFileRegion.java
index bdffe52..e5392d3 100644
--- a/tez-ext-service-tests/src/test/java/org/apache/tez/shufflehandler/FadvisedFileRegion.java
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/shufflehandler/FadvisedFileRegion.java
@@ -22,16 +22,16 @@ import java.nio.channels.FileChannel;
 import java.nio.channels.WritableByteChannel;
 
 import com.google.common.annotations.VisibleForTesting;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.io.ReadaheadPool;
 import org.apache.hadoop.io.ReadaheadPool.ReadaheadRequest;
 import org.apache.hadoop.io.nativeio.NativeIO;
 import org.jboss.netty.channel.DefaultFileRegion;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 public class FadvisedFileRegion extends DefaultFileRegion {
 
-  private static final Log LOG = LogFactory.getLog(FadvisedFileRegion.class);
+  private static final Logger LOG = LoggerFactory.getLogger(FadvisedFileRegion.class);
 
   private final boolean manageOsCache;
   private final int readaheadLength;

http://git-wip-us.apache.org/repos/asf/tez/blob/5a46aa59/tez-ext-service-tests/src/test/java/org/apache/tez/shufflehandler/IndexCache.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/shufflehandler/IndexCache.java b/tez-ext-service-tests/src/test/java/org/apache/tez/shufflehandler/IndexCache.java
index 9a51ca0..5a45917 100644
--- a/tez-ext-service-tests/src/test/java/org/apache/tez/shufflehandler/IndexCache.java
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/shufflehandler/IndexCache.java
@@ -22,20 +22,20 @@ import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.LinkedBlockingQueue;
 import java.util.concurrent.atomic.AtomicInteger;
 
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
 import org.apache.tez.runtime.library.common.Constants;
 import org.apache.tez.runtime.library.common.sort.impl.TezIndexRecord;
 import org.apache.tez.runtime.library.common.sort.impl.TezSpillRecord;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 class IndexCache {
 
   private final Configuration conf;
   private final int totalMemoryAllowed;
   private AtomicInteger totalMemoryUsed = new AtomicInteger();
-  private static final Log LOG = LogFactory.getLog(IndexCache.class);
+  private static final Logger LOG = LoggerFactory.getLogger(IndexCache.class);
 
   private final ConcurrentHashMap<String,IndexInformation> cache =
       new ConcurrentHashMap<String,IndexInformation>();

http://git-wip-us.apache.org/repos/asf/tez/blob/5a46aa59/tez-ext-service-tests/src/test/java/org/apache/tez/shufflehandler/ShuffleHandler.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/shufflehandler/ShuffleHandler.java b/tez-ext-service-tests/src/test/java/org/apache/tez/shufflehandler/ShuffleHandler.java
index cc82d74..8cbb8c7 100644
--- a/tez-ext-service-tests/src/test/java/org/apache/tez/shufflehandler/ShuffleHandler.java
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/shufflehandler/ShuffleHandler.java
@@ -51,8 +51,6 @@ import java.util.regex.Pattern;
 import com.google.common.base.Charsets;
 import com.google.common.base.Preconditions;
 import com.google.common.util.concurrent.ThreadFactoryBuilder;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.LocalDirAllocator;
 import org.apache.hadoop.fs.Path;
@@ -105,10 +103,12 @@ import org.jboss.netty.handler.codec.http.QueryStringDecoder;
 import org.jboss.netty.handler.ssl.SslHandler;
 import org.jboss.netty.handler.stream.ChunkedWriteHandler;
 import org.jboss.netty.util.CharsetUtil;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 public class ShuffleHandler {
 
-  private static final Log LOG = LogFactory.getLog(ShuffleHandler.class);
+  private static final Logger LOG = LoggerFactory.getLogger(ShuffleHandler.class);
 
   public static final String SHUFFLE_HANDLER_LOCAL_DIRS = "tez.shuffle.handler.local-dirs";
 

http://git-wip-us.apache.org/repos/asf/tez/blob/5a46aa59/tez-ext-service-tests/src/test/java/org/apache/tez/tests/TestExternalTezServices.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/tests/TestExternalTezServices.java b/tez-ext-service-tests/src/test/java/org/apache/tez/tests/TestExternalTezServices.java
index b6a166d..4d0a610 100644
--- a/tez-ext-service-tests/src/test/java/org/apache/tez/tests/TestExternalTezServices.java
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/tests/TestExternalTezServices.java
@@ -20,8 +20,6 @@ import java.io.IOException;
 import java.util.Map;
 
 import com.google.common.collect.Maps;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
@@ -48,10 +46,12 @@ import org.apache.tez.test.MiniTezCluster;
 import org.junit.AfterClass;
 import org.junit.BeforeClass;
 import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 public class TestExternalTezServices {
 
-  private static final Log LOG = LogFactory.getLog(TestExternalTezServices.class);
+  private static final Logger LOG = LoggerFactory.getLogger(TestExternalTezServices.class);
 
   private static final String EXT_PUSH_ENTITY_NAME = "ExtServiceTestPush";
 


[07/43] tez git commit: TEZ-2424. Bump up max counter group name length limit to account for per_io counters. (hitesh)

Posted by ss...@apache.org.
TEZ-2424. Bump up max counter group name length limit to account for per_io counters. (hitesh)


Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/78ca7af6
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/78ca7af6
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/78ca7af6

Branch: refs/heads/TEZ-2003
Commit: 78ca7af63987e6052d8756f9ab827cb10b26dbcf
Parents: 5530863
Author: Hitesh Shah <hi...@apache.org>
Authored: Wed May 6 15:00:48 2015 -0700
Committer: Hitesh Shah <hi...@apache.org>
Committed: Wed May 6 15:00:48 2015 -0700

----------------------------------------------------------------------
 CHANGES.txt                                                     | 5 ++++-
 .../src/main/java/org/apache/tez/dag/api/TezConfiguration.java  | 2 +-
 2 files changed, 5 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tez/blob/78ca7af6/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index c865f12..73f8fda 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -14,9 +14,12 @@ Release 0.7.0: Unreleased
 
 INCOMPATIBLE CHANGES
   TEZ-2176. Move all logging to slf4j. (commons-logging jar no longer part of Tez tar)
-  TEZ-1993. Implement a pluggable InputSizeEstimator for grouping fairly
+  TEZ-1993. Implement a pluggable InputSizeEstimator for grouping fairly.
+  TEZ-2424. Bump up max counter group name length limit to account for per_io counters.
+    Default max limit increased. Should not affect existing users.
 
 ALL CHANGES:
+  TEZ-2424. Bump up max counter group name length limit to account for per_io counters.
   TEZ-2417. Tez UI: Counters are blank in the Attempts page if all attempts failed
   TEZ-2366. Pig tez MiniTezCluster unit tests fail intermittently after TEZ-2333
   TEZ-2406. Tez UI: Display per-io counter columns in task and attempt pages under vertex

http://git-wip-us.apache.org/repos/asf/tez/blob/78ca7af6/tez-api/src/main/java/org/apache/tez/dag/api/TezConfiguration.java
----------------------------------------------------------------------
diff --git a/tez-api/src/main/java/org/apache/tez/dag/api/TezConfiguration.java b/tez-api/src/main/java/org/apache/tez/dag/api/TezConfiguration.java
index f64172e..e4170e9 100644
--- a/tez-api/src/main/java/org/apache/tez/dag/api/TezConfiguration.java
+++ b/tez-api/src/main/java/org/apache/tez/dag/api/TezConfiguration.java
@@ -372,7 +372,7 @@ public class TezConfiguration extends Configuration {
   @ConfigurationScope(Scope.AM)
   public static final String TEZ_COUNTERS_GROUP_NAME_MAX_LENGTH =
       TEZ_PREFIX + "counters.group-name.max-length";
-  public static final int TEZ_COUNTERS_GROUP_NAME_MAX_LENGTH_DEFAULT = 128;
+  public static final int TEZ_COUNTERS_GROUP_NAME_MAX_LENGTH_DEFAULT = 256;
 
   @Unstable
   /**


[37/43] tez git commit: TEZ-2302. Allow TaskCommunicators to subscribe for Vertex updates. (sseth)

Posted by ss...@apache.org.
TEZ-2302. Allow TaskCommunicators to subscribe for Vertex updates. (sseth)


Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/e7c3cc72
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/e7c3cc72
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/e7c3cc72

Branch: refs/heads/TEZ-2003
Commit: e7c3cc72ec6c7c680adb2be7c2594a77409426d4
Parents: 5239a45
Author: Siddharth Seth <ss...@apache.org>
Authored: Thu Apr 9 13:33:48 2015 -0700
Committer: Siddharth Seth <ss...@apache.org>
Committed: Fri May 8 14:41:31 2015 -0700

----------------------------------------------------------------------
 TEZ-2003-CHANGES.txt                            |   1 +
 .../apache/tez/dag/api/TaskCommunicator.java    |  20 +++
 .../tez/dag/api/TaskCommunicatorContext.java    |  14 ++-
 .../dag/app/TaskAttemptListenerImpTezDag.java   |  52 +++-----
 .../dag/app/TaskCommunicatorContextImpl.java    | 124 +++++++++++++++++++
 .../tez/dag/app/TezTaskCommunicatorImpl.java    |   6 +
 .../java/org/apache/tez/dag/app/dag/DAG.java    |   2 +
 .../apache/tez/dag/app/dag/impl/DAGImpl.java    |   5 +
 8 files changed, 188 insertions(+), 36 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tez/blob/e7c3cc72/TEZ-2003-CHANGES.txt
----------------------------------------------------------------------
diff --git a/TEZ-2003-CHANGES.txt b/TEZ-2003-CHANGES.txt
index 9d6b220..ca5225e 100644
--- a/TEZ-2003-CHANGES.txt
+++ b/TEZ-2003-CHANGES.txt
@@ -15,5 +15,6 @@ ALL CHANGES:
   TEZ-2283. Fixes after rebase 04/07.
   TEZ-2284. Separate TaskReporter into an interface.
   TEZ-2285. Allow TaskCommunicators to indicate task/container liveness.
+  TEZ-2302. Allow TaskCommunicators to subscribe for Vertex updates.
 
 INCOMPATIBLE CHANGES:

http://git-wip-us.apache.org/repos/asf/tez/blob/e7c3cc72/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicator.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicator.java b/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicator.java
index 945091e..a2cd858 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicator.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicator.java
@@ -22,6 +22,7 @@ import org.apache.hadoop.security.Credentials;
 import org.apache.hadoop.service.AbstractService;
 import org.apache.hadoop.yarn.api.records.ContainerId;
 import org.apache.hadoop.yarn.api.records.LocalResource;
+import org.apache.tez.dag.api.event.VertexStateUpdate;
 import org.apache.tez.dag.records.TezTaskAttemptID;
 import org.apache.tez.runtime.api.impl.TaskSpec;
 
@@ -54,4 +55,23 @@ public abstract class TaskCommunicator extends AbstractService {
   public abstract InetSocketAddress getAddress();
 
   // TODO Eventually. Add methods here to support preemption of tasks.
+
+  /**
+   * Receive notifications on vertex state changes.
+   * <p/>
+   * State changes will be received based on the registration via {@link
+   * org.apache.tez.runtime.api.InputInitializerContext#registerForVertexStateUpdates(String,
+   * java.util.Set)}. Notifications will be received for all registered state changes, and not just
+   * for the latest state update. They will be in order in which the state change occurred. </p>
+   *
+   * Extensive processing should not be performed via this method call. Instead this should just be
+   * used as a notification mechanism.
+   * <br>This method may be invoked concurrently with other invocations into the TaskCommunicator and
+   * multi-threading/concurrency implications must be considered.
+   * @param stateUpdate an event indicating the name of the vertex, and it's updated state.
+   *                    Additional information may be available for specific events, Look at the
+   *                    type hierarchy for {@link org.apache.tez.dag.api.event.VertexStateUpdate}
+   * @throws Exception
+   */
+  public abstract void onVertexStateUpdated(VertexStateUpdate stateUpdate) throws Exception;
 }

http://git-wip-us.apache.org/repos/asf/tez/blob/e7c3cc72/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicatorContext.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicatorContext.java b/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicatorContext.java
index 0c3bac3..19caed9 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicatorContext.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicatorContext.java
@@ -16,10 +16,12 @@ package org.apache.tez.dag.api;
 
 import javax.annotation.Nullable;
 import java.io.IOException;
+import java.util.Set;
 
 import org.apache.hadoop.security.Credentials;
 import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
 import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.tez.dag.api.event.VertexState;
 import org.apache.tez.dag.records.TezTaskAttemptID;
 
 
@@ -48,7 +50,7 @@ public interface TaskCommunicatorContext {
   void containerAlive(ContainerId containerId);
 
   // TODO TEZ-2003 Move to vertex, taskIndex, version. Rename to taskAttempt*
-  void taskStartedRemotely(TezTaskAttemptID taskAttemptID, ContainerId containerId);
+  void taskStartedRemotely(TezTaskAttemptID taskAttemptId, ContainerId containerId);
 
   // TODO TEZ-2003 Move to vertex, taskIndex, version. Rename to taskAttempt*
   void taskKilled(TezTaskAttemptID taskAttemptId, TaskAttemptEndReason taskAttemptEndReason, @Nullable String diagnostics);
@@ -56,6 +58,16 @@ public interface TaskCommunicatorContext {
   // TODO TEZ-2003 Move to vertex, taskIndex, version. Rename to taskAttempt*
   void taskFailed(TezTaskAttemptID taskAttemptId, TaskAttemptEndReason taskAttemptEndReason, @Nullable String diagnostics);
 
+  /**
+   * Register to get notifications on updates to the specified vertex. Notifications will be sent
+   * via {@link org.apache.tez.runtime.api.InputInitializer#onVertexStateUpdated(org.apache.tez.dag.api.event.VertexStateUpdate)} </p>
+   *
+   * This method can only be invoked once. Duplicate invocations will result in an error.
+   *
+   * @param vertexName the vertex name for which notifications are required.
+   * @param stateSet   the set of states for which notifications are required. null implies all
+   */
+  void registerForVertexStateUpdates(String vertexName, @Nullable Set<VertexState> stateSet);
   // TODO TEZ-2003 API. Should a method exist for task succeeded.
 
   // TODO Eventually Add methods to report availability stats to the scheduler.

http://git-wip-us.apache.org/repos/asf/tez/blob/e7c3cc72/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java b/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
index cd39cc8..aaf9cca 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
@@ -17,6 +17,7 @@
 
 package org.apache.tez.dag.app;
 
+import javax.annotation.Nullable;
 import java.io.IOException;
 import java.lang.reflect.Constructor;
 import java.lang.reflect.InvocationTargetException;
@@ -26,6 +27,7 @@ import java.net.UnknownHostException;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.List;
+import java.util.Set;
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.ConcurrentMap;
 
@@ -35,13 +37,13 @@ import org.apache.tez.dag.app.dag.event.TaskAttemptEventStatusUpdate;
 import org.apache.tez.dag.app.dag.event.TaskAttemptEventType;
 import org.apache.tez.runtime.api.events.TaskStatusUpdateEvent;
 import org.apache.tez.runtime.api.impl.EventType;
+import com.google.common.base.Preconditions;
+import org.apache.tez.dag.api.event.VertexState;
+import org.apache.tez.dag.api.event.VertexStateUpdate;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
-import com.google.common.annotations.VisibleForTesting;
 import org.apache.hadoop.classification.InterfaceAudience;
-import org.apache.hadoop.security.Credentials;
 import org.apache.hadoop.service.AbstractService;
-import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
 import org.apache.hadoop.yarn.api.records.NodeId;
 import org.apache.tez.common.ReflectionUtils;
 import org.apache.tez.common.TezUtilsInternal;
@@ -67,14 +69,12 @@ import org.apache.tez.dag.records.TezTaskAttemptID;
 import org.apache.tez.dag.records.TezVertexID;
 import org.apache.tez.runtime.api.impl.TezEvent;
 import org.apache.tez.common.security.JobTokenSecretManager;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
 
 
 @SuppressWarnings("unchecked")
 @InterfaceAudience.Private
 public class TaskAttemptListenerImpTezDag extends AbstractService implements
-    TaskAttemptListener, TaskCommunicatorContext {
+    TaskAttemptListener {
 
   private static final Logger LOG = LoggerFactory
       .getLogger(TaskAttemptListenerImpTezDag.class);
@@ -124,7 +124,7 @@ public class TaskAttemptListenerImpTezDag extends AbstractService implements
     }
     this.taskCommunicators = new TaskCommunicator[taskCommunicatorClassIdentifiers.length];
     for (int i = 0 ; i < taskCommunicatorClassIdentifiers.length ; i++) {
-      taskCommunicators[i] = createTaskCommunicator(taskCommunicatorClassIdentifiers[i]);
+      taskCommunicators[i] = createTaskCommunicator(taskCommunicatorClassIdentifiers[i], i);
     }
     // TODO TEZ-2118 Start using taskCommunicator indices properly
   }
@@ -145,13 +145,13 @@ public class TaskAttemptListenerImpTezDag extends AbstractService implements
     }
   }
 
-  private TaskCommunicator createTaskCommunicator(String taskCommClassIdentifier) {
+  private TaskCommunicator createTaskCommunicator(String taskCommClassIdentifier, int taskCommIndex) {
     if (taskCommClassIdentifier.equals(TezConstants.TEZ_AM_SERVICE_PLUGINS_NAME_DEFAULT)) {
       LOG.info("Using Default Task Communicator");
-      return new TezTaskCommunicatorImpl(this);
+      return new TezTaskCommunicatorImpl(new TaskCommunicatorContextImpl(context, this, taskCommIndex));
     } else if (taskCommClassIdentifier.equals(TezConstants.TEZ_AM_SERVICE_PLUGINS_LOCAL_MODE_NAME_DEFAULT)) {
       LOG.info("Using Default Local Task Communicator");
-      return new TezLocalTaskCommunicatorImpl(this);
+      return new TezLocalTaskCommunicatorImpl(new TaskCommunicatorContextImpl(context, this, taskCommIndex));
     } else {
       LOG.info("Using TaskCommunicator: " + taskCommClassIdentifier);
       Class<? extends TaskCommunicator> taskCommClazz = (Class<? extends TaskCommunicator>) ReflectionUtils
@@ -159,7 +159,7 @@ public class TaskAttemptListenerImpTezDag extends AbstractService implements
       try {
         Constructor<? extends TaskCommunicator> ctor = taskCommClazz.getConstructor(TaskCommunicatorContext.class);
         ctor.setAccessible(true);
-        return ctor.newInstance(this);
+        return ctor.newInstance(new TaskCommunicatorContextImpl(context, this, taskCommIndex));
       } catch (NoSuchMethodException e) {
         throw new TezUncheckedException(e);
       } catch (InvocationTargetException e) {
@@ -171,18 +171,6 @@ public class TaskAttemptListenerImpTezDag extends AbstractService implements
       }
     }
   }
-
-  @Override
-  public ApplicationAttemptId getApplicationAttemptId() {
-    return context.getApplicationAttemptId();
-  }
-
-  @Override
-  public Credentials getCredentials() {
-    return context.getAppCredentials();
-  }
-
-  @Override
   public TaskHeartbeatResponse heartbeat(TaskHeartbeatRequest request)
       throws IOException, TezException {
     ContainerId containerId = ConverterUtils.toContainerId(request
@@ -252,30 +240,20 @@ public class TaskAttemptListenerImpTezDag extends AbstractService implements
     }
     return new TaskHeartbeatResponse(false, outEvents);
   }
-
-  @Override
-  public boolean isKnownContainer(ContainerId containerId) {
-    return context.getAllContainers().get(containerId) != null;
-  }
-
-  @Override
   public void taskAlive(TezTaskAttemptID taskAttemptId) {
     taskHeartbeatHandler.pinged(taskAttemptId);
   }
 
-  @Override
   public void containerAlive(ContainerId containerId) {
     pingContainerHeartbeatHandler(containerId);
   }
 
-  @Override
   public void taskStartedRemotely(TezTaskAttemptID taskAttemptID, ContainerId containerId) {
     context.getEventHandler()
         .handle(new TaskAttemptEventStartedRemotely(taskAttemptID, containerId, null));
     pingContainerHeartbeatHandler(containerId);
   }
 
-  @Override
   public void taskKilled(TezTaskAttemptID taskAttemptId, TaskAttemptEndReason taskAttemptEndReason,
                          String diagnostics) {
     // Regular flow via TaskAttempt will take care of un-registering from the heartbeat handler,
@@ -288,7 +266,6 @@ public class TaskAttemptListenerImpTezDag extends AbstractService implements
         taskAttemptEndReason)));
   }
 
-  @Override
   public void taskFailed(TezTaskAttemptID taskAttemptId, TaskAttemptEndReason taskAttemptEndReason,
                          String diagnostics) {
     // Regular flow via TaskAttempt will take care of un-registering from the heartbeat handler,
@@ -301,6 +278,11 @@ public class TaskAttemptListenerImpTezDag extends AbstractService implements
         taskAttemptEndReason)));
   }
 
+  public void vertexStateUpdateNotificationReceived(VertexStateUpdate event, int taskCommIndex) throws
+      Exception {
+    taskCommunicators[taskCommIndex].onVertexStateUpdated(event);
+  }
+
 
   /**
    * Child checking whether it can commit.
@@ -310,7 +292,7 @@ public class TaskAttemptListenerImpTezDag extends AbstractService implements
    * {@link Task#canCommit(TezTaskAttemptID)} This is * a legacy from the
    * centralized commit protocol handling by the JobTracker.
    */
-  @Override
+//  @Override
   public boolean canCommit(TezTaskAttemptID taskAttemptId) throws IOException {
     LOG.info("Commit go/no-go request from " + taskAttemptId.toString());
     // An attempt is asking if it can commit its output. This can be decided

http://git-wip-us.apache.org/repos/asf/tez/blob/e7c3cc72/tez-dag/src/main/java/org/apache/tez/dag/app/TaskCommunicatorContextImpl.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/TaskCommunicatorContextImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/TaskCommunicatorContextImpl.java
new file mode 100644
index 0000000..3714c3c
--- /dev/null
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/TaskCommunicatorContextImpl.java
@@ -0,0 +1,124 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.dag.app;
+
+import javax.annotation.Nullable;
+import java.io.IOException;
+import java.util.Set;
+
+import com.google.common.base.Preconditions;
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.security.Credentials;
+import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.tez.dag.api.TaskAttemptEndReason;
+import org.apache.tez.dag.api.TaskCommunicatorContext;
+import org.apache.tez.dag.api.TaskHeartbeatRequest;
+import org.apache.tez.dag.api.TaskHeartbeatResponse;
+import org.apache.tez.dag.api.TezException;
+import org.apache.tez.dag.api.TezUncheckedException;
+import org.apache.tez.dag.api.event.VertexState;
+import org.apache.tez.dag.api.event.VertexStateUpdate;
+import org.apache.tez.dag.app.dag.VertexStateUpdateListener;
+import org.apache.tez.dag.records.TezTaskAttemptID;
+
+@InterfaceAudience.Private
+public class TaskCommunicatorContextImpl implements TaskCommunicatorContext, VertexStateUpdateListener {
+
+
+  private final AppContext context;
+  private final TaskAttemptListenerImpTezDag taskAttemptListener;
+  private final int taskCommunicatorIndex;
+
+  public TaskCommunicatorContextImpl(AppContext appContext,
+                                     TaskAttemptListenerImpTezDag taskAttemptListener,
+                                     int taskCommunicatorIndex) {
+    this.context = appContext;
+    this.taskAttemptListener = taskAttemptListener;
+    this.taskCommunicatorIndex = taskCommunicatorIndex;
+  }
+
+  @Override
+  public ApplicationAttemptId getApplicationAttemptId() {
+    return context.getApplicationAttemptId();
+  }
+
+  @Override
+  public Credentials getCredentials() {
+    return context.getAppCredentials();
+  }
+
+  @Override
+  public boolean canCommit(TezTaskAttemptID taskAttemptId) throws IOException {
+    return taskAttemptListener.canCommit(taskAttemptId);
+  }
+
+  @Override
+  public TaskHeartbeatResponse heartbeat(TaskHeartbeatRequest request) throws IOException,
+      TezException {
+    return taskAttemptListener.heartbeat(request);
+  }
+
+  @Override
+  public boolean isKnownContainer(ContainerId containerId) {
+    return context.getAllContainers().get(containerId) != null;
+  }
+
+  @Override
+  public void taskAlive(TezTaskAttemptID taskAttemptId) {
+    taskAttemptListener.taskAlive(taskAttemptId);
+  }
+
+  @Override
+  public void containerAlive(ContainerId containerId) {
+    taskAttemptListener.containerAlive(containerId);
+  }
+
+  @Override
+  public void taskStartedRemotely(TezTaskAttemptID taskAttemptId, ContainerId containerId) {
+    taskAttemptListener.taskStartedRemotely(taskAttemptId, containerId);
+  }
+
+  @Override
+  public void taskKilled(TezTaskAttemptID taskAttemptId, TaskAttemptEndReason taskAttemptEndReason,
+                         @Nullable String diagnostics) {
+    taskAttemptListener.taskKilled(taskAttemptId, taskAttemptEndReason, diagnostics);
+  }
+
+  @Override
+  public void taskFailed(TezTaskAttemptID taskAttemptId, TaskAttemptEndReason taskAttemptEndReason,
+                         @Nullable String diagnostics) {
+    taskAttemptListener.taskFailed(taskAttemptId, taskAttemptEndReason, diagnostics);
+
+  }
+
+  @Override
+  public void registerForVertexStateUpdates(String vertexName,
+                                            @Nullable Set<VertexState> stateSet) {
+    Preconditions.checkNotNull(vertexName, "VertexName cannot be null: " + vertexName);
+    context.getCurrentDAG().getStateChangeNotifier().registerForVertexUpdates(vertexName, stateSet, this);
+  }
+
+
+  @Override
+  public void onStateUpdated(VertexStateUpdate event) {
+    try {
+      taskAttemptListener.vertexStateUpdateNotificationReceived(event, taskCommunicatorIndex);
+    } catch (Exception e) {
+      // TODO TEZ-2003 This needs to be propagated to the DAG as a user error.
+      throw new TezUncheckedException(e);
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/tez/blob/e7c3cc72/tez-dag/src/main/java/org/apache/tez/dag/app/TezTaskCommunicatorImpl.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/TezTaskCommunicatorImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/TezTaskCommunicatorImpl.java
index ef4f764..1417a3b 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/TezTaskCommunicatorImpl.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/TezTaskCommunicatorImpl.java
@@ -48,6 +48,7 @@ import org.apache.tez.dag.api.TaskHeartbeatResponse;
 import org.apache.tez.dag.api.TezConfiguration;
 import org.apache.tez.dag.api.TezException;
 import org.apache.tez.dag.api.TezUncheckedException;
+import org.apache.tez.dag.api.event.VertexStateUpdate;
 import org.apache.tez.dag.app.security.authorize.TezAMPolicyProvider;
 import org.apache.tez.dag.records.TezTaskAttemptID;
 import org.apache.tez.runtime.api.impl.TaskSpec;
@@ -252,6 +253,11 @@ public class TezTaskCommunicatorImpl extends TaskCommunicator {
     return address;
   }
 
+  @Override
+  public void onVertexStateUpdated(VertexStateUpdate stateUpdate) throws Exception {
+    // Empty. Not registering, or expecting any updates.
+  }
+
   protected String getTokenIdentifier() {
     return tokenIdentifier;
   }

http://git-wip-us.apache.org/repos/asf/tez/blob/e7c3cc72/tez-dag/src/main/java/org/apache/tez/dag/app/dag/DAG.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/DAG.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/DAG.java
index 4c3426a..6d6872b 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/DAG.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/DAG.java
@@ -94,4 +94,6 @@ public interface DAG {
 
   Map<String, TezVertexID> getVertexNameIDMapping();
 
+  StateChangeNotifier getStateChangeNotifier();
+
 }

http://git-wip-us.apache.org/repos/asf/tez/blob/e7c3cc72/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/DAGImpl.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/DAGImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/DAGImpl.java
index 0a87241..4d10711 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/DAGImpl.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/DAGImpl.java
@@ -696,6 +696,11 @@ public class DAGImpl implements org.apache.tez.dag.app.dag.DAG,
   }
 
   @Override
+  public StateChangeNotifier getStateChangeNotifier() {
+    return entityUpdateTracker;
+  }
+
+  @Override
   public TezCounters getAllCounters() {
 
     readLock.lock();


[04/43] tez git commit: TEZ-2396. pig-tez-tfile-parser pom is hard coded to depend on 0.6.0-SNAPSHOT version. (Rajesh Balamohan via hitesh)

Posted by ss...@apache.org.
TEZ-2396. pig-tez-tfile-parser pom is hard coded to depend on 0.6.0-SNAPSHOT version. (Rajesh Balamohan via hitesh)


Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/16bbc58c
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/16bbc58c
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/16bbc58c

Branch: refs/heads/TEZ-2003
Commit: 16bbc58ca70c7878696521530dc2c6f454b27019
Parents: da3fecd
Author: Hitesh Shah <hi...@apache.org>
Authored: Wed May 6 11:50:53 2015 -0700
Committer: Hitesh Shah <hi...@apache.org>
Committed: Wed May 6 11:50:53 2015 -0700

----------------------------------------------------------------------
 BUILDING.txt                          |   9 ++
 CHANGES.txt                           |   1 +
 pom.xml                               |   7 ++
 tez-tools/pom.xml                     |  50 +++++++++
 tez-tools/tez-tfile-parser/README.txt |   2 +-
 tez-tools/tez-tfile-parser/pom.xml    | 159 +++++++----------------------
 6 files changed, 104 insertions(+), 124 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tez/blob/16bbc58c/BUILDING.txt
----------------------------------------------------------------------
diff --git a/BUILDING.txt b/BUILDING.txt
index 29b92ac..5247b9d 100644
--- a/BUILDING.txt
+++ b/BUILDING.txt
@@ -157,3 +157,12 @@ For example, to generate the state machine graphviz file for DAGImpl, run:
 To generate the diagram, you can use a Graphviz application or something like:
 
   $ dot -Tpng -o Tez.png Tez.gv'
+
+----------------------------------------------------------------------------------
+Building contrib tools under tez-tools :
+
+Use -Ptools to build various contrib tools present under tez-tools. For example, run:
+
+ $ mvn package -Ptools
+
+----------------------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/tez/blob/16bbc58c/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index f95e626..d3aa8a9 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -176,6 +176,7 @@ Release 0.6.1: Unreleased
 INCOMPATIBLE CHANGES
 
 ALL CHANGES:
+  TEZ-2396. pig-tez-tfile-parser pom is hard coded to depend on 0.6.0-SNAPSHOT version.
   TEZ-2237. Valid events should be sent out when an Output is not started.
   TEZ-1988. Tez UI: does not work when using file:// in a browser
   TEZ-2390. tez-tools swimlane tool fails to parse large jobs >8K containers

http://git-wip-us.apache.org/repos/asf/tez/blob/16bbc58c/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 419138b..ce4fa13 100644
--- a/pom.xml
+++ b/pom.xml
@@ -39,6 +39,7 @@
     <clover.license>${user.home}/clover.license</clover.license>
     <hadoop.version>2.6.0</hadoop.version>
     <jetty.version>6.1.26</jetty.version>
+    <pig.version>0.13.0</pig.version>
     <distMgmtSnapshotsId>apache.snapshots.https</distMgmtSnapshotsId>
     <distMgmtSnapshotsName>Apache Development Snapshot Repository</distMgmtSnapshotsName>
     <distMgmtSnapshotsUrl>https://repository.apache.org/content/repositories/snapshots</distMgmtSnapshotsUrl>
@@ -169,6 +170,11 @@
         <type>test-jar</type>
       </dependency>
       <dependency>
+        <groupId>org.apache.pig</groupId>
+        <artifactId>pig</artifactId>
+        <version>${pig.version}</version>
+      </dependency>
+      <dependency>
         <groupId>org.slf4j</groupId>
         <artifactId>slf4j-api</artifactId>
         <version>1.7.5</version>
@@ -631,6 +637,7 @@
     <module>tez-dag</module>
     <module>tez-ui</module>
     <module>tez-plugins</module>
+    <module>tez-tools</module>
     <module>tez-dist</module>
     <module>docs</module>
   </modules>

http://git-wip-us.apache.org/repos/asf/tez/blob/16bbc58c/tez-tools/pom.xml
----------------------------------------------------------------------
diff --git a/tez-tools/pom.xml b/tez-tools/pom.xml
new file mode 100644
index 0000000..bf8fdf8
--- /dev/null
+++ b/tez-tools/pom.xml
@@ -0,0 +1,50 @@
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+  <parent>
+    <groupId>org.apache.tez</groupId>
+    <artifactId>tez</artifactId>
+    <version>0.8.0-SNAPSHOT</version>
+  </parent>
+  <artifactId>tez-tools</artifactId>
+  <packaging>pom</packaging>
+
+  <build>
+    <plugins>
+      <plugin>
+        <groupId>org.apache.rat</groupId>
+        <artifactId>apache-rat-plugin</artifactId>
+      </plugin>
+    </plugins>
+  </build>
+
+  <profiles>
+    <profile>
+      <id>tools</id>
+      <activation>
+        <activeByDefault>false</activeByDefault>
+      </activation>
+      <modules>
+        <module>tez-tfile-parser</module>
+      </modules>
+    </profile>
+  </profiles>
+
+</project>

http://git-wip-us.apache.org/repos/asf/tez/blob/16bbc58c/tez-tools/tez-tfile-parser/README.txt
----------------------------------------------------------------------
diff --git a/tez-tools/tez-tfile-parser/README.txt b/tez-tools/tez-tfile-parser/README.txt
index 09514ed..5b18767 100644
--- a/tez-tools/tez-tfile-parser/README.txt
+++ b/tez-tools/tez-tfile-parser/README.txt
@@ -3,7 +3,7 @@ This is a simple Pig loader to parse TFiles and provide line by line format (tup
 
 Build/Install:
 ==============
-1. "mvn clean package" should create "tfile-parser-1.0-SNAPSHOT.jar" would be created in ./target directory
+1. "mvn clean package" should create "tfile-parser-x.y.z-SNAPSHOT.jar" would be created in ./target directory
 
 Running pig with tez:
 ====================

http://git-wip-us.apache.org/repos/asf/tez/blob/16bbc58c/tez-tools/tez-tfile-parser/pom.xml
----------------------------------------------------------------------
diff --git a/tez-tools/tez-tfile-parser/pom.xml b/tez-tools/tez-tfile-parser/pom.xml
index 64191ac..aea1299 100644
--- a/tez-tools/tez-tfile-parser/pom.xml
+++ b/tez-tools/tez-tfile-parser/pom.xml
@@ -15,131 +15,44 @@
 <project xmlns="http://maven.apache.org/POM/4.0.0"
          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
-    <modelVersion>4.0.0</modelVersion>
+  <modelVersion>4.0.0</modelVersion>
 
-    <groupId>pig-tez-tfile-parser</groupId>
-    <artifactId>tfile-parser</artifactId>
-    <version>1.0-SNAPSHOT</version>
+  <parent>
+    <groupId>org.apache.tez</groupId>
+    <artifactId>tez-tools</artifactId>
+    <version>0.8.0-SNAPSHOT</version>
+  </parent>
+  <artifactId>tez-tfile-parser</artifactId>
 
-    <properties>
-        <tez-version>0.8.0-SNAPSHOT</tez-version>
-        <hadoop-version>2.6.0</hadoop-version>
-    </properties>
+  <build>
+    <plugins>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-deploy-plugin</artifactId>
+        <configuration>
+          <skip>true</skip>
+        </configuration>
+      </plugin>
+      <plugin>
+        <groupId>org.apache.rat</groupId>
+        <artifactId>apache-rat-plugin</artifactId>
+      </plugin>
+    </plugins>
+  </build>
 
-    <dependencies>
-        <dependency>
-            <groupId>org.apache.pig</groupId>
-            <artifactId>pig</artifactId>
-            <version>0.13.0</version>
-        </dependency>
-        <dependency>
-            <groupId>org.apache.tez</groupId>
-            <artifactId>tez-api</artifactId>
-            <version>${tez-version}</version>
-        </dependency>
-        <dependency>
-            <groupId>org.apache.tez</groupId>
-            <artifactId>tez-common</artifactId>
-            <version>${tez-version}</version>
-        </dependency>
-        <dependency>
-            <groupId>org.apache.tez</groupId>
-            <artifactId>tez-runtime-library</artifactId>
-            <version>${tez-version}</version>
-        </dependency>
-        <dependency>
-            <groupId>org.apache.tez</groupId>
-            <artifactId>tez-mapreduce</artifactId>
-            <version>${tez-version}</version>
-        </dependency>
-        <dependency>
-            <groupId>org.apache.hadoop</groupId>
-            <artifactId>hadoop-common</artifactId>
-            <version>${hadoop-version}</version>
-        </dependency>
-        <dependency>
-            <groupId>org.apache.hadoop</groupId>
-            <artifactId>hadoop-yarn-api</artifactId>
-            <version>${hadoop-version}</version>
-        </dependency>
-        <dependency>
-            <groupId>org.apache.hadoop</groupId>
-            <artifactId>hadoop-hdfs</artifactId>
-            <version>${hadoop-version}</version>
-        </dependency>
-        <dependency>
-            <groupId>org.apache.hadoop</groupId>
-            <artifactId>hadoop-yarn-common</artifactId>
-            <version>${hadoop-version}</version>
-        </dependency>
-        <dependency>
-            <groupId>org.apache.hadoop</groupId>
-            <artifactId>hadoop-yarn-client</artifactId>
-            <version>${hadoop-version}</version>
-        </dependency>
-        <dependency>
-            <groupId>org.apache.hadoop</groupId>
-            <artifactId>hadoop-yarn-server-nodemanager</artifactId>
-            <version>${hadoop-version}</version>
-        </dependency>
-        <dependency>
-            <groupId>commons-cli</groupId>
-            <artifactId>commons-cli</artifactId>
-            <version>1.1</version>
-            <scope>compile</scope>
-        </dependency>
-        <dependency>
-            <groupId>org.mockito</groupId>
-            <artifactId>mockito-core</artifactId>
-            <version>1.8.5</version>
-            <scope>test</scope>
-        </dependency>
-        <dependency>
-            <groupId>junit</groupId>
-            <artifactId>junit</artifactId>
-            <version>4.7</version>
-            <scope>test</scope>
-        </dependency>
-    </dependencies>
-    <repositories>
-        <repository>
-            <id>apache.snapshots</id>
-            <name>Apache Snapshot Repository</name>
-            <url>http://repository.apache.org/snapshots</url>
-            <releases>
-                <enabled>false</enabled>
-            </releases>
-            <snapshots>
-                <enabled>true</enabled>
-            </snapshots>
-        </repository>
-    </repositories>
-    <build>
-        <plugins>
-            <plugin>
-                <artifactId>maven-compiler-plugin</artifactId>
-                <configuration>
-                    <source>1.6</source>
-                    <target>1.6</target>
-                </configuration>
-            </plugin>
-            <plugin>
-                <groupId>org.apache.maven.plugins</groupId>
-                <artifactId>maven-dependency-plugin</artifactId>
-                <executions>
-                    <execution>
-                        <id>copy-dependencies</id>
-                        <phase>package</phase>
-                        <goals>
-                            <goal>copy-dependencies</goal>
-                        </goals>
-                        <configuration>
-                            <outputDirectory>${project.build.directory}/lib</outputDirectory>
-                        </configuration>
-                    </execution>
-                </executions>
-            </plugin>
-        </plugins>
-    </build>
+  <dependencies>
+    <dependency>
+      <groupId>org.apache.pig</groupId>
+      <artifactId>pig</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-common</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-mapreduce-client-common</artifactId>
+    </dependency>
+  </dependencies>
 
 </project>


[26/43] tez git commit: TEZ-2123. Fix component managers to use pluggable components. Enable hybrid mode. (sseth)

Posted by ss...@apache.org.
TEZ-2123. Fix component managers to use pluggable components. Enable
hybrid mode. (sseth)


Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/c47951ab
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/c47951ab
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/c47951ab

Branch: refs/heads/TEZ-2003
Commit: c47951abda6c02b2373f78a6d56553e03523017c
Parents: cee4809
Author: Siddharth Seth <ss...@apache.org>
Authored: Fri Feb 20 11:59:03 2015 -0800
Committer: Siddharth Seth <ss...@apache.org>
Committed: Fri May 8 14:41:29 2015 -0700

----------------------------------------------------------------------
 TEZ-2003-CHANGES.txt                            |   1 +
 .../org/apache/tez/dag/app/DAGAppMaster.java    |   4 +-
 .../apache/tez/dag/app/TaskAttemptListener.java |  12 +-
 .../dag/app/TaskAttemptListenerImpTezDag.java   |  27 ++--
 .../tez/dag/app/dag/impl/TaskAttemptImpl.java   |   4 +-
 .../TezRootInputInitializerContextImpl.java     |   2 +-
 .../apache/tez/dag/app/dag/impl/VertexImpl.java |   2 +-
 .../tez/dag/app/dag/impl/VertexManager.java     |   2 +-
 .../app/launcher/ContainerLauncherRouter.java   |   2 +-
 .../app/launcher/LocalContainerLauncher.java    |  10 +-
 .../rm/AMSchedulerEventDeallocateContainer.java |   7 +-
 .../rm/AMSchedulerEventNodeBlacklistUpdate.java |   8 +-
 .../tez/dag/app/rm/AMSchedulerEventTAEnded.java |  10 +-
 .../dag/app/rm/LocalTaskSchedulerService.java   |  19 ++-
 .../tez/dag/app/rm/NMCommunicatorEvent.java     |  12 +-
 .../rm/NMCommunicatorLaunchRequestEvent.java    |  11 +-
 .../app/rm/NMCommunicatorStopRequestEvent.java  |   4 +-
 .../dag/app/rm/TaskSchedulerEventHandler.java   | 151 ++++++++++++-----
 .../tez/dag/app/rm/container/AMContainer.java   |   3 +
 .../AMContainerEventLaunchRequest.java          |  15 +-
 .../dag/app/rm/container/AMContainerImpl.java   |  39 +++--
 .../dag/app/rm/container/AMContainerMap.java    |   4 +-
 .../apache/tez/dag/app/rm/node/AMNodeImpl.java  |   6 +-
 .../apache/tez/dag/app/MockDAGAppMaster.java    |   2 +-
 .../app/TestTaskAttemptListenerImplTezDag.java  |  31 ++--
 .../tez/dag/app/dag/impl/TestTaskAttempt.java   |  69 ++++----
 .../tez/dag/app/dag/impl/TestVertexImpl.java    |   8 +-
 .../tez/dag/app/rm/TestContainerReuse.java      |  34 ++--
 .../tez/dag/app/rm/TestLocalTaskScheduler.java  |   2 +-
 .../app/rm/TestLocalTaskSchedulerService.java   |  18 ++-
 .../app/rm/TestTaskSchedulerEventHandler.java   |  11 +-
 .../dag/app/rm/TestTaskSchedulerHelpers.java    |   2 +-
 .../dag/app/rm/container/TestAMContainer.java   | 108 +++++++------
 .../app/rm/container/TestAMContainerMap.java    |   6 +-
 .../org/apache/tez/examples/JoinValidate.java   |  30 +++-
 .../TezTestServiceContainerLauncher.java        |   5 +-
 .../rm/TezTestServiceTaskSchedulerService.java  | 100 ++----------
 .../tez/examples/JoinValidateConfigured.java    |  53 ++++++
 .../tez/tests/TestExternalTezServices.java      | 160 ++++++++++++++-----
 39 files changed, 638 insertions(+), 356 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tez/blob/c47951ab/TEZ-2003-CHANGES.txt
----------------------------------------------------------------------
diff --git a/TEZ-2003-CHANGES.txt b/TEZ-2003-CHANGES.txt
index 4bfe08f..1a2264c 100644
--- a/TEZ-2003-CHANGES.txt
+++ b/TEZ-2003-CHANGES.txt
@@ -4,5 +4,6 @@ ALL CHANGES:
   TEZ-2090. Add tests for jobs running in external services.
   TEZ-2117. Add a manager for ContainerLaunchers running in the AM.
   TEZ-2122. Setup pluggable components at AM/Vertex level.
+  TEZ-2123. Fix component managers to use pluggable components. (Enable hybrid mode)
 
 INCOMPATIBLE CHANGES:

http://git-wip-us.apache.org/repos/asf/tez/blob/c47951ab/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java b/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java
index 6814cda..89b6506 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java
@@ -513,7 +513,7 @@ public class DAGAppMaster extends AbstractService {
 
     this.taskSchedulerEventHandler = new TaskSchedulerEventHandler(context,
         clientRpcServer, dispatcher.getEventHandler(), containerSignatureMatcher, webUIService,
-        taskSchedulerClassIdentifiers);
+        taskSchedulerClassIdentifiers, isLocal);
     addIfService(taskSchedulerEventHandler, true);
 
     if (enableWebUIService()) {
@@ -2283,6 +2283,7 @@ public class DAGAppMaster extends AbstractService {
   // Tez default classnames are populated as TezConfiguration.TEZ_AM_SERVICE_PLUGINS_DEFAULT
   private String[] parsePlugins(BiMap<String, Integer> pluginMap, String[] pluginStrings,
                                    String context) {
+    // TODO TEZ-2003 Duplicate error checking - ideally in the client itself. Depends on the final API.
     Preconditions.checkState(pluginStrings != null && pluginStrings.length > 0,
         "Plugin strings should not be null or empty: " + context);
 
@@ -2320,6 +2321,7 @@ public class DAGAppMaster extends AbstractService {
       }
       pluginMap.put(identifierString, index);
       classNames[index] = className;
+      index++;
     }
     return classNames;
   }

http://git-wip-us.apache.org/repos/asf/tez/blob/c47951ab/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListener.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListener.java b/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListener.java
index 9caa7cf..e4dad27 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListener.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListener.java
@@ -22,6 +22,7 @@ import java.net.InetSocketAddress;
 
 import org.apache.hadoop.yarn.api.records.ContainerId;
 import org.apache.tez.dag.app.dag.DAG;
+import org.apache.tez.dag.api.TaskCommunicator;
 import org.apache.tez.dag.app.rm.container.AMContainerTask;
 import org.apache.tez.dag.records.TezTaskAttemptID;
 /**
@@ -29,18 +30,17 @@ import org.apache.tez.dag.records.TezTaskAttemptID;
  */
 public interface TaskAttemptListener {
 
-  InetSocketAddress getAddress();
+  void registerRunningContainer(ContainerId containerId, int taskCommId);
 
-  void registerRunningContainer(ContainerId containerId);
-
-  void registerTaskAttempt(AMContainerTask amContainerTask, ContainerId containerId);
+  void registerTaskAttempt(AMContainerTask amContainerTask, ContainerId containerId, int taskCommId);
   
-  void unregisterRunningContainer(ContainerId containerId);
+  void unregisterRunningContainer(ContainerId containerId, int taskCommId);
   
-  void unregisterTaskAttempt(TezTaskAttemptID attemptID);
+  void unregisterTaskAttempt(TezTaskAttemptID attemptID, int taskCommId);
 
   void dagComplete(DAG dag);
 
   void dagSubmitted();
 
+  TaskCommunicator getTaskCommunicator(int taskCommIndex);
 }

http://git-wip-us.apache.org/repos/asf/tez/blob/c47951ab/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java b/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
index 2f6dcf5..d21b7d0 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
@@ -273,11 +273,6 @@ public class TaskAttemptListenerImpTezDag extends AbstractService implements
     return task.canCommit(taskAttemptId);
   }
 
-  @Override
-  public InetSocketAddress getAddress() {
-    return taskCommunicators[0].getAddress();
-  }
-
   // The TaskAttemptListener register / unregister methods in this class are not thread safe.
   // The Tez framework should not invoke these methods from multiple threads.
   @Override
@@ -297,7 +292,7 @@ public class TaskAttemptListenerImpTezDag extends AbstractService implements
   }
 
   @Override
-  public void registerRunningContainer(ContainerId containerId) {
+  public void registerRunningContainer(ContainerId containerId, int taskCommId) {
     if (LOG.isDebugEnabled()) {
       LOG.debug("ContainerId: " + containerId + " registered with TaskAttemptListener");
     }
@@ -307,11 +302,12 @@ public class TaskAttemptListenerImpTezDag extends AbstractService implements
           "Multiple registrations for containerId: " + containerId);
     }
     NodeId nodeId = context.getAllContainers().get(containerId).getContainer().getNodeId();
-    taskCommunicators[0].registerRunningContainer(containerId, nodeId.getHost(), nodeId.getPort());
+    taskCommunicators[taskCommId].registerRunningContainer(containerId, nodeId.getHost(),
+        nodeId.getPort());
   }
 
   @Override
-  public void unregisterRunningContainer(ContainerId containerId) {
+  public void unregisterRunningContainer(ContainerId containerId, int taskCommId) {
     if (LOG.isDebugEnabled()) {
       LOG.debug("Unregistering Container from TaskAttemptListener: " + containerId);
     }
@@ -319,12 +315,12 @@ public class TaskAttemptListenerImpTezDag extends AbstractService implements
     if (containerInfo.taskAttemptId != null) {
       registeredAttempts.remove(containerInfo.taskAttemptId);
     }
-    taskCommunicators[0].registerContainerEnd(containerId);
+    taskCommunicators[taskCommId].registerContainerEnd(containerId);
   }
 
   @Override
   public void registerTaskAttempt(AMContainerTask amContainerTask,
-                                  ContainerId containerId) {
+                                  ContainerId containerId, int taskCommId) {
     ContainerInfo containerInfo = registeredContainers.get(containerId);
     if (containerInfo == null) {
       throw new TezUncheckedException("Registering task attempt: "
@@ -354,13 +350,13 @@ public class TaskAttemptListenerImpTezDag extends AbstractService implements
           + amContainerTask.getTask().getTaskAttemptID() + " to container: " + containerId
           + " when already assigned to: " + containerIdFromMap);
     }
-    taskCommunicators[0].registerRunningTaskAttempt(containerId, amContainerTask.getTask(),
+    taskCommunicators[taskCommId].registerRunningTaskAttempt(containerId, amContainerTask.getTask(),
         amContainerTask.getAdditionalResources(), amContainerTask.getCredentials(),
         amContainerTask.haveCredentialsChanged());
   }
 
   @Override
-  public void unregisterTaskAttempt(TezTaskAttemptID attemptId) {
+  public void unregisterTaskAttempt(TezTaskAttemptID attemptId, int taskCommId) {
     ContainerId containerId = registeredAttempts.remove(attemptId);
     if (containerId == null) {
       LOG.warn("Unregister task attempt: " + attemptId + " from unknown container");
@@ -374,7 +370,12 @@ public class TaskAttemptListenerImpTezDag extends AbstractService implements
     }
     // Explicitly putting in a new entry so that synchronization is not required on the existing element in the map.
     registeredContainers.put(containerId, NULL_CONTAINER_INFO);
-    taskCommunicators[0].unregisterRunningTaskAttempt(attemptId);
+    taskCommunicators[taskCommId].unregisterRunningTaskAttempt(attemptId);
+  }
+
+  @Override
+  public TaskCommunicator getTaskCommunicator(int taskCommIndex) {
+    return taskCommunicators[taskCommIndex];
   }
 
   private void pingContainerHeartbeatHandler(ContainerId containerId) {

http://git-wip-us.apache.org/repos/asf/tez/blob/c47951ab/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TaskAttemptImpl.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TaskAttemptImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TaskAttemptImpl.java
index c18dc00..c80571d 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TaskAttemptImpl.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TaskAttemptImpl.java
@@ -1218,7 +1218,7 @@ public class TaskAttemptImpl implements TaskAttempt,
       // Inform the scheduler
       if (sendSchedulerEvent()) {
         ta.sendEvent(new AMSchedulerEventTAEnded(ta, ta.containerId, helper
-            .getTaskAttemptState()));
+            .getTaskAttemptState(), ta.getVertex().getTaskSchedulerIdentifier()));
       }
     }
   }
@@ -1300,7 +1300,7 @@ public class TaskAttemptImpl implements TaskAttempt,
 
       // Inform the Scheduler.
       ta.sendEvent(new AMSchedulerEventTAEnded(ta, ta.containerId,
-          TaskAttemptState.SUCCEEDED));
+          TaskAttemptState.SUCCEEDED, ta.getVertex().getTaskSchedulerIdentifier()));
 
       // Inform the task.
       ta.sendEvent(new TaskEventTAUpdate(ta.attemptId,

http://git-wip-us.apache.org/repos/asf/tez/blob/c47951ab/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TezRootInputInitializerContextImpl.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TezRootInputInitializerContextImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TezRootInputInitializerContextImpl.java
index d4ef4d5..4ca4024 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TezRootInputInitializerContextImpl.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TezRootInputInitializerContextImpl.java
@@ -96,7 +96,7 @@ public class TezRootInputInitializerContextImpl implements
 
   @Override
   public Resource getTotalAvailableResource() {
-    return appContext.getTaskScheduler().getTotalResources();
+    return appContext.getTaskScheduler().getTotalResources(vertex.getTaskSchedulerIdentifier());
   }
 
   @Override

http://git-wip-us.apache.org/repos/asf/tez/blob/c47951ab/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexImpl.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexImpl.java
index 097cf3d..18d3368 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexImpl.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexImpl.java
@@ -4396,7 +4396,7 @@ public class VertexImpl implements org.apache.tez.dag.app.dag.Vertex, EventHandl
         eventHandler, getTotalTasks(),
         appContext.getTaskScheduler().getNumClusterNodes(),
         getTaskResource(),
-        appContext.getTaskScheduler().getTotalResources());
+        appContext.getTaskScheduler().getTotalResources(taskSchedulerIdentifier));
     List<RootInputLeafOutput<InputDescriptor, InputInitializerDescriptor>>
         inputList = Lists.newArrayListWithCapacity(inputsWithInitializers.size());
     for (String inputName : inputsWithInitializers) {

http://git-wip-us.apache.org/repos/asf/tez/blob/c47951ab/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexManager.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexManager.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexManager.java
index 945d9ba..1300fc0 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexManager.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexManager.java
@@ -286,7 +286,7 @@ public class VertexManager {
     @Override
     public synchronized Resource getTotalAvailableResource() {
       checkAndThrowIfDone();
-      return appContext.getTaskScheduler().getTotalResources();
+      return appContext.getTaskScheduler().getTotalResources(managedVertex.getTaskSchedulerIdentifier());
     }
 
     @Override

http://git-wip-us.apache.org/repos/asf/tez/blob/c47951ab/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncherRouter.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncherRouter.java b/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncherRouter.java
index 621e4a8..4f9b5bf 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncherRouter.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncherRouter.java
@@ -124,6 +124,6 @@ public class ContainerLauncherRouter extends AbstractService
 
   @Override
   public void handle(NMCommunicatorEvent event) {
-    containerLaunchers[0].handle(event);
+    containerLaunchers[event.getLauncherId()].handle(event);
   }
 }

http://git-wip-us.apache.org/repos/asf/tez/blob/c47951ab/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/LocalContainerLauncher.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/LocalContainerLauncher.java b/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/LocalContainerLauncher.java
index e9ba9d7..9a38732 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/LocalContainerLauncher.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/LocalContainerLauncher.java
@@ -59,7 +59,6 @@ import org.apache.tez.dag.api.TezException;
 import org.apache.tez.dag.api.TezUncheckedException;
 import org.apache.tez.dag.app.AppContext;
 import org.apache.tez.dag.app.TaskAttemptListener;
-import org.apache.tez.dag.app.TaskAttemptListenerImpTezDag;
 import org.apache.tez.dag.app.TezTaskCommunicatorImpl;
 import org.apache.tez.dag.app.rm.NMCommunicatorEvent;
 import org.apache.tez.dag.app.rm.NMCommunicatorLaunchRequestEvent;
@@ -88,9 +87,9 @@ public class LocalContainerLauncher extends AbstractService implements
 
   private static final Logger LOG = LoggerFactory.getLogger(LocalContainerLauncher.class);
   private final AppContext context;
-  private final TezTaskUmbilicalProtocol taskUmbilicalProtocol;
   private final AtomicBoolean serviceStopped = new AtomicBoolean(false);
   private final String workingDirectory;
+  private final TaskAttemptListener tal;
   private final Map<String, String> localEnv = new HashMap<String, String>();
   private final ExecutionContext executionContext;
   private int numExecutors;
@@ -116,9 +115,8 @@ public class LocalContainerLauncher extends AbstractService implements
                                 String workingDirectory) throws UnknownHostException {
     super(LocalContainerLauncher.class.getName());
     this.context = context;
-    TaskAttemptListenerImpTezDag taListener = (TaskAttemptListenerImpTezDag)taskAttemptListener;
-    TezTaskCommunicatorImpl taskComm = (TezTaskCommunicatorImpl) taListener.getTaskCommunicator();
-    this.taskUmbilicalProtocol = taskComm.getUmbilical();
+    this.tal = taskAttemptListener;
+
     this.workingDirectory = workingDirectory;
     AuxiliaryServiceHelper.setServiceDataIntoEnv(
         ShuffleUtils.SHUFFLE_HANDLER_SERVICE_ID, ByteBuffer.allocate(4).putInt(0), localEnv);
@@ -219,7 +217,7 @@ public class LocalContainerLauncher extends AbstractService implements
         tezChild =
             createTezChild(context.getAMConf(), event.getContainerId(), tokenIdentifier,
                 context.getApplicationAttemptId().getAttemptId(), context.getLocalDirs(),
-                taskUmbilicalProtocol,
+                ((TezTaskCommunicatorImpl)tal.getTaskCommunicator(event.getTaskCommId())).getUmbilical(),
                 TezCommonUtils.parseCredentialsBytes(event.getContainerLaunchContext().getTokens().array()));
       } catch (InterruptedException e) {
         handleLaunchFailed(e, event.getContainerId());

http://git-wip-us.apache.org/repos/asf/tez/blob/c47951ab/tez-dag/src/main/java/org/apache/tez/dag/app/rm/AMSchedulerEventDeallocateContainer.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/AMSchedulerEventDeallocateContainer.java b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/AMSchedulerEventDeallocateContainer.java
index 1b51920..5270aa2 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/AMSchedulerEventDeallocateContainer.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/AMSchedulerEventDeallocateContainer.java
@@ -23,15 +23,20 @@ import org.apache.hadoop.yarn.api.records.ContainerId;
 public class AMSchedulerEventDeallocateContainer extends AMSchedulerEvent {
 
   private final ContainerId containerId;
+  private final int schedulerId;
   
-  public AMSchedulerEventDeallocateContainer(ContainerId containerId) {
+  public AMSchedulerEventDeallocateContainer(ContainerId containerId, int schedulerId) {
     super(AMSchedulerEventType.S_CONTAINER_DEALLOCATE);
     this.containerId = containerId;
+    this.schedulerId = schedulerId;
   }
   
   public ContainerId getContainerId() {
     return this.containerId;
   }
 
+  public int getSchedulerId() {
+    return schedulerId;
+  }
 }
 

http://git-wip-us.apache.org/repos/asf/tez/blob/c47951ab/tez-dag/src/main/java/org/apache/tez/dag/app/rm/AMSchedulerEventNodeBlacklistUpdate.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/AMSchedulerEventNodeBlacklistUpdate.java b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/AMSchedulerEventNodeBlacklistUpdate.java
index ed7ebc3..679705a 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/AMSchedulerEventNodeBlacklistUpdate.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/AMSchedulerEventNodeBlacklistUpdate.java
@@ -23,14 +23,20 @@ import org.apache.hadoop.yarn.api.records.NodeId;
 public class AMSchedulerEventNodeBlacklistUpdate extends AMSchedulerEvent {
 
   private final NodeId nodeId;
+  private final int schedulerId;
 
-  public AMSchedulerEventNodeBlacklistUpdate(NodeId nodeId, boolean add) {
+  public AMSchedulerEventNodeBlacklistUpdate(NodeId nodeId, boolean add, int schedulerId) {
     super((add ? AMSchedulerEventType.S_NODE_BLACKLISTED
         : AMSchedulerEventType.S_NODE_UNBLACKLISTED));
     this.nodeId = nodeId;
+    this.schedulerId = schedulerId;
   }
 
   public NodeId getNodeId() {
     return this.nodeId;
   }
+
+  public int getSchedulerId() {
+    return schedulerId;
+  }
 }

http://git-wip-us.apache.org/repos/asf/tez/blob/c47951ab/tez-dag/src/main/java/org/apache/tez/dag/app/rm/AMSchedulerEventTAEnded.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/AMSchedulerEventTAEnded.java b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/AMSchedulerEventTAEnded.java
index 90e76b7..2ace642 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/AMSchedulerEventTAEnded.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/AMSchedulerEventTAEnded.java
@@ -26,14 +26,16 @@ public class AMSchedulerEventTAEnded extends AMSchedulerEvent {
 
   private final TaskAttempt attempt;
   private final ContainerId containerId;
-  private TaskAttemptState state;
+  private final TaskAttemptState state;
+  private final int schedulerId;
 
   public AMSchedulerEventTAEnded(TaskAttempt attempt, ContainerId containerId,
-      TaskAttemptState state) {
+      TaskAttemptState state, int schedulerId) {
     super(AMSchedulerEventType.S_TA_ENDED);
     this.attempt = attempt;
     this.containerId = containerId;
     this.state = state;
+    this.schedulerId = schedulerId;
   }
 
   public TezTaskAttemptID getAttemptID() {
@@ -51,4 +53,8 @@ public class AMSchedulerEventTAEnded extends AMSchedulerEvent {
   public ContainerId getUsedContainerId() {
     return this.containerId;
   }
+
+  public int getSchedulerId() {
+    return schedulerId;
+  }
 }

http://git-wip-us.apache.org/repos/asf/tez/blob/c47951ab/tez-dag/src/main/java/org/apache/tez/dag/app/rm/LocalTaskSchedulerService.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/LocalTaskSchedulerService.java b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/LocalTaskSchedulerService.java
index 51d8b9d..72a074f 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/LocalTaskSchedulerService.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/LocalTaskSchedulerService.java
@@ -34,6 +34,7 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
+import org.apache.hadoop.yarn.api.records.ApplicationId;
 import org.apache.hadoop.yarn.api.records.Container;
 import org.apache.hadoop.yarn.api.records.ContainerId;
 import org.apache.hadoop.yarn.api.records.NodeId;
@@ -63,10 +64,11 @@ public class LocalTaskSchedulerService extends TaskSchedulerService {
   final int appHostPort;
   final String appTrackingUrl;
   final AppContext appContext;
+  final long customContainerAppId;
 
   public LocalTaskSchedulerService(TaskSchedulerAppCallback appClient,
       ContainerSignatureMatcher containerSignatureMatcher, String appHostName,
-      int appHostPort, String appTrackingUrl, AppContext appContext) {
+      int appHostPort, String appTrackingUrl, long customContainerAppId, AppContext appContext) {
     super(LocalTaskSchedulerService.class.getName());
     this.realAppClient = appClient;
     this.appCallbackExecutor = createAppCallbackExecutorService();
@@ -78,6 +80,7 @@ public class LocalTaskSchedulerService extends TaskSchedulerService {
     this.appContext = appContext;
     taskRequestQueue = new PriorityBlockingQueue<TaskRequest>();
     taskAllocations = new LinkedHashMap<Object, Container>();
+    this.customContainerAppId = customContainerAppId;
   }
 
   private ExecutorService createAppCallbackExecutorService() {
@@ -164,7 +167,7 @@ public class LocalTaskSchedulerService extends TaskSchedulerService {
 
   protected AsyncDelegateRequestHandler createRequestHandler(Configuration conf) {
     return new AsyncDelegateRequestHandler(taskRequestQueue,
-        new LocalContainerFactory(appContext),
+        new LocalContainerFactory(appContext, customContainerAppId),
         taskAllocations,
         appClientDelegate,
         conf);
@@ -195,17 +198,19 @@ public class LocalTaskSchedulerService extends TaskSchedulerService {
   }
 
   static class LocalContainerFactory {
-    final AppContext appContext;
     AtomicInteger nextId;
+    final ApplicationAttemptId customAppAttemptId;
 
-    public LocalContainerFactory(AppContext appContext) {
-      this.appContext = appContext;
+    public LocalContainerFactory(AppContext appContext, long appIdLong) {
       this.nextId = new AtomicInteger(1);
+      ApplicationId appId = ApplicationId
+          .newInstance(appIdLong, appContext.getApplicationAttemptId().getApplicationId().getId());
+      this.customAppAttemptId = ApplicationAttemptId
+          .newInstance(appId, appContext.getApplicationAttemptId().getAttemptId());
     }
 
     public Container createContainer(Resource capability, Priority priority) {
-      ApplicationAttemptId appAttemptId = appContext.getApplicationAttemptId();
-      ContainerId containerId = ContainerId.newInstance(appAttemptId, nextId.getAndIncrement());
+      ContainerId containerId = ContainerId.newInstance(customAppAttemptId, nextId.getAndIncrement());
       NodeId nodeId = NodeId.newInstance("127.0.0.1", 0);
       String nodeHttpAddress = "127.0.0.1:0";
 

http://git-wip-us.apache.org/repos/asf/tez/blob/c47951ab/tez-dag/src/main/java/org/apache/tez/dag/app/rm/NMCommunicatorEvent.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/NMCommunicatorEvent.java b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/NMCommunicatorEvent.java
index 8bdeb28..f86894f 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/NMCommunicatorEvent.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/NMCommunicatorEvent.java
@@ -28,13 +28,15 @@ public class NMCommunicatorEvent extends AbstractEvent<NMCommunicatorEventType>
   private final ContainerId containerId;
   private final NodeId nodeId;
   private final Token containerToken;
+  private final int launcherId;
 
   public NMCommunicatorEvent(ContainerId containerId, NodeId nodeId,
-      Token containerToken, NMCommunicatorEventType type) {
+      Token containerToken, NMCommunicatorEventType type, int launcherId) {
     super(type);
     this.containerId = containerId;
     this.nodeId = nodeId;
     this.containerToken = containerToken;
+    this.launcherId = launcherId;
   }
 
   public ContainerId getContainerId() {
@@ -48,10 +50,14 @@ public class NMCommunicatorEvent extends AbstractEvent<NMCommunicatorEventType>
   public Token getContainerToken() {
     return this.containerToken;
   }
-  
+
+  public int getLauncherId() {
+    return launcherId;
+  }
+
   public String toSrting() {
     return super.toString() + " for container " + containerId + ", nodeId: "
-        + nodeId;
+        + nodeId + ", launcherId: " + launcherId;
   }
 
   @Override

http://git-wip-us.apache.org/repos/asf/tez/blob/c47951ab/tez-dag/src/main/java/org/apache/tez/dag/app/rm/NMCommunicatorLaunchRequestEvent.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/NMCommunicatorLaunchRequestEvent.java b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/NMCommunicatorLaunchRequestEvent.java
index c3b12c0..a38345c 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/NMCommunicatorLaunchRequestEvent.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/NMCommunicatorLaunchRequestEvent.java
@@ -25,13 +25,16 @@ public class NMCommunicatorLaunchRequestEvent extends NMCommunicatorEvent {
 
   private final ContainerLaunchContext clc;
   private final Container container;
+  // The task communicator index for the specific container being launched.
+  private final int taskCommId;
 
   public NMCommunicatorLaunchRequestEvent(ContainerLaunchContext clc,
-      Container container) {
+      Container container, int launcherId, int taskCommId) {
     super(container.getId(), container.getNodeId(), container
-        .getContainerToken(), NMCommunicatorEventType.CONTAINER_LAUNCH_REQUEST);
+        .getContainerToken(), NMCommunicatorEventType.CONTAINER_LAUNCH_REQUEST, launcherId);
     this.clc = clc;
     this.container = container;
+    this.taskCommId = taskCommId;
   }
 
   public ContainerLaunchContext getContainerLaunchContext() {
@@ -42,6 +45,10 @@ public class NMCommunicatorLaunchRequestEvent extends NMCommunicatorEvent {
     return container;
   }
 
+  public int getTaskCommId() {
+    return taskCommId;
+  }
+
   @Override
   public boolean equals(Object o) {
     if (this == o) {

http://git-wip-us.apache.org/repos/asf/tez/blob/c47951ab/tez-dag/src/main/java/org/apache/tez/dag/app/rm/NMCommunicatorStopRequestEvent.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/NMCommunicatorStopRequestEvent.java b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/NMCommunicatorStopRequestEvent.java
index 277d1e7..c9b5c44 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/NMCommunicatorStopRequestEvent.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/NMCommunicatorStopRequestEvent.java
@@ -25,9 +25,9 @@ import org.apache.hadoop.yarn.api.records.Token;
 public class NMCommunicatorStopRequestEvent extends NMCommunicatorEvent {
 
   public NMCommunicatorStopRequestEvent(ContainerId containerId, NodeId nodeId,
-      Token containerToken) {
+      Token containerToken, int launcherId) {
     super(containerId, nodeId, containerToken,
-        NMCommunicatorEventType.CONTAINER_STOP_REQUEST);
+        NMCommunicatorEventType.CONTAINER_STOP_REQUEST, launcherId);
   }
 
 }

http://git-wip-us.apache.org/repos/asf/tez/blob/c47951ab/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerEventHandler.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerEventHandler.java b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerEventHandler.java
index 72389e7..5a0ace8 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerEventHandler.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerEventHandler.java
@@ -108,9 +108,22 @@ public class TaskSchedulerEventHandler extends AbstractService
   private final String[] taskSchedulerClasses;
   protected final TaskSchedulerService []taskSchedulers;
 
+  private final boolean isPureLocalMode;
+  // If running in non local-only mode, the YARN task scheduler will always run to take care of
+  // registration with YARN and heartbeats to YARN.
+  // Splitting registration and heartbeats is not straigh-forward due to the taskScheduler being
+  // tied to a ContainerRequestType.
+  private final int yarnTaskSchedulerIndex;
+  // Custom AppIds to avoid container conflicts if there's multiple sources
+  private final long SCHEDULER_APP_ID_BASE = 111101111;
+  private final long SCHEDULER_APP_ID_INCREMENT = 111111111;
+
   BlockingQueue<AMSchedulerEvent> eventQueue
                               = new LinkedBlockingQueue<AMSchedulerEvent>();
 
+  // Not tracking container / task to schedulerId. Instead relying on everything flowing through
+  // the system and being propagated back via events.
+
   /**
    *
    * @param appContext
@@ -125,7 +138,7 @@ public class TaskSchedulerEventHandler extends AbstractService
   public TaskSchedulerEventHandler(AppContext appContext,
       DAGClientServer clientService, EventHandler eventHandler, 
       ContainerSignatureMatcher containerSignatureMatcher, WebUIService webUI,
-      String [] schedulerClasses) {
+      String [] schedulerClasses, boolean isPureLocalMode) {
     super(TaskSchedulerEventHandler.class.getName());
     this.appContext = appContext;
     this.eventHandler = eventHandler;
@@ -133,13 +146,39 @@ public class TaskSchedulerEventHandler extends AbstractService
     this.containerSignatureMatcher = containerSignatureMatcher;
     this.webUI = webUI;
     this.historyUrl = getHistoryUrl();
+    this.isPureLocalMode = isPureLocalMode;
     if (this.webUI != null) {
       this.webUI.setHistoryUrl(this.historyUrl);
     }
-    if (schedulerClasses == null || schedulerClasses.length == 0) {
-      this.taskSchedulerClasses = new String[] {TezConstants.TEZ_AM_SERVICE_PLUGINS_NAME_DEFAULT};
+
+    // Override everything for pure local mode
+    if (isPureLocalMode) {
+      this.taskSchedulerClasses = new String[] {TezConstants.TEZ_AM_SERVICE_PLUGINS_LOCAL_MODE_NAME_DEFAULT};
+      this.yarnTaskSchedulerIndex = -1;
     } else {
-      this.taskSchedulerClasses = schedulerClasses;
+      if (schedulerClasses == null || schedulerClasses.length ==0) {
+        this.taskSchedulerClasses = new String[] {TezConstants.TEZ_AM_SERVICE_PLUGINS_NAME_DEFAULT};
+        this.yarnTaskSchedulerIndex = 0;
+      } else {
+        // Ensure the YarnScheduler will be setup and note it's index. This will be responsible for heartbeats and YARN registration.
+        int foundYarnTaskSchedulerIndex = -1;
+        for (int i = 0 ; i < schedulerClasses.length ; i++) {
+          if (schedulerClasses[i].equals(TezConstants.TEZ_AM_SERVICE_PLUGINS_NAME_DEFAULT)) {
+            foundYarnTaskSchedulerIndex = i;
+            break;
+          }
+        }
+        if (foundYarnTaskSchedulerIndex == -1) { // Not found. Add at the end.
+          this.taskSchedulerClasses = new String[schedulerClasses.length+1];
+          foundYarnTaskSchedulerIndex = this.taskSchedulerClasses.length -1;
+          for (int i = 0 ; i < schedulerClasses.length ; i++) { // Copy over the rest.
+            this.taskSchedulerClasses[i] = schedulerClasses[i];
+          }
+        } else {
+          this.taskSchedulerClasses = schedulerClasses;
+        }
+        this.yarnTaskSchedulerIndex = foundYarnTaskSchedulerIndex;
+      }
     }
     taskSchedulers = new TaskSchedulerService[this.taskSchedulerClasses.length];
   }
@@ -157,12 +196,12 @@ public class TaskSchedulerEventHandler extends AbstractService
     return cachedNodeCount;
   }
   
-  public Resource getAvailableResources() {
-    return taskSchedulers[0].getAvailableResources();
+  public Resource getAvailableResources(int schedulerId) {
+    return taskSchedulers[schedulerId].getAvailableResources();
   }
 
-  public Resource getTotalResources() {
-    return taskSchedulers[0].getTotalResources();
+  public Resource getTotalResources(int schedulerId) {
+    return taskSchedulers[schedulerId].getTotalResources();
   }
 
   public synchronized void handleEvent(AMSchedulerEvent sEvent) {
@@ -176,7 +215,7 @@ public class TaskSchedulerEventHandler extends AbstractService
       switch(event.getState()) {
       case FAILED:
       case KILLED:
-        handleTAUnsuccessfulEnd((AMSchedulerEventTAEnded) sEvent);
+        handleTAUnsuccessfulEnd(event);
         break;
       case SUCCEEDED:
         handleTASucceeded(event);
@@ -228,9 +267,9 @@ public class TaskSchedulerEventHandler extends AbstractService
 
   private void handleNodeBlacklistUpdate(AMSchedulerEventNodeBlacklistUpdate event) {
     if (event.getType() == AMSchedulerEventType.S_NODE_BLACKLISTED) {
-      taskSchedulers[0].blacklistNode(event.getNodeId());
+      taskSchedulers[event.getSchedulerId()].blacklistNode(event.getNodeId());
     } else if (event.getType() == AMSchedulerEventType.S_NODE_UNBLACKLISTED) {
-      taskSchedulers[0].unblacklistNode(event.getNodeId());
+      taskSchedulers[event.getSchedulerId()].unblacklistNode(event.getNodeId());
     } else {
       throw new TezUncheckedException("Invalid event type: " + event.getType());
     }
@@ -242,14 +281,14 @@ public class TaskSchedulerEventHandler extends AbstractService
     // TODO what happens to the task that was connected to this container?
     // current assumption is that it will eventually call handleTaStopRequest
     //TaskAttempt taskAttempt = (TaskAttempt)
-    taskSchedulers[0].deallocateContainer(containerId);
+    taskSchedulers[event.getSchedulerId()].deallocateContainer(containerId);
     // TODO does this container need to be stopped via C_STOP_REQUEST
     sendEvent(new AMContainerEventStopRequest(containerId));
   }
 
   private void handleTAUnsuccessfulEnd(AMSchedulerEventTAEnded event) {
     TaskAttempt attempt = event.getAttempt();
-    boolean wasContainerAllocated = taskSchedulers[0].deallocateTask(attempt, false);
+    boolean wasContainerAllocated = taskSchedulers[event.getSchedulerId()].deallocateTask(attempt, false);
     // use stored value of container id in case the scheduler has removed this
     // assignment because the task has been deallocated earlier.
     // retroactive case
@@ -291,7 +330,8 @@ public class TaskSchedulerEventHandler extends AbstractService
           event.getAttemptID()));
     }
 
-    boolean wasContainerAllocated = taskSchedulers[0].deallocateTask(attempt, true);
+    boolean wasContainerAllocated = taskSchedulers[event.getSchedulerId()].deallocateTask(attempt,
+        true);
     if (!wasContainerAllocated) {
       LOG.error("De-allocated successful task: " + attempt.getID()
           + ", but TaskScheduler reported no container assigned to task");
@@ -316,7 +356,7 @@ public class TaskSchedulerEventHandler extends AbstractService
         TaskAttempt affinityAttempt = vertex.getTask(taskIndex).getSuccessfulAttempt();
         if (affinityAttempt != null) {
           Preconditions.checkNotNull(affinityAttempt.getAssignedContainerID(), affinityAttempt.getID());
-          taskSchedulers[0].allocateTask(taskAttempt,
+          taskSchedulers[event.getSchedulerId()].allocateTask(taskAttempt,
               event.getCapability(),
               affinityAttempt.getAssignedContainerID(),
               Priority.newInstance(event.getPriority()),
@@ -336,7 +376,7 @@ public class TaskSchedulerEventHandler extends AbstractService
       }
     }
 
-    taskSchedulers[0].allocateTask(taskAttempt,
+    taskSchedulers[event.getSchedulerId()].allocateTask(taskAttempt,
         event.getCapability(),
         hosts,
         racks,
@@ -347,7 +387,8 @@ public class TaskSchedulerEventHandler extends AbstractService
 
   private TaskSchedulerService createTaskScheduler(String host, int port, String trackingUrl,
                                                    AppContext appContext,
-                                                   String schedulerClassName) {
+                                                   String schedulerClassName,
+                                                   long customAppIdIdentifier) {
     if (schedulerClassName.equals(TezConstants.TEZ_AM_SERVICE_PLUGINS_NAME_DEFAULT)) {
       LOG.info("Creating TaskScheduler: YarnTaskSchedulerService");
       return new YarnTaskSchedulerService(this, this.containerSignatureMatcher,
@@ -355,7 +396,7 @@ public class TaskSchedulerEventHandler extends AbstractService
     } else if (schedulerClassName.equals(TezConstants.TEZ_AM_SERVICE_PLUGINS_LOCAL_MODE_NAME_DEFAULT)) {
       LOG.info("Creating TaskScheduler: Local TaskScheduler");
       return new LocalTaskSchedulerService(this, this.containerSignatureMatcher,
-          host, port, trackingUrl, appContext);
+          host, port, trackingUrl, customAppIdIdentifier, appContext);
     } else {
       LOG.info("Creating custom TaskScheduler: " + schedulerClassName);
       // TODO TEZ-2003 Temporary reflection with specific parameters. Remove once there is a clean interface.
@@ -364,9 +405,10 @@ public class TaskSchedulerEventHandler extends AbstractService
       try {
         Constructor<? extends TaskSchedulerService> ctor = taskSchedulerClazz
             .getConstructor(TaskSchedulerAppCallback.class, AppContext.class, String.class,
-                int.class, String.class, Configuration.class);
+                int.class, String.class, long.class, Configuration.class);
         ctor.setAccessible(true);
-        return ctor.newInstance(this, appContext, host, port, trackingUrl, getConfig());
+        return ctor.newInstance(this, appContext, host, port, trackingUrl, customAppIdIdentifier,
+            getConfig());
       } catch (NoSuchMethodException e) {
         throw new TezUncheckedException(e);
       } catch (InvocationTargetException e) {
@@ -381,10 +423,19 @@ public class TaskSchedulerEventHandler extends AbstractService
 
   @VisibleForTesting
   protected void instantiateScheduelrs(String host, int port, String trackingUrl, AppContext appContext) {
+    // TODO Add error checking for components being used in the Vertex when running in pure local mode.
     // Iterate over the list and create all the taskSchedulers
+    int j = 0;
     for (int i = 0; i < taskSchedulerClasses.length; i++) {
+      long customAppIdIdentifier;
+      if (isPureLocalMode || taskSchedulerClasses[i].equals(
+          TezConstants.TEZ_AM_SERVICE_PLUGINS_NAME_DEFAULT)) { // Use the app identifier from the appId.
+        customAppIdIdentifier = appContext.getApplicationID().getClusterTimestamp();
+      } else {
+        customAppIdIdentifier = SCHEDULER_APP_ID_BASE + (j++ * SCHEDULER_APP_ID_INCREMENT);
+      }
       taskSchedulers[i] = createTaskScheduler(host, port,
-          trackingUrl, appContext, taskSchedulerClasses[i]);
+          trackingUrl, appContext, taskSchedulerClasses[i], customAppIdIdentifier);
     }
   }
 
@@ -403,12 +454,12 @@ public class TaskSchedulerEventHandler extends AbstractService
     for (int i = 0 ; i < taskSchedulers.length ; i++) {
       taskSchedulers[i].init(getConfig());
       taskSchedulers[i].start();
-    }
-
-    // TODO TEZ-2118 Start using multiple task schedulers
-    if (shouldUnregisterFlag.get()) {
-      // Flag may have been set earlier when task scheduler was not initialized
-      taskSchedulers[0].setShouldUnregister();
+      if (shouldUnregisterFlag.get()) {
+        // Flag may have been set earlier when task scheduler was not initialized
+        // TODO TEZ-2003 Should setRegister / unregister be part of APIs when not YARN specific ?
+        // External services could need to talk to some other entity.
+        taskSchedulers[i].setShouldUnregister();
+      }
     }
 
     this.eventHandlingThread = new Thread("TaskSchedulerEventHandlerThread") {
@@ -457,8 +508,10 @@ public class TaskSchedulerEventHandler extends AbstractService
       if (eventHandlingThread != null)
         eventHandlingThread.interrupt();
     }
-    if (taskSchedulers[0] != null) {
-      ((AbstractService)taskSchedulers[0]).stop();
+    for (int i = 0 ; i < taskSchedulers.length ; i++) {
+      if (taskSchedulers[i] != null) {
+        taskSchedulers[i].stop();
+      }
     }
   }
 
@@ -467,15 +520,18 @@ public class TaskSchedulerEventHandler extends AbstractService
   public synchronized void taskAllocated(Object task,
                                            Object appCookie,
                                            Container container) {
+    AMSchedulerEventTALaunchRequest event =
+        (AMSchedulerEventTALaunchRequest) appCookie;
     ContainerId containerId = container.getId();
-    if (appContext.getAllContainers().addContainerIfNew(container)) {
+    if (appContext.getAllContainers()
+        .addContainerIfNew(container, event.getSchedulerId(), event.getLauncherId(),
+            event.getTaskCommId())) {
       appContext.getNodeTracker().nodeSeen(container.getNodeId());
       sendEvent(new AMNodeEventContainerAllocated(container
           .getNodeId(), container.getId()));
     }
 
-    AMSchedulerEventTALaunchRequest event =
-                         (AMSchedulerEventTALaunchRequest) appCookie;
+
     TaskAttempt taskAttempt = event.getTaskAttempt();
     // TODO - perhaps check if the task still needs this container
     // because the deallocateTask downcall may have raced with the
@@ -484,7 +540,7 @@ public class TaskSchedulerEventHandler extends AbstractService
  
     if (appContext.getAllContainers().get(containerId).getState() == AMContainerState.ALLOCATED) {
       sendEvent(new AMContainerEventLaunchRequest(containerId, taskAttempt.getVertexID(),
-          event.getContainerContext()));
+          event.getContainerContext(), event.getLauncherId(), event.getTaskCommId()));
     }
     sendEvent(new DAGEventSchedulerUpdateTAAssigned(taskAttempt, container));
     sendEvent(new AMContainerEventAssignTA(containerId, taskAttempt.getID(),
@@ -603,6 +659,9 @@ public class TaskSchedulerEventHandler extends AbstractService
   public float getProgress() {
     // at this point allocate has been called and so node count must be available
     // may change after YARN-1722
+    // This is a heartbeat in from the scheduler into the APP, and is being used to piggy-back and
+    // node updates from the cluster.
+    // TODO Handle this in TEZ-2124. Need a way to know which scheduler is calling in.
     int nodeCount = taskSchedulers[0].getClusterNodeCount();
     if (nodeCount != cachedNodeCount) {
       cachedNodeCount = nodeCount;
@@ -618,7 +677,9 @@ public class TaskSchedulerEventHandler extends AbstractService
   }
 
   public void dagCompleted() {
-    taskSchedulers[0].dagComplete();
+    for (int i = 0 ; i < taskSchedulers.length ; i++) {
+      taskSchedulers[i].dagComplete();
+    }
   }
 
   public void dagSubmitted() {
@@ -628,7 +689,10 @@ public class TaskSchedulerEventHandler extends AbstractService
 
   @Override
   public void preemptContainer(ContainerId containerId) {
-    taskSchedulers[0].deallocateContainer(containerId);
+    // TODO Why is this making a call back into the scheduler, when the call is originating from there.
+    // An AMContainer instance should already exist if an attempt is being made to preempt it
+    AMContainer amContainer = appContext.getAllContainers().get(containerId);
+    taskSchedulers[amContainer.getTaskSchedulerIdentifier()].deallocateContainer(containerId);
     // Inform the Containers about completion.
     sendEvent(new AMContainerEventCompleted(containerId, ContainerExitStatus.INVALID,
         "Container preempted internally", TaskAttemptTerminationCause.INTERNAL_PREEMPTION));
@@ -637,13 +701,24 @@ public class TaskSchedulerEventHandler extends AbstractService
   public void setShouldUnregisterFlag() {
     LOG.info("TaskScheduler notified that it should unregister from RM");
     this.shouldUnregisterFlag.set(true);
-    if (this.taskSchedulers[0] != null) {
-      this.taskSchedulers[0].setShouldUnregister();
+    for (int i = 0 ; i < taskSchedulers.length ; i++) {
+      if (this.taskSchedulers[i] != null) {
+        // TODO TEZ-2003 registration required for all schedulers ?
+        this.taskSchedulers[i].setShouldUnregister();
+      }
     }
   }
 
   public boolean hasUnregistered() {
-    return this.taskSchedulers[0].hasUnregistered();
+    boolean result = true;
+    for (int i = 0 ; i < taskSchedulers.length ; i++) {
+      // TODO TEZ-2003 registration required for all schedulers ?
+      result |= this.taskSchedulers[i].hasUnregistered();
+      if (result == false) {
+        return result;
+      }
+    }
+    return result;
   }
 
   @VisibleForTesting

http://git-wip-us.apache.org/repos/asf/tez/blob/c47951ab/tez-dag/src/main/java/org/apache/tez/dag/app/rm/container/AMContainer.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/container/AMContainer.java b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/container/AMContainer.java
index 0fc2e12..6616896 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/container/AMContainer.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/container/AMContainer.java
@@ -34,4 +34,7 @@ public interface AMContainer extends EventHandler<AMContainerEvent>{
   public List<TezTaskAttemptID> getAllTaskAttempts();
   public TezTaskAttemptID getCurrentTaskAttempt();
 
+  public int getTaskSchedulerIdentifier();
+  public int getContainerLauncherIdentifier();
+  public int getTaskCommunicatorIdentifier();
 }

http://git-wip-us.apache.org/repos/asf/tez/blob/c47951ab/tez-dag/src/main/java/org/apache/tez/dag/app/rm/container/AMContainerEventLaunchRequest.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/container/AMContainerEventLaunchRequest.java b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/container/AMContainerEventLaunchRequest.java
index d973264..92e5817 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/container/AMContainerEventLaunchRequest.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/container/AMContainerEventLaunchRequest.java
@@ -27,12 +27,17 @@ public class AMContainerEventLaunchRequest extends AMContainerEvent {
 
   private final TezVertexID vertexId;
   private final ContainerContext containerContext;
+  private final int launcherId;
+  private final int taskCommId;
 
   public AMContainerEventLaunchRequest(ContainerId containerId,
-      TezVertexID vertexId, ContainerContext containerContext) {
+      TezVertexID vertexId, ContainerContext containerContext,
+      int launcherId, int taskCommId) {
     super(containerId, AMContainerEventType.C_LAUNCH_REQUEST);
     this.vertexId = vertexId;
     this.containerContext = containerContext;
+    this.launcherId = launcherId;
+    this.taskCommId = taskCommId;
   }
 
   public TezDAGID getDAGId() {
@@ -46,4 +51,12 @@ public class AMContainerEventLaunchRequest extends AMContainerEvent {
   public ContainerContext getContainerContext() {
     return this.containerContext;
   }
+
+  public int getLauncherId() {
+    return launcherId;
+  }
+
+  public int getTaskCommId() {
+    return taskCommId;
+  }
 }

http://git-wip-us.apache.org/repos/asf/tez/blob/c47951ab/tez-dag/src/main/java/org/apache/tez/dag/app/rm/container/AMContainerImpl.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/container/AMContainerImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/container/AMContainerImpl.java
index 1acec9c..39df2e8 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/container/AMContainerImpl.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/container/AMContainerImpl.java
@@ -81,6 +81,9 @@ public class AMContainerImpl implements AMContainer {
   private final TaskAttemptListener taskAttemptListener;
   protected final EventHandler eventHandler;
   private final ContainerSignatureMatcher signatureMatcher;
+  private final int schedulerId;
+  private final int launcherId;
+  private final int taskCommId;
 
   private final List<TezTaskAttemptID> completedAttempts =
       new LinkedList<TezTaskAttemptID>();
@@ -302,7 +305,7 @@ public class AMContainerImpl implements AMContainer {
   // additional change - JvmID, YarnChild, etc depend on TaskType.
   public AMContainerImpl(Container container, ContainerHeartbeatHandler chh,
       TaskAttemptListener tal, ContainerSignatureMatcher signatureMatcher,
-      AppContext appContext) {
+      AppContext appContext, int schedulerId, int launcherId, int taskCommId) {
     ReentrantReadWriteLock rwLock = new ReentrantReadWriteLock();
     this.readLock = rwLock.readLock();
     this.writeLock = rwLock.writeLock();
@@ -314,6 +317,9 @@ public class AMContainerImpl implements AMContainer {
     this.containerHeartbeatHandler = chh;
     this.taskAttemptListener = tal;
     this.failedAssignments = new LinkedList<TezTaskAttemptID>();
+    this.schedulerId = schedulerId;
+    this.launcherId = launcherId;
+    this.taskCommId = taskCommId;
     this.stateMachine = stateMachineFactory.make(this);
   }
 
@@ -363,6 +369,21 @@ public class AMContainerImpl implements AMContainer {
     }
   }
 
+  @Override
+  public int getTaskSchedulerIdentifier() {
+    return this.schedulerId;
+  }
+
+  @Override
+  public int getContainerLauncherIdentifier() {
+    return this.launcherId;
+  }
+
+  @Override
+  public int getTaskCommunicatorIdentifier() {
+    return this.taskCommId;
+  }
+
   public boolean isInErrorState() {
     return inError;
   }
@@ -432,7 +453,7 @@ public class AMContainerImpl implements AMContainer {
           containerContext.getLocalResources(),
           containerContext.getEnvironment(),
           containerContext.getJavaOpts(),
-          container.taskAttemptListener.getAddress(), containerContext.getCredentials(),
+          container.taskAttemptListener.getTaskCommunicator(container.taskCommId).getAddress(), containerContext.getCredentials(),
           container.appContext, container.container.getResource(),
           container.appContext.getAMConf());
 
@@ -1014,7 +1035,7 @@ public class AMContainerImpl implements AMContainer {
   }
   
   protected void deAllocate() {
-    sendEvent(new AMSchedulerEventDeallocateContainer(containerId));
+    sendEvent(new AMSchedulerEventDeallocateContainer(containerId, schedulerId));
   }
 
   protected void sendTerminatedToTaskAttempt(
@@ -1044,28 +1065,28 @@ public class AMContainerImpl implements AMContainer {
   }
 
   protected void sendStartRequestToNM(ContainerLaunchContext clc) {
-    sendEvent(new NMCommunicatorLaunchRequestEvent(clc, container));
+    sendEvent(new NMCommunicatorLaunchRequestEvent(clc, container, launcherId, taskCommId));
   }
 
   protected void sendStopRequestToNM() {
     sendEvent(new NMCommunicatorStopRequestEvent(containerId,
-        container.getNodeId(), container.getContainerToken()));
+        container.getNodeId(), container.getContainerToken(), launcherId));
   }
 
   protected void unregisterAttemptFromListener(TezTaskAttemptID attemptId) {
-    taskAttemptListener.unregisterTaskAttempt(attemptId);
+    taskAttemptListener.unregisterTaskAttempt(attemptId, taskCommId);
   }
 
   protected void registerAttemptWithListener(AMContainerTask amContainerTask) {
-    taskAttemptListener.registerTaskAttempt(amContainerTask, this.containerId);
+    taskAttemptListener.registerTaskAttempt(amContainerTask, this.containerId, taskCommId);
   }
 
   protected void registerWithTAListener() {
-    taskAttemptListener.registerRunningContainer(containerId);
+    taskAttemptListener.registerRunningContainer(containerId, taskCommId);
   }
 
   protected void unregisterFromTAListener() {
-    this.taskAttemptListener.unregisterRunningContainer(containerId);
+    this.taskAttemptListener.unregisterRunningContainer(containerId, taskCommId);
   }
 
   protected void registerWithContainerListener() {

http://git-wip-us.apache.org/repos/asf/tez/blob/c47951ab/tez-dag/src/main/java/org/apache/tez/dag/app/rm/container/AMContainerMap.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/container/AMContainerMap.java b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/container/AMContainerMap.java
index 574c38e..938096d 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/container/AMContainerMap.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/container/AMContainerMap.java
@@ -62,9 +62,9 @@ public class AMContainerMap extends AbstractService implements EventHandler<AMCo
     }
   }
 
-  public boolean addContainerIfNew(Container container) {
+  public boolean addContainerIfNew(Container container, int schedulerId, int launcherId, int taskCommId) {
     AMContainer amc = new AMContainerImpl(container, chh, tal,
-      containerSignatureMatcher, context);
+      containerSignatureMatcher, context, schedulerId, launcherId, taskCommId);
     return (containerMap.putIfAbsent(container.getId(), amc) == null);
   }
 

http://git-wip-us.apache.org/repos/asf/tez/blob/c47951ab/tez-dag/src/main/java/org/apache/tez/dag/app/rm/node/AMNodeImpl.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/node/AMNodeImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/node/AMNodeImpl.java
index b93cab3..0d8e4cd 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/node/AMNodeImpl.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/node/AMNodeImpl.java
@@ -257,7 +257,8 @@ public class AMNodeImpl implements AMNode {
     // these containers are not useful anymore
     pastContainers.addAll(containers);
     containers.clear();
-    sendEvent(new AMSchedulerEventNodeBlacklistUpdate(getNodeId(), true));
+    // TODO TEZ-2124 node tracking per ext source
+    sendEvent(new AMSchedulerEventNodeBlacklistUpdate(getNodeId(), true, 0));
   }
 
   @SuppressWarnings("unchecked")
@@ -363,7 +364,8 @@ public class AMNodeImpl implements AMNode {
     public void transition(AMNodeImpl node, AMNodeEvent nEvent) {
       node.ignoreBlacklisting = ignore;
       if (node.getState() == AMNodeState.BLACKLISTED) {
-        node.sendEvent(new AMSchedulerEventNodeBlacklistUpdate(node.getNodeId(), false));
+        // TODO TEZ-2124 node tracking per ext source
+        node.sendEvent(new AMSchedulerEventNodeBlacklistUpdate(node.getNodeId(), false, 0));
       }
     }
   }

http://git-wip-us.apache.org/repos/asf/tez/blob/c47951ab/tez-dag/src/test/java/org/apache/tez/dag/app/MockDAGAppMaster.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/MockDAGAppMaster.java b/tez-dag/src/test/java/org/apache/tez/dag/app/MockDAGAppMaster.java
index aace92b..59efb87 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/MockDAGAppMaster.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/MockDAGAppMaster.java
@@ -196,7 +196,7 @@ public class MockDAGAppMaster extends DAGAppMaster {
     @Override
     public void serviceStart() throws Exception {
       taListener = (TaskAttemptListenerImpTezDag) getTaskAttemptListener();
-      taskCommunicator = (TezTaskCommunicatorImpl) taListener.getTaskCommunicator();
+      taskCommunicator = (TezTaskCommunicatorImpl) taListener.getTaskCommunicator(0);
       eventHandlingThread = new Thread(this);
       eventHandlingThread.start();
       ExecutorService rawExecutor = Executors.newFixedThreadPool(handlerConcurrency,

http://git-wip-us.apache.org/repos/asf/tez/blob/c47951ab/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskAttemptListenerImplTezDag.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskAttemptListenerImplTezDag.java b/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskAttemptListenerImplTezDag.java
index 33f4817..98fcddc 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskAttemptListenerImplTezDag.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskAttemptListenerImplTezDag.java
@@ -110,9 +110,16 @@ public class TestTaskAttemptListenerImplTezDag {
     doReturn(dag).when(appContext).getCurrentDAG();
     doReturn(appAcls).when(appContext).getApplicationACLs();
     doReturn(amContainerMap).when(appContext).getAllContainers();
+    NodeId nodeId = NodeId.newInstance("localhost", 0);
+
+    AMContainer amContainer = mock(AMContainer.class);
+    Container container = mock(Container.class);
+    doReturn(nodeId).when(container).getNodeId();
+    doReturn(amContainer).when(amContainerMap).get(any(ContainerId.class));
+    doReturn(container).when(amContainer).getContainer();
 
     taskAttemptListener = new TaskAttemptListenerImplForTest(appContext,
-        mock(TaskHeartbeatHandler.class), mock(ContainerHeartbeatHandler.class), null);
+        mock(TaskHeartbeatHandler.class), mock(ContainerHeartbeatHandler.class), null, null);
 
     taskSpec = mock(TaskSpec.class);
     doReturn(taskAttemptID).when(taskSpec).getTaskAttemptID();
@@ -123,7 +130,7 @@ public class TestTaskAttemptListenerImplTezDag {
   @Test(timeout = 5000)
   public void testGetTask() throws IOException {
 
-    TezTaskCommunicatorImpl taskCommunicator = (TezTaskCommunicatorImpl)taskAttemptListener.getTaskCommunicator();
+    TezTaskCommunicatorImpl taskCommunicator = (TezTaskCommunicatorImpl)taskAttemptListener.getTaskCommunicator(0);
     TezTaskUmbilicalProtocol tezUmbilical = taskCommunicator.getUmbilical();
 
     ContainerId containerId1 = createContainerId(appId, 1);
@@ -133,55 +140,55 @@ public class TestTaskAttemptListenerImplTezDag {
 
     ContainerId containerId2 = createContainerId(appId, 2);
     ContainerContext containerContext2 = new ContainerContext(containerId2.toString());
-    taskAttemptListener.registerRunningContainer(containerId2);
+    taskAttemptListener.registerRunningContainer(containerId2, 0);
     containerTask = tezUmbilical.getTask(containerContext2);
     assertNull(containerTask);
 
     // Valid task registered
-    taskAttemptListener.registerTaskAttempt(amContainerTask, containerId2);
+    taskAttemptListener.registerTaskAttempt(amContainerTask, containerId2, 0);
     containerTask = tezUmbilical.getTask(containerContext2);
     assertFalse(containerTask.shouldDie());
     assertEquals(taskSpec, containerTask.getTaskSpec());
 
     // Task unregistered. Should respond to heartbeats
-    taskAttemptListener.unregisterTaskAttempt(taskAttemptId);
+    taskAttemptListener.unregisterTaskAttempt(taskAttemptId, 0);
     containerTask = tezUmbilical.getTask(containerContext2);
     assertNull(containerTask);
 
     // Container unregistered. Should send a shouldDie = true
-    taskAttemptListener.unregisterRunningContainer(containerId2);
+    taskAttemptListener.unregisterRunningContainer(containerId2, 0);
     containerTask = tezUmbilical.getTask(containerContext2);
     assertTrue(containerTask.shouldDie());
 
     ContainerId containerId3 = createContainerId(appId, 3);
     ContainerContext containerContext3 = new ContainerContext(containerId3.toString());
-    taskAttemptListener.registerRunningContainer(containerId3);
+    taskAttemptListener.registerRunningContainer(containerId3, 0);
 
     // Register task to container3, followed by unregistering container 3 all together
     TaskSpec taskSpec2 = mock(TaskSpec.class);
     TezTaskAttemptID taskAttemptId2 = mock(TezTaskAttemptID.class);
     doReturn(taskAttemptId2).when(taskSpec2).getTaskAttemptID();
     AMContainerTask amContainerTask2 = new AMContainerTask(taskSpec, null, null, false, 0);
-    taskAttemptListener.registerTaskAttempt(amContainerTask2, containerId3);
-    taskAttemptListener.unregisterRunningContainer(containerId3);
+    taskAttemptListener.registerTaskAttempt(amContainerTask2, containerId3, 0);
+    taskAttemptListener.unregisterRunningContainer(containerId3, 0);
     containerTask = tezUmbilical.getTask(containerContext3);
     assertTrue(containerTask.shouldDie());
   }
 
   @Test(timeout = 5000)
   public void testGetTaskMultiplePulls() throws IOException {
-    TezTaskCommunicatorImpl taskCommunicator = (TezTaskCommunicatorImpl)taskAttemptListener.getTaskCommunicator();
+    TezTaskCommunicatorImpl taskCommunicator = (TezTaskCommunicatorImpl)taskAttemptListener.getTaskCommunicator(0);
     TezTaskUmbilicalProtocol tezUmbilical = taskCommunicator.getUmbilical();
 
     ContainerId containerId1 = createContainerId(appId, 1);
     doReturn(mock(AMContainer.class)).when(amContainerMap).get(containerId1);
     ContainerContext containerContext1 = new ContainerContext(containerId1.toString());
-    taskAttemptListener.registerRunningContainer(containerId1);
+    taskAttemptListener.registerRunningContainer(containerId1, 0);
     containerTask = tezUmbilical.getTask(containerContext1);
     assertNull(containerTask);
 
     // Register task
-    taskAttemptListener.registerTaskAttempt(amContainerTask, containerId1);
+    taskAttemptListener.registerTaskAttempt(amContainerTask, containerId1, 0);
     containerTask = tezUmbilical.getTask(containerContext1);
     assertFalse(containerTask.shouldDie());
     assertEquals(taskSpec, containerTask.getTaskSpec());

http://git-wip-us.apache.org/repos/asf/tez/blob/c47951ab/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestTaskAttempt.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestTaskAttempt.java b/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestTaskAttempt.java
index 60c4c88..9df225c 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestTaskAttempt.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestTaskAttempt.java
@@ -56,6 +56,7 @@ import org.apache.hadoop.yarn.event.EventHandler;
 import org.apache.hadoop.yarn.util.Clock;
 import org.apache.hadoop.yarn.util.SystemClock;
 import org.apache.tez.common.MockDNSToSwitchMapping;
+import org.apache.tez.dag.api.TaskCommunicator;
 import org.apache.tez.dag.api.TaskLocationHint;
 import org.apache.tez.dag.api.TezConfiguration;
 import org.apache.tez.dag.api.oldrecords.TaskAttemptState;
@@ -273,8 +274,9 @@ public class TestTaskAttempt {
 
     MockEventHandler eventHandler = new MockEventHandler();
     TaskAttemptListener taListener = mock(TaskAttemptListener.class);
-    when(taListener.getAddress()).thenReturn(
-        new InetSocketAddress("localhost", 0));
+    TaskCommunicator taskComm = mock(TaskCommunicator.class);
+    doReturn(new InetSocketAddress("localhost", 0)).when(taskComm).getAddress();
+    doReturn(taskComm).when(taListener).getTaskCommunicator(0);
 
     Configuration taskConf = new Configuration();
     taskConf.setClass("fs.file.impl", StubbedFS.class, FileSystem.class);
@@ -323,8 +325,9 @@ public class TestTaskAttempt {
 
     MockEventHandler eventHandler = spy(new MockEventHandler());
     TaskAttemptListener taListener = mock(TaskAttemptListener.class);
-    when(taListener.getAddress()).thenReturn(
-        new InetSocketAddress("localhost", 0));
+    TaskCommunicator taskComm = mock(TaskCommunicator.class);
+    doReturn(new InetSocketAddress("localhost", 0)).when(taskComm).getAddress();
+    doReturn(taskComm).when(taListener).getTaskCommunicator(0);
 
     Configuration taskConf = new Configuration();
     taskConf.setClass("fs.file.impl", StubbedFS.class, FileSystem.class);
@@ -345,7 +348,7 @@ public class TestTaskAttempt {
     AMContainerMap containers = new AMContainerMap(
         mock(ContainerHeartbeatHandler.class), mock(TaskAttemptListener.class),
         new ContainerContextMatcher(), appCtx);
-    containers.addContainerIfNew(container);
+    containers.addContainerIfNew(container, 0, 0, 0);
 
     doReturn(new ClusterInfo()).when(appCtx).getClusterInfo();
     doReturn(containers).when(appCtx).getAllContainers();
@@ -424,8 +427,9 @@ public class TestTaskAttempt {
 
     MockEventHandler eventHandler = new MockEventHandler();
     TaskAttemptListener taListener = mock(TaskAttemptListener.class);
-    when(taListener.getAddress()).thenReturn(
-        new InetSocketAddress("localhost", 0));
+    TaskCommunicator taskComm = mock(TaskCommunicator.class);
+    doReturn(new InetSocketAddress("localhost", 0)).when(taskComm).getAddress();
+    doReturn(taskComm).when(taListener).getTaskCommunicator(0);
 
     Configuration taskConf = new Configuration();
     taskConf.setClass("fs.file.impl", StubbedFS.class, FileSystem.class);
@@ -446,7 +450,7 @@ public class TestTaskAttempt {
     AMContainerMap containers = new AMContainerMap(
         mock(ContainerHeartbeatHandler.class), mock(TaskAttemptListener.class),
         new ContainerContextMatcher(), appCtx);
-    containers.addContainerIfNew(container);
+    containers.addContainerIfNew(container, 0, 0, 0);
 
     doReturn(new ClusterInfo()).when(appCtx).getClusterInfo();
     doReturn(containers).when(appCtx).getAllContainers();
@@ -489,8 +493,9 @@ public class TestTaskAttempt {
 
     MockEventHandler eventHandler = spy(new MockEventHandler());
     TaskAttemptListener taListener = mock(TaskAttemptListener.class);
-    when(taListener.getAddress()).thenReturn(
-        new InetSocketAddress("localhost", 0));
+    TaskCommunicator taskComm = mock(TaskCommunicator.class);
+    doReturn(new InetSocketAddress("localhost", 0)).when(taskComm).getAddress();
+    doReturn(taskComm).when(taListener).getTaskCommunicator(0);
 
     Configuration taskConf = new Configuration();
     taskConf.setClass("fs.file.impl", StubbedFS.class, FileSystem.class);
@@ -511,7 +516,7 @@ public class TestTaskAttempt {
     AMContainerMap containers = new AMContainerMap(
         mock(ContainerHeartbeatHandler.class), mock(TaskAttemptListener.class),
         new ContainerContextMatcher(), appCtx);
-    containers.addContainerIfNew(container);
+    containers.addContainerIfNew(container, 0, 0, 0);
 
     doReturn(new ClusterInfo()).when(appCtx).getClusterInfo();
     doReturn(containers).when(appCtx).getAllContainers();
@@ -581,8 +586,9 @@ public class TestTaskAttempt {
 
     MockEventHandler eventHandler = spy(new MockEventHandler());
     TaskAttemptListener taListener = mock(TaskAttemptListener.class);
-    when(taListener.getAddress()).thenReturn(
-        new InetSocketAddress("localhost", 0));
+    TaskCommunicator taskComm = mock(TaskCommunicator.class);
+    doReturn(new InetSocketAddress("localhost", 0)).when(taskComm).getAddress();
+    doReturn(taskComm).when(taListener).getTaskCommunicator(0);
 
     Configuration taskConf = new Configuration();
     taskConf.setClass("fs.file.impl", StubbedFS.class, FileSystem.class);
@@ -604,7 +610,7 @@ public class TestTaskAttempt {
     AMContainerMap containers = new AMContainerMap(
         mock(ContainerHeartbeatHandler.class), mock(TaskAttemptListener.class),
         new ContainerContextMatcher(), appCtx);
-    containers.addContainerIfNew(container);
+    containers.addContainerIfNew(container, 0, 0, 0);
 
     doReturn(new ClusterInfo()).when(appCtx).getClusterInfo();
     doReturn(containers).when(appCtx).getAllContainers();
@@ -712,8 +718,9 @@ public class TestTaskAttempt {
 
     MockEventHandler eventHandler = spy(new MockEventHandler());
     TaskAttemptListener taListener = mock(TaskAttemptListener.class);
-    when(taListener.getAddress()).thenReturn(
-        new InetSocketAddress("localhost", 0));
+    TaskCommunicator taskComm = mock(TaskCommunicator.class);
+    doReturn(new InetSocketAddress("localhost", 0)).when(taskComm).getAddress();
+    doReturn(taskComm).when(taListener).getTaskCommunicator(0);
 
     Configuration taskConf = new Configuration();
     taskConf.setClass("fs.file.impl", StubbedFS.class, FileSystem.class);
@@ -735,7 +742,7 @@ public class TestTaskAttempt {
     AMContainerMap containers = new AMContainerMap(
         mock(ContainerHeartbeatHandler.class), mock(TaskAttemptListener.class),
         new ContainerContextMatcher(), appCtx);
-    containers.addContainerIfNew(container);
+    containers.addContainerIfNew(container, 0, 0, 0);
 
     doReturn(new ClusterInfo()).when(appCtx).getClusterInfo();
     doReturn(containers).when(appCtx).getAllContainers();
@@ -804,8 +811,9 @@ public class TestTaskAttempt {
 
     MockEventHandler eventHandler = spy(new MockEventHandler());
     TaskAttemptListener taListener = mock(TaskAttemptListener.class);
-    when(taListener.getAddress()).thenReturn(
-        new InetSocketAddress("localhost", 0));
+    TaskCommunicator taskComm = mock(TaskCommunicator.class);
+    doReturn(new InetSocketAddress("localhost", 0)).when(taskComm).getAddress();
+    doReturn(taskComm).when(taListener).getTaskCommunicator(0);
 
     Configuration taskConf = new Configuration();
     taskConf.setClass("fs.file.impl", StubbedFS.class, FileSystem.class);
@@ -826,7 +834,7 @@ public class TestTaskAttempt {
     AMContainerMap containers = new AMContainerMap(
         mock(ContainerHeartbeatHandler.class), mock(TaskAttemptListener.class),
         new ContainerContextMatcher(), appCtx);
-    containers.addContainerIfNew(container);
+    containers.addContainerIfNew(container, 0, 0, 0);
 
     doReturn(new ClusterInfo()).when(appCtx).getClusterInfo();
     doReturn(containers).when(appCtx).getAllContainers();
@@ -899,8 +907,9 @@ public class TestTaskAttempt {
 
     MockEventHandler eventHandler = spy(new MockEventHandler());
     TaskAttemptListener taListener = mock(TaskAttemptListener.class);
-    when(taListener.getAddress()).thenReturn(
-        new InetSocketAddress("localhost", 0));
+    TaskCommunicator taskComm = mock(TaskCommunicator.class);
+    doReturn(new InetSocketAddress("localhost", 0)).when(taskComm).getAddress();
+    doReturn(taskComm).when(taListener).getTaskCommunicator(0);
 
     Configuration taskConf = new Configuration();
     taskConf.setClass("fs.file.impl", StubbedFS.class, FileSystem.class);
@@ -921,7 +930,7 @@ public class TestTaskAttempt {
     AMContainerMap containers = new AMContainerMap(
         mock(ContainerHeartbeatHandler.class), mock(TaskAttemptListener.class),
         new ContainerContextMatcher(), appCtx);
-    containers.addContainerIfNew(container);
+    containers.addContainerIfNew(container, 0, 0, 0);
 
     doReturn(new ClusterInfo()).when(appCtx).getClusterInfo();
     doReturn(containers).when(appCtx).getAllContainers();
@@ -1002,8 +1011,9 @@ public class TestTaskAttempt {
 
     MockEventHandler eventHandler = spy(new MockEventHandler());
     TaskAttemptListener taListener = mock(TaskAttemptListener.class);
-    when(taListener.getAddress()).thenReturn(
-        new InetSocketAddress("localhost", 0));
+    TaskCommunicator taskComm = mock(TaskCommunicator.class);
+    doReturn(new InetSocketAddress("localhost", 0)).when(taskComm).getAddress();
+    doReturn(taskComm).when(taListener).getTaskCommunicator(0);
 
     Configuration taskConf = new Configuration();
     taskConf.setClass("fs.file.impl", StubbedFS.class, FileSystem.class);
@@ -1024,7 +1034,7 @@ public class TestTaskAttempt {
     AMContainerMap containers = new AMContainerMap(
         mock(ContainerHeartbeatHandler.class), mock(TaskAttemptListener.class),
         new ContainerContextMatcher(), appCtx);
-    containers.addContainerIfNew(container);
+    containers.addContainerIfNew(container, 0, 0, 0);
 
     doReturn(new ClusterInfo()).when(appCtx).getClusterInfo();
     doReturn(containers).when(appCtx).getAllContainers();
@@ -1102,8 +1112,9 @@ public class TestTaskAttempt {
     MockEventHandler mockEh = new MockEventHandler();
     MockEventHandler eventHandler = spy(mockEh);
     TaskAttemptListener taListener = mock(TaskAttemptListener.class);
-    when(taListener.getAddress()).thenReturn(
-        new InetSocketAddress("localhost", 0));
+    TaskCommunicator taskComm = mock(TaskCommunicator.class);
+    doReturn(new InetSocketAddress("localhost", 0)).when(taskComm).getAddress();
+    doReturn(taskComm).when(taListener).getTaskCommunicator(0);
 
     Configuration taskConf = new Configuration();
     taskConf.setClass("fs.file.impl", StubbedFS.class, FileSystem.class);
@@ -1124,7 +1135,7 @@ public class TestTaskAttempt {
     AMContainerMap containers = new AMContainerMap(
         mock(ContainerHeartbeatHandler.class), mock(TaskAttemptListener.class),
         new ContainerContextMatcher(), appCtx);
-    containers.addContainerIfNew(container);
+    containers.addContainerIfNew(container, 0, 0, 0);
 
     doReturn(new ClusterInfo()).when(appCtx).getClusterInfo();
     doReturn(containers).when(appCtx).getAllContainers();

http://git-wip-us.apache.org/repos/asf/tez/blob/c47951ab/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestVertexImpl.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestVertexImpl.java b/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestVertexImpl.java
index a8eaca1..0aa3118 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestVertexImpl.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestVertexImpl.java
@@ -2176,7 +2176,7 @@ public class TestVertexImpl {
     doReturn(dagId).when(appContext).getCurrentDAGID();
     doReturn(dagId).when(dag).getID();
     doReturn(taskScheduler).when(appContext).getTaskScheduler();
-    doReturn(Resource.newInstance(102400, 60)).when(taskScheduler).getTotalResources();
+    doReturn(Resource.newInstance(102400, 60)).when(taskScheduler).getTotalResources(0);
     doReturn(historyEventHandler).when(appContext).getHistoryHandler();
     doReturn(dispatcher.getEventHandler()).when(appContext).getEventHandler();
 
@@ -3041,7 +3041,7 @@ public class TestVertexImpl {
     AMContainerMap containers = new AMContainerMap(
         mock(ContainerHeartbeatHandler.class), mock(TaskAttemptListener.class),
         new ContainerContextMatcher(), appContext);
-    containers.addContainerIfNew(container);
+    containers.addContainerIfNew(container, 0, 0, 0);
     doReturn(containers).when(appContext).getAllContainers();
 
     ta.handle(new TaskAttemptEventStartedRemotely(ta.getID(), contId, null));
@@ -3076,7 +3076,7 @@ public class TestVertexImpl {
     AMContainerMap containers = new AMContainerMap(
         mock(ContainerHeartbeatHandler.class), mock(TaskAttemptListener.class),
         new ContainerContextMatcher(), appContext);
-    containers.addContainerIfNew(container);
+    containers.addContainerIfNew(container, 0, 0, 0);
     doReturn(containers).when(appContext).getAllContainers();
 
     ta.handle(new TaskAttemptEventStartedRemotely(ta.getID(), contId, null));
@@ -3112,7 +3112,7 @@ public class TestVertexImpl {
     AMContainerMap containers = new AMContainerMap(
         mock(ContainerHeartbeatHandler.class), mock(TaskAttemptListener.class),
         new ContainerContextMatcher(), appContext);
-    containers.addContainerIfNew(container);
+    containers.addContainerIfNew(container, 0, 0, 0);
     doReturn(containers).when(appContext).getAllContainers();
 
     ta.handle(new TaskAttemptEventStartedRemotely(ta.getID(), contId, null));


[03/43] tez git commit: TEZ-2417. Tez UI: Counters are blank in the Attempts page if all attempts failed (Sreenath Somarajapuram via pramachandran)

Posted by ss...@apache.org.
TEZ-2417. Tez UI: Counters are blank in the Attempts page if all attempts failed (Sreenath Somarajapuram via pramachandran)


Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/da3fecd5
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/da3fecd5
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/da3fecd5

Branch: refs/heads/TEZ-2003
Commit: da3fecd53df56b96e29c8608068bf46a1995d611
Parents: 6a04fa4
Author: Prakash Ramachandran <pr...@hortonworks.com>
Authored: Wed May 6 19:27:39 2015 +0530
Committer: Prakash Ramachandran <pr...@hortonworks.com>
Committed: Wed May 6 19:27:39 2015 +0530

----------------------------------------------------------------------
 CHANGES.txt                                     |  1 +
 .../scripts/controllers/shared-controllers.js   |  1 -
 .../controllers/task-counters-controller.js     | 28 ++++++++++++++++++++
 .../app/scripts/models/TimelineRestAdapter.js   |  2 +-
 .../webapp/app/templates/common/counters.hbs    | 18 ++++++++-----
 5 files changed, 42 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tez/blob/da3fecd5/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 91dd9c4..f95e626 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -15,6 +15,7 @@ INCOMPATIBLE CHANGES
   TEZ-1993. Implement a pluggable InputSizeEstimator for grouping fairly
 
 ALL CHANGES:
+  TEZ-2417. Tez UI: Counters are blank in the Attempts page if all attempts failed
   TEZ-2366. Pig tez MiniTezCluster unit tests fail intermittently after TEZ-2333
   TEZ-2406. Tez UI: Display per-io counter columns in task and attempt pages under vertex
   TEZ-2384. Add warning message in the case of prewarn under non-session mode.

http://git-wip-us.apache.org/repos/asf/tez/blob/da3fecd5/tez-ui/src/main/webapp/app/scripts/controllers/shared-controllers.js
----------------------------------------------------------------------
diff --git a/tez-ui/src/main/webapp/app/scripts/controllers/shared-controllers.js b/tez-ui/src/main/webapp/app/scripts/controllers/shared-controllers.js
index 70d5e8c..e63fcf5 100644
--- a/tez-ui/src/main/webapp/app/scripts/controllers/shared-controllers.js
+++ b/tez-ui/src/main/webapp/app/scripts/controllers/shared-controllers.js
@@ -18,7 +18,6 @@
 
 App.DagCountersController =
     App.VertexCountersController =
-    App.TaskCountersController =
     App.TaskAttemptCountersController =
 
     Em.ObjectController.extend(App.ModelRefreshMixin);
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tez/blob/da3fecd5/tez-ui/src/main/webapp/app/scripts/controllers/task-counters-controller.js
----------------------------------------------------------------------
diff --git a/tez-ui/src/main/webapp/app/scripts/controllers/task-counters-controller.js b/tez-ui/src/main/webapp/app/scripts/controllers/task-counters-controller.js
new file mode 100644
index 0000000..5d514bd
--- /dev/null
+++ b/tez-ui/src/main/webapp/app/scripts/controllers/task-counters-controller.js
@@ -0,0 +1,28 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+App.TaskCountersController = Em.ObjectController.extend(App.ModelRefreshMixin, {
+  message: function () {
+    var status = this.get('content.status');
+    if(!this.get('content.counterGroups.length')) {
+      if(status == 'KILLED' || status == 'FAILED') {
+        return 'Task %@, please check the counters of individual task attempts.'.fmt(status);
+      }
+    }
+  }.property('content.status', 'content.counterGroups.length')
+});
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tez/blob/da3fecd5/tez-ui/src/main/webapp/app/scripts/models/TimelineRestAdapter.js
----------------------------------------------------------------------
diff --git a/tez-ui/src/main/webapp/app/scripts/models/TimelineRestAdapter.js b/tez-ui/src/main/webapp/app/scripts/models/TimelineRestAdapter.js
index 5db0990..704ba2f 100644
--- a/tez-ui/src/main/webapp/app/scripts/models/TimelineRestAdapter.js
+++ b/tez-ui/src/main/webapp/app/scripts/models/TimelineRestAdapter.js
@@ -93,7 +93,7 @@ var timelineJsonToDagMap = {
   endTime: 'otherinfo.endTime',
   name: 'primaryfilters.dagName.0',
   user: 'primaryfilters.user.0',
-  applicationId: 'otherinfo.applicationId',
+  applicationId: 'primaryfilters.applicationId.0',
   status: 'otherinfo.status',
   hasFailedTaskAttempts: {
     custom: function(source) {

http://git-wip-us.apache.org/repos/asf/tez/blob/da3fecd5/tez-ui/src/main/webapp/app/templates/common/counters.hbs
----------------------------------------------------------------------
diff --git a/tez-ui/src/main/webapp/app/templates/common/counters.hbs b/tez-ui/src/main/webapp/app/templates/common/counters.hbs
index 30419e1..44d0b2c 100644
--- a/tez-ui/src/main/webapp/app/templates/common/counters.hbs
+++ b/tez-ui/src/main/webapp/app/templates/common/counters.hbs
@@ -21,9 +21,15 @@
   time=timeStamp
   refresh='refresh'
 }}
-<div class='table-container margin-small-vertical'>
-  {{counter-table-component
-    data=counterGroups
-    timeStamp=timeStamp
-  }}
-</div>
+{{#if counterGroups.length}}
+  <div class='table-container margin-small-vertical'>
+    {{counter-table-component
+      data=counterGroups
+      timeStamp=timeStamp
+    }}
+  </div>
+{{else}}
+  <h4>Counters not available!</h4>
+{{/if}}
+<h6>{{message}}</h6>
+


[38/43] tez git commit: TEZ-2347. Expose additional information in TaskCommunicatorContext. (sseth)

Posted by ss...@apache.org.
TEZ-2347. Expose additional information in TaskCommunicatorContext. (sseth)


Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/44f5e8f1
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/44f5e8f1
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/44f5e8f1

Branch: refs/heads/TEZ-2003
Commit: 44f5e8f1b8392ec34177c7bfb968e4f6fcb3c6bb
Parents: e7c3cc7
Author: Siddharth Seth <ss...@apache.org>
Authored: Mon Apr 20 13:17:31 2015 -0700
Committer: Siddharth Seth <ss...@apache.org>
Committed: Fri May 8 14:41:55 2015 -0700

----------------------------------------------------------------------
 TEZ-2003-CHANGES.txt                            |  1 +
 .../tez/dag/api/TaskCommunicatorContext.java    | 50 ++++++++++++++++++++
 .../dag/app/TaskCommunicatorContextImpl.java    | 50 ++++++++++++++++++++
 .../java/org/apache/tez/dag/app/dag/DAG.java    |  2 +
 .../java/org/apache/tez/dag/app/dag/Task.java   |  1 +
 .../org/apache/tez/dag/app/dag/TaskAttempt.java |  6 +++
 .../apache/tez/dag/app/dag/impl/DAGImpl.java    | 10 ++++
 .../tez/dag/app/dag/impl/TaskAttemptImpl.java   | 12 +++++
 .../apache/tez/dag/app/dag/impl/TaskImpl.java   | 13 ++++-
 9 files changed, 144 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tez/blob/44f5e8f1/TEZ-2003-CHANGES.txt
----------------------------------------------------------------------
diff --git a/TEZ-2003-CHANGES.txt b/TEZ-2003-CHANGES.txt
index ca5225e..7c13110 100644
--- a/TEZ-2003-CHANGES.txt
+++ b/TEZ-2003-CHANGES.txt
@@ -16,5 +16,6 @@ ALL CHANGES:
   TEZ-2284. Separate TaskReporter into an interface.
   TEZ-2285. Allow TaskCommunicators to indicate task/container liveness.
   TEZ-2302. Allow TaskCommunicators to subscribe for Vertex updates.
+  TEZ-2347. Expose additional information in TaskCommunicatorContext.
 
 INCOMPATIBLE CHANGES:

http://git-wip-us.apache.org/repos/asf/tez/blob/44f5e8f1/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicatorContext.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicatorContext.java b/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicatorContext.java
index 19caed9..56345ab 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicatorContext.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicatorContext.java
@@ -16,6 +16,7 @@ package org.apache.tez.dag.api;
 
 import javax.annotation.Nullable;
 import java.io.IOException;
+import java.util.Collection;
 import java.util.Set;
 
 import org.apache.hadoop.security.Credentials;
@@ -71,4 +72,53 @@ public interface TaskCommunicatorContext {
   // TODO TEZ-2003 API. Should a method exist for task succeeded.
 
   // TODO Eventually Add methods to report availability stats to the scheduler.
+
+  /**
+   * Get the name of the currently executing dag
+   * @return the name of the currently executing dag
+   */
+  String getCurretnDagName();
+
+  /**
+   * Get the name of the Input vertices for the specified vertex.
+   * Root Inputs are not returned.
+   * @param vertexName the vertex for which source vertex names will be returned
+   * @return an Iterable containing the list of input vertices for the specified vertex
+   */
+  Iterable<String> getInputVertexNames(String vertexName);
+
+  /**
+   * Get the total number of tasks in the given vertex
+   * @param vertexName
+   * @return total number of tasks in this vertex
+   */
+  int getVertexTotalTaskCount(String vertexName);
+
+  /**
+   * Get the number of completed tasks for a given vertex
+   * @param vertexName the vertex name
+   * @return the number of completed tasks for the vertex
+   */
+  int getVertexCompletedTaskCount(String vertexName);
+
+  /**
+   * Get the number of running tasks for a given vertex
+   * @param vertexName the vertex name
+   * @return the number of running tasks for the vertex
+   */
+  int getVertexRunningTaskCount(String vertexName);
+
+  /**
+   * Get the start time for the first attempt of the specified task
+   * @param vertexName the vertex to which the task belongs
+   * @param taskIndex the index of the task
+   * @return the start time for the first attempt of the task
+   */
+  long getFirstAttemptStartTime(String vertexName, int taskIndex);
+
+  /**
+   * Get the start time for the currently executing DAG
+   * @return time when the current dag started executing
+   */
+  long getDagStartTime();
 }

http://git-wip-us.apache.org/repos/asf/tez/blob/44f5e8f1/tez-dag/src/main/java/org/apache/tez/dag/app/TaskCommunicatorContextImpl.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/TaskCommunicatorContextImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/TaskCommunicatorContextImpl.java
index 3714c3c..4cb0c93 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/TaskCommunicatorContextImpl.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/TaskCommunicatorContextImpl.java
@@ -18,7 +18,9 @@ import javax.annotation.Nullable;
 import java.io.IOException;
 import java.util.Set;
 
+import com.google.common.base.Function;
 import com.google.common.base.Preconditions;
+import com.google.common.collect.Iterables;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.security.Credentials;
 import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
@@ -31,6 +33,7 @@ import org.apache.tez.dag.api.TezException;
 import org.apache.tez.dag.api.TezUncheckedException;
 import org.apache.tez.dag.api.event.VertexState;
 import org.apache.tez.dag.api.event.VertexStateUpdate;
+import org.apache.tez.dag.app.dag.Vertex;
 import org.apache.tez.dag.app.dag.VertexStateUpdateListener;
 import org.apache.tez.dag.records.TezTaskAttemptID;
 
@@ -111,6 +114,53 @@ public class TaskCommunicatorContextImpl implements TaskCommunicatorContext, Ver
     context.getCurrentDAG().getStateChangeNotifier().registerForVertexUpdates(vertexName, stateSet, this);
   }
 
+  @Override
+  public String getCurretnDagName() {
+    return context.getCurrentDAG().getName();
+  }
+
+  @Override
+  public Iterable<String> getInputVertexNames(String vertexName) {
+    Preconditions.checkNotNull(vertexName, "VertexName cannot be null: " + vertexName);
+    Vertex vertex = context.getCurrentDAG().getVertex(vertexName);
+    Set<Vertex> sources = vertex.getInputVertices().keySet();
+    return Iterables.transform(sources, new Function<Vertex, String>() {
+      @Override
+      public String apply(@Nullable Vertex input) {
+        return input.getName();
+      }
+    });
+  }
+
+  @Override
+  public int getVertexTotalTaskCount(String vertexName) {
+    Preconditions.checkArgument(vertexName != null, "VertexName must be specified");
+    return context.getCurrentDAG().getVertex(vertexName).getTotalTasks();
+  }
+
+  @Override
+  public int getVertexCompletedTaskCount(String vertexName) {
+    Preconditions.checkArgument(vertexName != null, "VertexName must be specified");
+    return context.getCurrentDAG().getVertex(vertexName).getCompletedTasks();
+  }
+
+  @Override
+  public int getVertexRunningTaskCount(String vertexName) {
+    Preconditions.checkArgument(vertexName != null, "VertexName must be specified");
+    return context.getCurrentDAG().getVertex(vertexName).getRunningTasks();
+  }
+
+  @Override
+  public long getFirstAttemptStartTime(String vertexName, int taskIndex) {
+    Preconditions.checkArgument(vertexName != null, "VertexName must be specified");
+    Preconditions.checkArgument(taskIndex >=0, "TaskIndex must be > 0");
+    return context.getCurrentDAG().getVertex(vertexName).getTask(taskIndex).getFirstAttemptStartTime();
+  }
+
+  @Override
+  public long getDagStartTime() {
+    return context.getCurrentDAG().getStartTime();
+  }
 
   @Override
   public void onStateUpdated(VertexStateUpdate event) {

http://git-wip-us.apache.org/repos/asf/tez/blob/44f5e8f1/tez-dag/src/main/java/org/apache/tez/dag/app/dag/DAG.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/DAG.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/DAG.java
index 6d6872b..458362f 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/DAG.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/DAG.java
@@ -94,6 +94,8 @@ public interface DAG {
 
   Map<String, TezVertexID> getVertexNameIDMapping();
 
+  long getStartTime();
+
   StateChangeNotifier getStateChangeNotifier();
 
 }

http://git-wip-us.apache.org/repos/asf/tez/blob/44f5e8f1/tez-dag/src/main/java/org/apache/tez/dag/app/dag/Task.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/Task.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/Task.java
index b798fce..3af14b5 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/Task.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/Task.java
@@ -66,4 +66,5 @@ public interface Task {
 
   public void registerTezEvent(TezEvent tezEvent);
 
+  long getFirstAttemptStartTime();
 }

http://git-wip-us.apache.org/repos/asf/tez/blob/44f5e8f1/tez-dag/src/main/java/org/apache/tez/dag/app/dag/TaskAttempt.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/TaskAttempt.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/TaskAttempt.java
index 6c85cc2..26613e9 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/TaskAttempt.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/TaskAttempt.java
@@ -122,6 +122,12 @@ public interface TaskAttempt {
    */
   long getLaunchTime();
 
+  /**
+   * Get the time at which this attempt was scheduled
+   * @return the time at which this attempt was scheduled, 0 if it hasn't been scheduled yet
+   */
+  long getScheduleTime();
+
   /** 
    * @return attempt's finish time. If attempt is not finished
    *  yet, returns 0.

http://git-wip-us.apache.org/repos/asf/tez/blob/44f5e8f1/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/DAGImpl.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/DAGImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/DAGImpl.java
index 4d10711..7870697 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/DAGImpl.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/DAGImpl.java
@@ -696,6 +696,16 @@ public class DAGImpl implements org.apache.tez.dag.app.dag.DAG,
   }
 
   @Override
+  public long getStartTime() {
+    readLock.lock();
+    try {
+      return this.startTime;
+    } finally {
+      readLock.unlock();
+    }
+  }
+
+  @Override
   public StateChangeNotifier getStateChangeNotifier() {
     return entityUpdateTracker;
   }

http://git-wip-us.apache.org/repos/asf/tez/blob/44f5e8f1/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TaskAttemptImpl.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TaskAttemptImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TaskAttemptImpl.java
index 11d4df9..092520d 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TaskAttemptImpl.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TaskAttemptImpl.java
@@ -135,6 +135,7 @@ public class TaskAttemptImpl implements TaskAttempt,
   protected final AppContext appContext;
   private final TaskHeartbeatHandler taskHeartbeatHandler;
   private long launchTime = 0;
+  private long scheduleTime = 0;
   private long finishTime = 0;
   private String trackerName;
   private int httpPort;
@@ -671,6 +672,16 @@ public class TaskAttemptImpl implements TaskAttempt,
   }
 
   @Override
+  public long getScheduleTime() {
+    readLock.lock();
+    try {
+      return scheduleTime;
+    } finally {
+      readLock.unlock();
+    }
+  }
+
+  @Override
   public long getFinishTime() {
     readLock.lock();
     try {
@@ -1030,6 +1041,7 @@ public class TaskAttemptImpl implements TaskAttempt,
     public TaskAttemptStateInternal transition(TaskAttemptImpl ta, TaskAttemptEvent event) {
       TaskAttemptEventSchedule scheduleEvent = (TaskAttemptEventSchedule) event;
 
+      ta.scheduleTime = ta.clock.getTime();
       // TODO Creating the remote task here may not be required in case of
       // recovery.
 

http://git-wip-us.apache.org/repos/asf/tez/blob/44f5e8f1/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TaskImpl.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TaskImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TaskImpl.java
index 2e884e7..4319eb3 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TaskImpl.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TaskImpl.java
@@ -1492,7 +1492,18 @@ public class TaskImpl implements Task, EventHandler<TaskEvent> {
       this.writeLock.unlock();
     }
   }
-  
+
+  @Override
+  public long getFirstAttemptStartTime() {
+    readLock.lock();
+    try {
+      // The first attempt will always have an index of 0.
+      return getAttempt(TezTaskAttemptID.getInstance(getTaskId(), 0)).getScheduleTime();
+    } finally {
+      readLock.unlock();
+    }
+  }
+
   private static class KillTransition
     implements SingleArcTransition<TaskImpl, TaskEvent> {
     @Override


[14/43] tez git commit: TEZ-776. Reduce AM mem usage caused by storing TezEvents (bikas)

Posted by ss...@apache.org.
TEZ-776. Reduce AM mem usage caused by storing TezEvents (bikas)


Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/05f77fe2
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/05f77fe2
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/05f77fe2

Branch: refs/heads/TEZ-2003
Commit: 05f77fe2b210341a16ead9fc51e53093c836d860
Parents: a382324
Author: Bikas Saha <bi...@apache.org>
Authored: Thu May 7 15:44:38 2015 -0700
Committer: Bikas Saha <bi...@apache.org>
Committed: Thu May 7 15:44:38 2015 -0700

----------------------------------------------------------------------
 CHANGES.txt                                     |   1 +
 tez-api/findbugs-exclude.xml                    |  12 +
 .../apache/tez/dag/api/EdgeManagerPlugin.java   |   4 +-
 .../tez/dag/api/EdgeManagerPluginOnDemand.java  | 332 +++++++++++++++++++
 .../api/events/CompositeDataMovementEvent.java  |  21 ++
 .../runtime/api/events/DataMovementEvent.java   |  33 +-
 .../runtime/api/events/InputFailedEvent.java    |  16 +
 tez-dag/findbugs-exclude.xml                    |   5 +-
 .../tez/dag/app/TaskAttemptEventInfo.java       |  40 +++
 .../dag/app/TaskAttemptListenerImpTezDag.java   |   6 +-
 .../java/org/apache/tez/dag/app/dag/Vertex.java |   5 +
 .../dag/app/dag/impl/BroadcastEdgeManager.java  |  41 ++-
 .../org/apache/tez/dag/app/dag/impl/Edge.java   | 244 ++++++++++++--
 .../dag/app/dag/impl/OneToOneEdgeManager.java   |  67 +++-
 .../app/dag/impl/ScatterGatherEdgeManager.java  |  83 ++++-
 .../apache/tez/dag/app/dag/impl/TaskImpl.java   |   6 -
 .../apache/tez/dag/app/dag/impl/VertexImpl.java | 323 ++++++++++++++----
 .../apache/tez/dag/app/MockDAGAppMaster.java    |  25 +-
 .../tez/dag/app/TestMemoryWithEvents.java       | 219 ++++++++++++
 .../tez/dag/app/TestMockDAGAppMaster.java       | 160 ++++++++-
 .../app/TestTaskAttemptListenerImplTezDag.java  |  33 +-
 .../tez/dag/app/dag/impl/TestDAGImpl.java       |  90 ++++-
 .../tez/dag/app/dag/impl/TestVertexImpl.java    | 208 +++++++++++-
 .../org/apache/tez/test/EdgeManagerForTest.java |  33 +-
 .../org/apache/tez/runtime/RuntimeTask.java     |  10 +
 .../runtime/api/impl/TezHeartbeatResponse.java  |  12 +
 .../apache/tez/runtime/task/TaskReporter.java   |  12 +-
 .../vertexmanager/ShuffleVertexManager.java     | 117 ++++++-
 .../tez/test/TestExceptionPropagation.java      |  39 ++-
 29 files changed, 2000 insertions(+), 197 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tez/blob/05f77fe2/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 8de61b0..ba8e9d8 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -20,6 +20,7 @@ INCOMPATIBLE CHANGES
     Default max limit increased. Should not affect existing users.
 
 ALL CHANGES:
+  TEZ-776. Reduce AM mem usage caused by storing TezEvents
   TEZ-2423. Tez UI: Remove Attempt Index column from task->attempts page
   TEZ-2416. Tez UI: Make tooltips display faster.
   TEZ-2404. Handle DataMovementEvent before its TaskAttemptCompletedEvent

http://git-wip-us.apache.org/repos/asf/tez/blob/05f77fe2/tez-api/findbugs-exclude.xml
----------------------------------------------------------------------
diff --git a/tez-api/findbugs-exclude.xml b/tez-api/findbugs-exclude.xml
index b928a44..07792e6 100644
--- a/tez-api/findbugs-exclude.xml
+++ b/tez-api/findbugs-exclude.xml
@@ -85,4 +85,16 @@
     <Bug pattern="SWL_SLEEP_WITH_LOCK_HELD"/>
   </Match>
 
+  <Match>
+    <Class name="org.apache.tez.dag.api.EdgeManagerPluginOnDemand$EventRouteMetadata"/>
+    <Method name="getSourceIndices"/>
+    <Bug pattern="EI_EXPOSE_REP"/>
+  </Match>
+
+  <Match>
+    <Class name="org.apache.tez.dag.api.EdgeManagerPluginOnDemand$EventRouteMetadata"/>
+    <Method name="getTargetIndices"/>
+    <Bug pattern="EI_EXPOSE_REP"/>
+  </Match>
+
 </FindBugsFilter>

http://git-wip-us.apache.org/repos/asf/tez/blob/05f77fe2/tez-api/src/main/java/org/apache/tez/dag/api/EdgeManagerPlugin.java
----------------------------------------------------------------------
diff --git a/tez-api/src/main/java/org/apache/tez/dag/api/EdgeManagerPlugin.java b/tez-api/src/main/java/org/apache/tez/dag/api/EdgeManagerPlugin.java
index 8768e7d..4e22f63 100644
--- a/tez-api/src/main/java/org/apache/tez/dag/api/EdgeManagerPlugin.java
+++ b/tez-api/src/main/java/org/apache/tez/dag/api/EdgeManagerPlugin.java
@@ -101,7 +101,7 @@ public abstract class EdgeManagerPlugin {
   public abstract void routeDataMovementEventToDestination(DataMovementEvent event,
       int sourceTaskIndex, int sourceOutputIndex,
       Map<Integer, List<Integer>> destinationTaskAndInputIndices) throws Exception;
-  
+
   /**
    * Return the routing information to inform consumers about the failure of a
    * source task whose outputs have been potentially lost. The return map has
@@ -144,7 +144,7 @@ public abstract class EdgeManagerPlugin {
       int destinationTaskIndex, int destinationFailedInputIndex) throws Exception;
 
   /**
-   * Return ahe {@link org.apache.tez.dag.api.EdgeManagerPluginContext} for this specific instance of
+   * Return the {@link org.apache.tez.dag.api.EdgeManagerPluginContext} for this specific instance of
    * the vertex manager.
    *
    * @return the {@link org.apache.tez.dag.api.EdgeManagerPluginContext} for the input

http://git-wip-us.apache.org/repos/asf/tez/blob/05f77fe2/tez-api/src/main/java/org/apache/tez/dag/api/EdgeManagerPluginOnDemand.java
----------------------------------------------------------------------
diff --git a/tez-api/src/main/java/org/apache/tez/dag/api/EdgeManagerPluginOnDemand.java b/tez-api/src/main/java/org/apache/tez/dag/api/EdgeManagerPluginOnDemand.java
new file mode 100644
index 0000000..05c0c62
--- /dev/null
+++ b/tez-api/src/main/java/org/apache/tez/dag/api/EdgeManagerPluginOnDemand.java
@@ -0,0 +1,332 @@
+/**
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements.  See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership.  The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+package org.apache.tez.dag.api;
+
+import java.util.List;
+import java.util.Map;
+
+import javax.annotation.Nullable;
+
+import org.apache.hadoop.classification.InterfaceAudience.Public;
+import org.apache.hadoop.classification.InterfaceStability.Unstable;
+import org.apache.tez.runtime.api.Event;
+import org.apache.tez.runtime.api.events.CompositeDataMovementEvent;
+import org.apache.tez.runtime.api.events.DataMovementEvent;
+import org.apache.tez.runtime.api.events.InputFailedEvent;
+import org.apache.tez.runtime.api.events.InputReadErrorEvent;
+
+/**
+ * This interface defines the routing of the event between tasks of producer and 
+ * consumer vertices. The routing is bi-directional. Users can customize the 
+ * routing by providing an implementation of this interface.
+ */
+@Public
+@Unstable
+public abstract class EdgeManagerPluginOnDemand extends EdgeManagerPlugin {
+
+  /**
+   * Class to provide routing metadata for {@link Event}s to be routed between
+   * producer and consumer tasks. The routing data enabled the system to send 
+   * the event from the producer task output to the consumer task input
+   */
+  public static class EventRouteMetadata {
+    private final int numEvents;
+    private final int[] targetIndices;
+    private final int[] sourceIndices;
+    
+    /**
+     * Create an {@link EventRouteMetadata} that will create numEvents copies of
+     * the {@link Event} to be routed. Use this to create
+     * {@link EventRouteMetadata} for {@link DataMovementEvent}s or
+     * {@link InputFailedEvent}s where the target input indices must be
+     * specified to route those events. Typically numEvents would be 1 for these
+     * events.
+     * 
+     * @param numEvents
+     *          Number of copies of the event to be routed
+     * @param targetIndices
+     *          Target input indices. The array length must match the number of
+     *          events specified when creating the {@link EventRouteMetadata}
+     *          object
+     * @return {@link EventRouteMetadata}
+     */
+    public static EventRouteMetadata create(int numEvents, int[] targetIndices) {
+      return new EventRouteMetadata(numEvents, targetIndices, null);
+    }
+    
+    /**
+     * Create an {@link EventRouteMetadata} that will create numEvents copies of
+     * the {@link Event} to be routed. Use this to create
+     * {@link EventRouteMetadata} for {@link CompositeDataMovementEvent} where
+     * the target input indices and source output indices must be specified to
+     * route those events. Typically numEvents would be 1 for these events.
+     * 
+     * @param numEvents
+     *          Number of copies of the event to be routed
+     * @param targetIndices
+     *          Target input indices. The array length must match the number of
+     *          events specified when creating the {@link EventRouteMetadata}
+     *          object
+     * @param sourceIndices
+     *          Source output indices. The array length must match the number of
+     *          events specified when creating the {@link EventRouteMetadata}
+     *          object
+     * @return {@link EventRouteMetadata}
+     */
+    public static EventRouteMetadata create(int numEvents, int[] targetIndices, int[] sourceIndices) {
+      return new EventRouteMetadata(numEvents, targetIndices, sourceIndices);
+    }
+
+    private EventRouteMetadata(int numEvents, int[] targetIndices, int[] sourceIndices) {
+      this.numEvents = numEvents;
+      this.targetIndices = targetIndices;
+      this.sourceIndices = sourceIndices;
+    }
+
+    /**
+     * Get the number of copies of the event to be routed
+     * @return Number of copies
+     */
+    public int getNumEvents() {
+      return numEvents;
+    }
+
+    /**
+     * Get the target input indices
+     * @return Target input indices
+     */
+    public @Nullable int[] getTargetIndices() {
+      return targetIndices;
+    }
+
+    /**
+     * Get the source output indices
+     * @return Source output indices
+     */
+    public @Nullable int[] getSourceIndices() {
+      return sourceIndices;
+    }
+  }
+
+  /**
+   * Create an instance of the {@link EdgeManagerPluginOnDemand}. Classes
+   * extending this to create a {@link EdgeManagerPluginOnDemand}, must provide
+   * the same constructor so that Tez can create an instance of the class at
+   * runtime.
+   * 
+   * @param context
+   *          the context within which this {@link EdgeManagerPluginOnDemand}
+   *          will run. Includes information like configuration which the user
+   *          may have specified while setting up the edge.
+   */
+  public EdgeManagerPluginOnDemand(EdgeManagerPluginContext context) {
+    super(context);
+  }
+
+  /**
+   * Initializes the EdgeManagerPlugin. This method is called in the following
+   * circumstances </p> 1. when initializing an EdgeManagerPlugin for the first time.
+   * </p> 2. When an EdgeManagerPlugin is replaced at runtime. At this point, an
+   * EdgeManagerPlugin instance is created and setup by the user. The initialize
+   * method will be called with the original {@link EdgeManagerPluginContext} when the
+   * EdgeManagerPlugin is replaced.
+   * @throws Exception
+   */
+  public abstract void initialize() throws Exception;
+  
+  /**
+   * This method will be invoked just before routing of events will begin. The
+   * plugin can use this opportunity to make any runtime initialization's that
+   * depend on the actual state of the DAG or vertices.
+   */
+  public abstract void prepareForRouting() throws Exception;
+
+  /**
+   * Get the number of physical inputs on the destination task
+   * @param destinationTaskIndex Index of destination task for which number of 
+   * inputs is needed
+   * @return Number of physical inputs on the destination task
+   * @throws Exception
+   */
+  public abstract int getNumDestinationTaskPhysicalInputs(int destinationTaskIndex) throws Exception;
+
+  /**
+   * Get the number of physical outputs on the source task
+   * @param sourceTaskIndex Index of the source task for which number of outputs 
+   * is needed
+   * @return Number of physical outputs on the source task
+   * @throws Exception
+   */
+  public abstract int getNumSourceTaskPhysicalOutputs(int sourceTaskIndex) throws Exception;
+  
+  /**
+   * Get the number of destination tasks that consume data from the source task
+   * @param sourceTaskIndex Source task index
+   * @throws Exception
+   */
+  public abstract int getNumDestinationConsumerTasks(int sourceTaskIndex) throws Exception;
+  
+  /**
+   * Return the source task index to which to send the input error event
+   * 
+   * @param destinationTaskIndex
+   *          Destination task that reported the error
+   * @param destinationFailedInputIndex
+   *          Index of the physical input on the destination task that reported 
+   *          the error
+   * @return Index of the source task that created the unavailable input
+   * @throws Exception
+   */
+  public abstract int routeInputErrorEventToSource(int destinationTaskIndex,
+      int destinationFailedInputIndex) throws Exception;
+  
+  /**
+   * The method provides the {@link EventRouteMetadata} to route a
+   * {@link DataMovementEvent} produced by the given source task to the given
+   * destination task. The returned {@link EventRouteMetadata} should have the
+   * target input indices set to enable the routing. If the routing metadata is
+   * common across different events then the plugin can cache and reuse the same
+   * object.
+   * 
+   * @param sourceTaskIndex
+   *          The index of the task in the source vertex of this edge that
+   *          produced a {@link DataMovementEvent}
+   * @param sourceOutputIndex
+   *          Index of the physical output on the source task that produced the
+   *          event
+   * @param destinationTaskIndex
+   * @return {@link EventRouteMetadata} with target indices set. Maybe null if
+   *         the given destination task does not read input from the given
+   *         source task.
+   * @throws Exception
+   */
+  public abstract @Nullable EventRouteMetadata routeDataMovementEventToDestination(int sourceTaskIndex,
+      int sourceOutputIndex, int destinationTaskIndex) throws Exception;
+
+  /**
+   * The method provides the {@link EventRouteMetadata} to route a
+   * {@link CompositeDataMovementEvent} produced by the given source task to the
+   * given destination task. The returned {@link EventRouteMetadata} should have
+   * the target input indices and source output indices set to enable the
+   * routing. If the routing metadata is common across different events then the
+   * plugin can cache and reuse the same object.
+   * 
+   * @param sourceTaskIndex
+   *          The index of the task in the source vertex of this edge that
+   *          produced a {@link CompositeDataMovementEvent}
+   * @param destinationTaskIndex
+   *          The index of the task in the destination vertex of this edge
+   * @return {@link EventRouteMetadata} with source and target indices set. This
+   *         may be null if the destination task does not read data from the
+   *         source task.
+   * @throws Exception
+   */
+  public abstract @Nullable EventRouteMetadata routeCompositeDataMovementEventToDestination(
+      int sourceTaskIndex, int destinationTaskIndex) throws Exception;
+
+  /**
+   * The method provides the {@link EventRouteMetadata} to route an
+   * {@link InputFailedEvent} produced by the given source task to the given
+   * destination task. The returned {@link EventRouteMetadata} should have the
+   * target input indices set to enable the routing. If the routing metadata is
+   * common across different events then the plugin can cache and reuse the same
+   * object.
+   * 
+   * @param sourceTaskIndex
+   *          The index of the failed task in the source vertex of this edge.
+   * @param destinationTaskIndex
+   *          The index of a task in the destination vertex of this edge.
+   * @return {@link EventRouteMetadata} with target indices set. Maybe null if
+   *         the given destination task does not read input from the given
+   *         source task.
+   * @throws Exception
+   */
+  public abstract @Nullable EventRouteMetadata routeInputSourceTaskFailedEventToDestination(
+      int sourceTaskIndex, int destinationTaskIndex) throws Exception;
+  
+  /**
+   * Return the {@link org.apache.tez.dag.api.EdgeManagerPluginContext} for this specific instance of
+   * the vertex manager.
+   *
+   * @return the {@link org.apache.tez.dag.api.EdgeManagerPluginContext} for the input
+   */
+  public EdgeManagerPluginContext getContext() {
+    return super.getContext();
+  }
+
+  // Empty implementations of EdgeManagerPlugin interfaces that are not needed
+  /**
+   * Return the routing information to inform consumers about the source task
+   * output that is now available. The return map has the routing information.
+   * The event will be routed to every destination task index in the key of the
+   * map. Every physical input in the value for that task key will receive the
+   * input.
+   * 
+   * @param event
+   *          Data movement event that contains the output information
+   * @param sourceTaskIndex
+   *          Source task that produced the event
+   * @param sourceOutputIndex
+   *          Index of the physical output on the source task that produced the
+   *          event
+   * @param destinationTaskAndInputIndices
+   *          Map via which the routing information is returned
+   * @throws Exception
+   */
+  public void routeDataMovementEventToDestination(DataMovementEvent event,
+      int sourceTaskIndex, int sourceOutputIndex,
+      Map<Integer, List<Integer>> destinationTaskAndInputIndices) throws Exception {}
+
+  /**
+   * Return the routing information to inform consumers about the failure of a
+   * source task whose outputs have been potentially lost. The return map has
+   * the routing information. The failure notification event will be sent to
+   * every task index in the key of the map. Every physical input in the value
+   * for that task key will receive the failure notification. This method will
+   * be called once for every source task failure and information for all
+   * affected destinations must be provided in that invocation.
+   * 
+   * @param sourceTaskIndex
+   *          Source task
+   * @param destinationTaskAndInputIndices
+   *          Map via which the routing information is returned
+   * @throws Exception
+   */
+  public void routeInputSourceTaskFailedEventToDestination(int sourceTaskIndex,
+      Map<Integer, List<Integer>> destinationTaskAndInputIndices) throws Exception {}
+
+  /**
+   * Return the source task index to which to send the input error event
+   * 
+   * @param event
+   *          Input read error event. Has more information about the error
+   * @param destinationTaskIndex
+   *          Destination task that reported the error
+   * @param destinationFailedInputIndex
+   *          Index of the physical input on the destination task that reported 
+   *          the error
+   * @return Index of the source task that created the unavailable input
+   * @throws Exception
+   */
+  public int routeInputErrorEventToSource(InputReadErrorEvent event,
+      int destinationTaskIndex, int destinationFailedInputIndex) throws Exception { 
+    return routeInputErrorEventToSource(destinationTaskIndex, destinationFailedInputIndex);
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/tez/blob/05f77fe2/tez-api/src/main/java/org/apache/tez/runtime/api/events/CompositeDataMovementEvent.java
----------------------------------------------------------------------
diff --git a/tez-api/src/main/java/org/apache/tez/runtime/api/events/CompositeDataMovementEvent.java b/tez-api/src/main/java/org/apache/tez/runtime/api/events/CompositeDataMovementEvent.java
index b38fda3..c45d272 100644
--- a/tez-api/src/main/java/org/apache/tez/runtime/api/events/CompositeDataMovementEvent.java
+++ b/tez-api/src/main/java/org/apache/tez/runtime/api/events/CompositeDataMovementEvent.java
@@ -21,6 +21,7 @@ package org.apache.tez.runtime.api.events;
 import java.nio.ByteBuffer;
 import java.util.Iterator;
 
+import org.apache.hadoop.classification.InterfaceAudience.Private;
 import org.apache.hadoop.classification.InterfaceAudience.Public;
 import org.apache.tez.runtime.api.Event;
 
@@ -65,6 +66,26 @@ public class CompositeDataMovementEvent extends Event {
                                                   ByteBuffer userPayload) {
     return new CompositeDataMovementEvent(srcIndexStart, count, userPayload);
   }
+  
+  /**
+   * Expand the {@link CompositeDataMovementEvent} into a routable
+   * {@link DataMovementEvent} by providing the source output index and the
+   * target input index.
+   * 
+   * @param sourceIndex
+   *          The index of the physical output represented by the
+   *          {@link DataMovementEvent}
+   * @param targetIndex
+   *          The index of the physical input to which the given
+   *          {@link DataMovementEvent} should be routed.
+   * @return {@link DataMovementEvent} created from the
+   *         {@link CompositeDataMovementEvent} with indices specified by the
+   *         method parameters
+   */
+  @Private
+  public DataMovementEvent expand(int sourceIndex, int targetIndex) {
+    return new DataMovementEvent(sourceIndex, targetIndex, version, userPayload);
+  }
 
   public int getSourceIndexStart() {
     return sourceIndexStart;

http://git-wip-us.apache.org/repos/asf/tez/blob/05f77fe2/tez-api/src/main/java/org/apache/tez/runtime/api/events/DataMovementEvent.java
----------------------------------------------------------------------
diff --git a/tez-api/src/main/java/org/apache/tez/runtime/api/events/DataMovementEvent.java b/tez-api/src/main/java/org/apache/tez/runtime/api/events/DataMovementEvent.java
index b9c1cc4..05c3d3f 100644
--- a/tez-api/src/main/java/org/apache/tez/runtime/api/events/DataMovementEvent.java
+++ b/tez-api/src/main/java/org/apache/tez/runtime/api/events/DataMovementEvent.java
@@ -23,6 +23,7 @@ import java.nio.ByteBuffer;
 import org.apache.hadoop.classification.InterfaceAudience.Private;
 import org.apache.hadoop.classification.InterfaceAudience.Public;
 import org.apache.tez.runtime.api.Event;
+import org.apache.tez.runtime.api.Output;
 
 /**
  * Event used by user code to send information between tasks. An output can
@@ -56,14 +57,8 @@ public final class DataMovementEvent extends Event {
   private int version;
 
 
-  private DataMovementEvent(int sourceIndex,
-                            ByteBuffer userPayload) {
-    this.userPayload = userPayload;
-    this.sourceIndex = sourceIndex;
-  }
-
   @Private
-  private DataMovementEvent(int sourceIndex,
+  DataMovementEvent(int sourceIndex,
                             int targetIndex,
                             int version,
                             ByteBuffer userPayload) {
@@ -74,20 +69,21 @@ public final class DataMovementEvent extends Event {
   }
 
   private DataMovementEvent(ByteBuffer userPayload) {
-    this(-1, userPayload);
+    this(-1, -1, -1, userPayload);
   }
 
   /**
-   * User Event constructor
+   * User Event constructor for {@link Output}s
    * @param sourceIndex Index to identify the physical edge of the input/output
    * that generated the event
    * @param userPayload User Payload of the User Event
    */
   public static DataMovementEvent create(int sourceIndex,
                                          ByteBuffer userPayload) {
-    return new DataMovementEvent(sourceIndex, userPayload);
+    return new DataMovementEvent(sourceIndex, -1, -1, userPayload);
   }
-
+  
+  @Private
   /**
    * Constructor for Processor-generated User Events
    * @param userPayload
@@ -103,6 +99,21 @@ public final class DataMovementEvent extends Event {
                                          ByteBuffer userPayload) {
     return new DataMovementEvent(sourceIndex, targetIndex, version, userPayload);
   }
+  
+  /**
+   * Make a routable copy of the {@link DataMovementEvent} by adding a target
+   * input index
+   * 
+   * @param targetIndex
+   *          The index of the physical input to which this
+   *          {@link DataMovementEvent} should be routed
+   * @return Copy of this {@link DataMovementEvent} with the target input index
+   *         added to it
+   */
+  @Private
+  public DataMovementEvent makeCopy(int targetIndex) {
+    return new DataMovementEvent(sourceIndex, targetIndex, version, userPayload);
+  }
 
   public ByteBuffer getUserPayload() {
     return userPayload == null ? null : userPayload.asReadOnlyBuffer();

http://git-wip-us.apache.org/repos/asf/tez/blob/05f77fe2/tez-api/src/main/java/org/apache/tez/runtime/api/events/InputFailedEvent.java
----------------------------------------------------------------------
diff --git a/tez-api/src/main/java/org/apache/tez/runtime/api/events/InputFailedEvent.java b/tez-api/src/main/java/org/apache/tez/runtime/api/events/InputFailedEvent.java
index 639d0b9..9d8363a 100644
--- a/tez-api/src/main/java/org/apache/tez/runtime/api/events/InputFailedEvent.java
+++ b/tez-api/src/main/java/org/apache/tez/runtime/api/events/InputFailedEvent.java
@@ -55,6 +55,22 @@ public class InputFailedEvent extends Event{
   public static InputFailedEvent create(int targetIndex, int version) {
     return new InputFailedEvent(targetIndex, version);
   }
+  
+  /**
+   * Create a copy of the {@link InputFailedEvent} by adding a target input
+   * index The index of the physical input to which this event should be routed
+   * 
+   * @param targetIndex
+   *          The index of the physical input to which this
+   *          {@link InputFailedEvent} should be routed
+   * 
+   * @return copy of the {@link InputFailedEvent} with the target input index
+   *         added
+   */
+  @Private
+  public InputFailedEvent makeCopy(int targetIndex) {
+    return create(targetIndex, version);
+  }
 
   public int getTargetIndex() {
     return targetIndex;

http://git-wip-us.apache.org/repos/asf/tez/blob/05f77fe2/tez-dag/findbugs-exclude.xml
----------------------------------------------------------------------
diff --git a/tez-dag/findbugs-exclude.xml b/tez-dag/findbugs-exclude.xml
index 233f76c..57c0aca 100644
--- a/tez-dag/findbugs-exclude.xml
+++ b/tez-dag/findbugs-exclude.xml
@@ -118,7 +118,10 @@
   <!-- TEZ-1952 -->
   <Match>
     <Class name="org.apache.tez.dag.app.dag.impl.Edge"/>
-    <Field name="edgeProperty"/>
+    <Or>
+      <Field name="edgeProperty"/>
+      <Field name="onDemandRouting"/>
+    </Or>
     <Bug pattern="IS2_INCONSISTENT_SYNC"/>
   </Match>
 

http://git-wip-us.apache.org/repos/asf/tez/blob/05f77fe2/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptEventInfo.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptEventInfo.java b/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptEventInfo.java
new file mode 100644
index 0000000..49ff044
--- /dev/null
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptEventInfo.java
@@ -0,0 +1,40 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package org.apache.tez.dag.app;
+
+import java.util.List;
+
+import org.apache.tez.runtime.api.impl.TezEvent;
+
+public class TaskAttemptEventInfo {
+  private final int nextFromEventId;
+  private final List<TezEvent> events;
+  
+  public TaskAttemptEventInfo(int nextFromEventId, List<TezEvent> events) {
+    this.nextFromEventId = nextFromEventId;
+    this.events = events;
+  }
+  
+  public int getNextFromEventId() {
+    return nextFromEventId;
+  }
+  
+  public List<TezEvent> getEvents() {
+    return events;
+  }
+}

http://git-wip-us.apache.org/repos/asf/tez/blob/05f77fe2/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java b/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
index b38081b..970489d 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
@@ -444,13 +444,13 @@ public class TaskAttemptListenerImpTezDag extends AbstractService implements
               new VertexEventRouteEvent(vertexId, Collections.unmodifiableList(otherEvents)));
         }
         taskHeartbeatHandler.pinged(taskAttemptID);
-        List<TezEvent> outEvents = context
+        TaskAttemptEventInfo eventInfo = context
             .getCurrentDAG()
             .getVertex(taskAttemptID.getTaskID().getVertexID())
-            .getTask(taskAttemptID.getTaskID())
             .getTaskAttemptTezEvents(taskAttemptID, request.getStartIndex(),
                 request.getMaxEvents());
-        response.setEvents(outEvents);
+        response.setEvents(eventInfo.getEvents());
+        response.setNextFromEventId(eventInfo.getNextFromEventId());
       }
       containerInfo.lastRequestId = requestId;
       containerInfo.lastReponse = response;

http://git-wip-us.apache.org/repos/asf/tez/blob/05f77fe2/tez-dag/src/main/java/org/apache/tez/dag/app/dag/Vertex.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/Vertex.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/Vertex.java
index 77ef6e0..bb42392 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/Vertex.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/Vertex.java
@@ -45,10 +45,12 @@ import org.apache.tez.dag.api.records.DAGProtos.VertexPlan;
 import org.apache.tez.dag.api.client.ProgressBuilder;
 import org.apache.tez.dag.api.client.VertexStatusBuilder;
 import org.apache.tez.dag.app.AppContext;
+import org.apache.tez.dag.app.TaskAttemptEventInfo;
 import org.apache.tez.dag.app.dag.event.SpeculatorEvent;
 import org.apache.tez.dag.app.dag.impl.AMUserCodeException;
 import org.apache.tez.dag.app.dag.impl.Edge;
 import org.apache.tez.dag.history.HistoryEvent;
+import org.apache.tez.dag.records.TezTaskAttemptID;
 import org.apache.tez.dag.records.TezTaskID;
 import org.apache.tez.dag.records.TezVertexID;
 import org.apache.tez.runtime.api.OutputCommitter;
@@ -144,6 +146,9 @@ public interface Vertex extends Comparable<Vertex> {
   void scheduleSpeculativeTask(TezTaskID taskId);
   Resource getTaskResource();
   
+  public TaskAttemptEventInfo getTaskAttemptTezEvents(TezTaskAttemptID attemptID,
+      int fromEventId, int maxEvents);
+  
   void handleSpeculatorEvent(SpeculatorEvent event);
 
   ProcessorDescriptor getProcessorDescriptor();

http://git-wip-us.apache.org/repos/asf/tez/blob/05f77fe2/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/BroadcastEdgeManager.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/BroadcastEdgeManager.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/BroadcastEdgeManager.java
index db57227..d14527d 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/BroadcastEdgeManager.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/BroadcastEdgeManager.java
@@ -22,12 +22,14 @@ import java.util.Collections;
 import java.util.List;
 import java.util.Map;
 
-import org.apache.tez.dag.api.EdgeManagerPlugin;
 import org.apache.tez.dag.api.EdgeManagerPluginContext;
+import org.apache.tez.dag.api.EdgeManagerPluginOnDemand;
 import org.apache.tez.runtime.api.events.DataMovementEvent;
 import org.apache.tez.runtime.api.events.InputReadErrorEvent;
 
-public class BroadcastEdgeManager extends EdgeManagerPlugin {
+public class BroadcastEdgeManager extends EdgeManagerPluginOnDemand {
+
+  EventRouteMetadata[] commonRouteMeta;
 
   public BroadcastEdgeManager(EdgeManagerPluginContext context) {
     super(context);
@@ -60,6 +62,35 @@ public class BroadcastEdgeManager extends EdgeManagerPlugin {
   }
   
   @Override
+  public void prepareForRouting() throws Exception {
+    int numSourceTasks = getContext().getSourceVertexNumTasks();
+    commonRouteMeta = new EventRouteMetadata[numSourceTasks];
+    for (int i=0; i<numSourceTasks; ++i) {
+      commonRouteMeta[i] = EventRouteMetadata.create(1, new int[]{i}, new int[]{0});
+    }
+  }
+  
+  @Override
+  public EventRouteMetadata routeDataMovementEventToDestination(
+      int sourceTaskIndex, int sourceOutputIndex, int destinationTaskIndex)
+      throws Exception {
+    return commonRouteMeta[sourceTaskIndex];
+  }
+  
+  @Override
+  public EventRouteMetadata routeCompositeDataMovementEventToDestination(
+      int sourceTaskIndex, int destinationTaskIndex)
+      throws Exception {
+    return commonRouteMeta[sourceTaskIndex];
+  }
+
+  @Override
+  public EventRouteMetadata routeInputSourceTaskFailedEventToDestination(
+      int sourceTaskIndex, int destinationTaskIndex) throws Exception {
+    return commonRouteMeta[sourceTaskIndex];
+  }
+
+  @Override
   public void routeInputSourceTaskFailedEventToDestination(int sourceTaskIndex,
       Map<Integer, List<Integer>> destinationTaskAndInputIndices) {
     List<Integer> inputIndices = 
@@ -71,6 +102,12 @@ public class BroadcastEdgeManager extends EdgeManagerPlugin {
   }
 
   @Override
+  public int routeInputErrorEventToSource(int destinationTaskIndex, int destinationFailedInputIndex)
+      throws Exception {
+    return destinationFailedInputIndex;
+  }
+
+  @Override
   public int routeInputErrorEventToSource(InputReadErrorEvent event,
       int destinationTaskIndex, int destinationFailedInputIndex) {
     return destinationFailedInputIndex;

http://git-wip-us.apache.org/repos/asf/tez/blob/05f77fe2/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/Edge.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/Edge.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/Edge.java
index f5fef67..78bab05 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/Edge.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/Edge.java
@@ -21,6 +21,7 @@ package org.apache.tez.dag.app.dag.impl;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.Map;
+import java.util.concurrent.ConcurrentMap;
 import java.util.concurrent.atomic.AtomicBoolean;
 
 import org.slf4j.Logger;
@@ -30,9 +31,11 @@ import org.apache.tez.common.ReflectionUtils;
 import org.apache.tez.dag.api.EdgeManagerPlugin;
 import org.apache.tez.dag.api.EdgeManagerPluginContext;
 import org.apache.tez.dag.api.EdgeManagerPluginDescriptor;
+import org.apache.tez.dag.api.EdgeManagerPluginOnDemand;
 import org.apache.tez.dag.api.EdgeProperty;
 import org.apache.tez.dag.api.TezUncheckedException;
 import org.apache.tez.dag.api.UserPayload;
+import org.apache.tez.dag.api.EdgeManagerPluginOnDemand.EventRouteMetadata;
 import org.apache.tez.dag.app.dag.Task;
 import org.apache.tez.dag.app.dag.Vertex;
 import org.apache.tez.dag.app.dag.event.TaskAttemptEventOutputFailed;
@@ -97,7 +100,9 @@ public class Edge {
 
   private EdgeProperty edgeProperty;
   private EdgeManagerPluginContext edgeManagerContext;
-  private EdgeManagerPlugin edgeManager;
+  @VisibleForTesting
+  EdgeManagerPlugin edgeManager;
+  private boolean onDemandRouting = false;
   @SuppressWarnings("rawtypes")
   private EventHandler eventHandler;
   private AtomicBoolean bufferEvents = new AtomicBoolean(false);
@@ -106,6 +111,9 @@ public class Edge {
   private Vertex sourceVertex;
   private Vertex destinationVertex; // this may end up being a list for shared edge
   private EventMetaData destinationMetaInfo;
+  private boolean routingNeeded = true;
+  private final ConcurrentMap<TezTaskAttemptID, PendingEventRouteMetadata> pendingEvents = Maps
+      .newConcurrentMap();
 
   @SuppressWarnings("rawtypes")
   public Edge(EdgeProperty edgeProperty, EventHandler eventHandler) {
@@ -188,6 +196,29 @@ public class Edge {
             edgeProperty.getEdgeDestination());
     setEdgeProperty(modifiedEdgeProperty);
   }
+  
+  public synchronized boolean routingToBegin() throws AMUserCodeException {
+    if (edgeManagerContext.getDestinationVertexNumTasks() == 0) {
+      routingNeeded = false;
+    } else if (edgeManagerContext.getDestinationVertexNumTasks() < 0) {
+      throw new TezUncheckedException(
+          "Internal error. Not expected to route events to a destination until parallelism is determined" +
+          " sourceVertex=" + sourceVertex.getLogIdentifier() +
+          " edgeManager=" + edgeManager.getClass().getName());
+    }
+    if (edgeManager instanceof EdgeManagerPluginOnDemand) {
+      onDemandRouting = true;
+      try {
+        ((EdgeManagerPluginOnDemand)edgeManager).prepareForRouting();
+      } catch (Exception e) {
+        throw new AMUserCodeException(Source.EdgeManager,
+            "Fail to prepareForRouting " + getEdgeInfo(), e);
+      }
+    }
+    
+    LOG.info("Routing to begin for edge: " + getEdgeInfo() + ". EdgeProperty: " + edgeProperty);
+    return onDemandRouting;
+  }
 
   public synchronized EdgeProperty getEdgeProperty() {
     return this.edgeProperty;
@@ -280,8 +311,13 @@ public class Edge {
         int srcTaskIndex;
         int numConsumers;
         try {
-          srcTaskIndex = edgeManager.routeInputErrorEventToSource(event,
-              destTaskIndex, event.getIndex());
+          if (onDemandRouting) {
+            srcTaskIndex = ((EdgeManagerPluginOnDemand) edgeManager).routeInputErrorEventToSource(
+                destTaskIndex, event.getIndex());            
+          } else {
+            srcTaskIndex = edgeManager.routeInputErrorEventToSource(event,
+                destTaskIndex, event.getIndex());
+          }
           Preconditions.checkArgument(srcTaskIndex >= 0,
               "SourceTaskIndex should not be negative,"
               + "srcTaskIndex=" + srcTaskIndex);
@@ -340,7 +376,6 @@ public class Edge {
     Preconditions.checkState(edgeManager != null, 
         "Edge Manager must be initialized by this time");
     Event event = tezEvent.getEvent();
-    boolean isFirstEvent = true;
     // cache of event object per input index
     Map<Integer, TezEvent> inputIndicesWithEvents = Maps.newHashMap(); 
     for (Map.Entry<Integer, List<Integer>> entry : taskAndInputIndices.entrySet()) {
@@ -350,28 +385,16 @@ public class Edge {
         Integer inputIndex = inputIndices.get(i);
         TezEvent tezEventToSend = inputIndicesWithEvents.get(inputIndex);
         if (tezEventToSend == null) {
-          if (isFirstEvent) {
-            isFirstEvent = false;
-            // this is the first item - reuse the event object
-            if (isDataMovementEvent) {
-              ((DataMovementEvent) event).setTargetIndex(inputIndex);
-            } else {
-              ((InputFailedEvent) event).setTargetIndex(inputIndex);
-            }
-            tezEventToSend = tezEvent;
+          Event e;
+          if (isDataMovementEvent) {
+            DataMovementEvent dmEvent = (DataMovementEvent) event;
+            e = DataMovementEvent.create(dmEvent.getSourceIndex(),
+                inputIndex, dmEvent.getVersion(), dmEvent.getUserPayload());
           } else {
-            // create new event object for this input index
-            Event e;
-            if (isDataMovementEvent) {
-              DataMovementEvent dmEvent = (DataMovementEvent) event;
-              e = DataMovementEvent.create(dmEvent.getSourceIndex(),
-                  inputIndex, dmEvent.getVersion(), dmEvent.getUserPayload());
-            } else {
-              InputFailedEvent ifEvent = ((InputFailedEvent) event);
-              e = InputFailedEvent.create(inputIndex, ifEvent.getVersion());
-            }
-            tezEventToSend = new TezEvent(e, tezEvent.getSourceInfo());
+            InputFailedEvent ifEvent = ((InputFailedEvent) event);
+            e = InputFailedEvent.create(inputIndex, ifEvent.getVersion());
           }
+          tezEventToSend = new TezEvent(e, tezEvent.getSourceInfo());
           tezEventToSend.setDestinationInfo(destinationMetaInfo);
           // cache the event object per input because are unique per input index
           inputIndicesWithEvents.put(inputIndex, tezEventToSend);
@@ -392,8 +415,6 @@ public class Edge {
   }
   
   public void sendTezEventToDestinationTasks(TezEvent tezEvent) throws AMUserCodeException {
-    Preconditions.checkState(edgeManager != null, 
-        "Edge Manager must be initialized by this time");
     if (!bufferEvents.get()) {
       boolean isDataMovementEvent = true;
       switch (tezEvent.getEventType()) {
@@ -411,16 +432,7 @@ public class Edge {
             .getTaskAttemptID();
         int srcTaskIndex = srcAttemptId.getTaskID().getId();
 
-        boolean routingRequired = true;
-        if (edgeManagerContext.getDestinationVertexNumTasks() == 0) {
-          routingRequired = false;
-          LOG.info("Not routing events since destination vertex has 0 tasks" +
-              generateCommonDebugString(srcTaskIndex, tezEvent));
-        } else if (edgeManagerContext.getDestinationVertexNumTasks() < 0) {
-          throw new TezUncheckedException(
-              "Internal error. Not expected to route events to a destination until parallelism is determined" +
-                  generateCommonDebugString(srcTaskIndex, tezEvent));
-        }
+        boolean routingRequired = routingNeeded;
 
         if (routingRequired) {
           try {
@@ -439,6 +451,9 @@ public class Edge {
                 + ", sourceInfo:" + tezEvent.getSourceInfo() + ", destinationInfo:"
                 + tezEvent.getDestinationInfo() + ", " + getEdgeInfo(), e);
           }
+        } else {
+          LOG.info("Not routing events since destination vertex has 0 tasks" +
+              generateCommonDebugString(srcTaskIndex, tezEvent));
         }
 
         if (!destTaskAndInputIndices.isEmpty()) {
@@ -459,6 +474,163 @@ public class Edge {
     }
   }
   
+  static class PendingEventRouteMetadata {
+    private final EventRouteMetadata routeMeta;
+    private final TezEvent event;
+    private int numEventsRouted;
+    
+    public PendingEventRouteMetadata(EventRouteMetadata routeMeta, TezEvent event,
+        int numEventsRouted) {
+      this.routeMeta = routeMeta;
+      this.event = event;
+      this.numEventsRouted = numEventsRouted;
+    }
+    
+    public EventRouteMetadata getRouteMeta() {
+      return routeMeta;
+    }
+    
+    public TezEvent getTezEvent() {
+      return event;
+    }
+    
+    public int getNumEventsRouted() {
+      return numEventsRouted;
+    }
+  }
+
+  public PendingEventRouteMetadata removePendingEvents(TezTaskAttemptID attemptID) {
+    return pendingEvents.remove(attemptID);
+  }
+  
+  // return false is event could be routed but ran out of space in the list
+  public boolean maybeAddTezEventForDestinationTask(TezEvent tezEvent, TezTaskAttemptID attemptID,
+      int srcTaskIndex, List<TezEvent> listToAdd, int listMaxSize, 
+      PendingEventRouteMetadata pendingRoutes) 
+          throws AMUserCodeException {
+    if (!routingNeeded) {
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("Not routing events since destination vertex has 0 tasks" +
+            generateCommonDebugString(srcTaskIndex, tezEvent));
+      }
+      return true;
+    } else {
+      try {
+        EdgeManagerPluginOnDemand edgeManagerOnDemand = (EdgeManagerPluginOnDemand) edgeManager;
+        int taskIndex = attemptID.getTaskID().getId();
+        switch (tezEvent.getEventType()) {
+        case COMPOSITE_DATA_MOVEMENT_EVENT:
+          {
+            CompositeDataMovementEvent compEvent = (CompositeDataMovementEvent) tezEvent.getEvent();        
+            EventRouteMetadata routeMeta;
+            int numEventsDone;
+            if (pendingRoutes != null) {
+              routeMeta = pendingRoutes.getRouteMeta();
+              numEventsDone = pendingRoutes.getNumEventsRouted();
+            } else {
+              routeMeta = edgeManagerOnDemand
+                  .routeCompositeDataMovementEventToDestination(srcTaskIndex, taskIndex);
+              numEventsDone = 0;
+            }
+            if (routeMeta != null) {
+              int listSize = listToAdd.size();
+              int numEvents = routeMeta.getNumEvents();
+              int[] sourceIndices = routeMeta.getSourceIndices();
+              int[] targetIndices = routeMeta.getTargetIndices();
+              while (numEventsDone < numEvents && listSize++ < listMaxSize) {
+                DataMovementEvent e = compEvent.expand(sourceIndices[numEventsDone],
+                    targetIndices[numEventsDone]);
+                numEventsDone++;
+                TezEvent tezEventToSend = new TezEvent(e, tezEvent.getSourceInfo());
+                tezEventToSend.setDestinationInfo(destinationMetaInfo);
+                listToAdd.add(tezEventToSend);
+              }
+              if (numEventsDone < numEvents) {
+                pendingEvents.put(attemptID, new PendingEventRouteMetadata(routeMeta, tezEvent,
+                    numEventsDone));
+                return false;
+              }
+            }
+          }
+          break;
+        case INPUT_FAILED_EVENT:
+          {
+            InputFailedEvent ifEvent = (InputFailedEvent) tezEvent.getEvent();
+            EventRouteMetadata routeMeta;
+            int numEventsDone;
+            if (pendingRoutes != null) {
+              routeMeta = pendingRoutes.getRouteMeta();
+              numEventsDone = pendingRoutes.getNumEventsRouted();
+            } else {
+              routeMeta = edgeManagerOnDemand.routeInputSourceTaskFailedEventToDestination(
+                  srcTaskIndex, taskIndex);
+              numEventsDone = 0;
+            }
+            if (routeMeta != null) {
+              int listSize = listToAdd.size();
+              int numEvents = routeMeta.getNumEvents();
+              int[] targetIndices = routeMeta.getTargetIndices();
+              while (numEventsDone < numEvents && listSize++ < listMaxSize) {
+                InputFailedEvent e = ifEvent.makeCopy(targetIndices[numEventsDone]);
+                numEventsDone++;
+                TezEvent tezEventToSend = new TezEvent(e, tezEvent.getSourceInfo());
+                tezEventToSend.setDestinationInfo(destinationMetaInfo);
+                listToAdd.add(tezEventToSend);
+              }
+              if (numEventsDone < numEvents) {
+                pendingEvents.put(attemptID, new PendingEventRouteMetadata(routeMeta, tezEvent,
+                    numEventsDone));
+                return false;
+              }
+            }
+          }
+          break;
+        case DATA_MOVEMENT_EVENT:
+          {
+            DataMovementEvent dmEvent = (DataMovementEvent) tezEvent.getEvent();
+            EventRouteMetadata routeMeta;
+            int numEventsDone;
+            if (pendingRoutes != null) {
+              routeMeta = pendingRoutes.getRouteMeta();
+              numEventsDone = pendingRoutes.getNumEventsRouted();
+            } else {
+              routeMeta = edgeManagerOnDemand.routeDataMovementEventToDestination(srcTaskIndex,
+                  dmEvent.getSourceIndex(), taskIndex);
+              numEventsDone = 0;
+            }
+            if (routeMeta != null) {
+              int listSize = listToAdd.size();
+              int numEvents = routeMeta.getNumEvents();
+              int[] targetIndices = routeMeta.getTargetIndices();
+              while (numEventsDone < numEvents && listSize++ < listMaxSize) {
+                DataMovementEvent e = dmEvent.makeCopy(targetIndices[numEventsDone]);
+                numEventsDone++;
+                TezEvent tezEventToSend = new TezEvent(e, tezEvent.getSourceInfo());
+                tezEventToSend.setDestinationInfo(destinationMetaInfo);
+                listToAdd.add(tezEventToSend);
+              }
+              if (numEventsDone < numEvents) {
+                pendingEvents.put(attemptID, new PendingEventRouteMetadata(routeMeta, tezEvent,
+                    numEventsDone));
+                return false;
+              }
+            }
+          }
+          break;
+        default:
+          throw new TezUncheckedException("Unhandled tez event type: "
+              + tezEvent.getEventType());
+        }
+      } catch (Exception e){
+        throw new AMUserCodeException(Source.EdgeManager,
+            "Fail to maybeAddTezEventForDestinationTask, event:" + tezEvent.getEvent()
+            + ", sourceInfo:" + tezEvent.getSourceInfo() + ", destinationInfo:"
+            + tezEvent.getDestinationInfo() + ", " + getEdgeInfo(), e);
+      }
+    }
+    return true;
+  }
+
   private void sendEventToTask(Task task, TezEvent tezEvent) {
     task.registerTezEvent(tezEvent);
   }

http://git-wip-us.apache.org/repos/asf/tez/blob/05f77fe2/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/OneToOneEdgeManager.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/OneToOneEdgeManager.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/OneToOneEdgeManager.java
index 11a6483..6053806 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/OneToOneEdgeManager.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/OneToOneEdgeManager.java
@@ -21,18 +21,25 @@ package org.apache.tez.dag.app.dag.impl;
 import java.util.Collections;
 import java.util.List;
 import java.util.Map;
+import java.util.concurrent.atomic.AtomicBoolean;
+
+import javax.annotation.Nullable;
 
-import org.apache.tez.dag.api.EdgeManagerPlugin;
 import org.apache.tez.dag.api.EdgeManagerPluginContext;
+import org.apache.tez.dag.api.EdgeManagerPluginOnDemand;
 import org.apache.tez.runtime.api.events.DataMovementEvent;
 import org.apache.tez.runtime.api.events.InputReadErrorEvent;
 
 import com.google.common.base.Preconditions;
 
-public class OneToOneEdgeManager extends EdgeManagerPlugin {
+public class OneToOneEdgeManager extends EdgeManagerPluginOnDemand {
 
   List<Integer> destinationInputIndices = 
       Collections.unmodifiableList(Collections.singletonList(0));
+  AtomicBoolean stateChecked = new AtomicBoolean(false);
+ 
+  final EventRouteMetadata commonRouteMeta = 
+      EventRouteMetadata.create(1, new int[]{0}, new int[]{0});
 
   public OneToOneEdgeManager(EdgeManagerPluginContext context) {
     super(context);
@@ -57,16 +64,42 @@ public class OneToOneEdgeManager extends EdgeManagerPlugin {
   public void routeDataMovementEventToDestination(DataMovementEvent event,
       int sourceTaskIndex, int sourceOutputIndex, 
       Map<Integer, List<Integer>> destinationTaskAndInputIndices) {
-    // by the time routing is initiated all task counts must be determined and stable
-    Preconditions.checkState(getContext().getSourceVertexNumTasks() == getContext()
-        .getDestinationVertexNumTasks(), "1-1 source and destination task counts must match."
-        + " Destination: " + getContext().getDestinationVertexName() + " tasks: "
-        + getContext().getDestinationVertexNumTasks() + " Source: "
-        + getContext().getSourceVertexName() + " tasks: " + getContext().getSourceVertexNumTasks());
+    checkState();
     destinationTaskAndInputIndices.put(sourceTaskIndex, destinationInputIndices);
   }
   
   @Override
+  public void prepareForRouting() throws Exception {
+    checkState();
+  }
+  
+  @Override
+  public EventRouteMetadata routeDataMovementEventToDestination(
+      int sourceTaskIndex, int sourceOutputIndex, int destinationTaskIndex)
+      throws Exception {
+    if (sourceTaskIndex == destinationTaskIndex) {
+      return commonRouteMeta;
+    }
+    return null;
+  }
+  
+  @Override
+  public @Nullable EventRouteMetadata routeCompositeDataMovementEventToDestination(
+      int sourceTaskIndex, int destinationTaskIndex)
+      throws Exception {
+    if (sourceTaskIndex == destinationTaskIndex) {
+      return commonRouteMeta;
+    }
+    return null;
+  }
+
+  @Override
+  public EventRouteMetadata routeInputSourceTaskFailedEventToDestination(
+      int sourceTaskIndex, int destinationTaskIndex) throws Exception {
+    return commonRouteMeta;
+  }
+
+  @Override
   public void routeInputSourceTaskFailedEventToDestination(int sourceTaskIndex,
       Map<Integer, List<Integer>> destinationTaskAndInputIndices) {
     destinationTaskAndInputIndices.put(sourceTaskIndex, destinationInputIndices);
@@ -79,8 +112,26 @@ public class OneToOneEdgeManager extends EdgeManagerPlugin {
   }
   
   @Override
+  public int routeInputErrorEventToSource(int destinationTaskIndex, int destinationFailedInputIndex) {
+    return destinationTaskIndex;
+  }
+
+  @Override
   public int getNumDestinationConsumerTasks(int sourceTaskIndex) {
     return 1;
   }
+  
+  private void checkState() {
+    if (stateChecked.get()) {
+      return;
+    }
+    // by the time routing is initiated all task counts must be determined and stable
+    Preconditions.checkState(getContext().getSourceVertexNumTasks() == getContext()
+        .getDestinationVertexNumTasks(), "1-1 source and destination task counts must match."
+        + " Destination: " + getContext().getDestinationVertexName() + " tasks: "
+        + getContext().getDestinationVertexNumTasks() + " Source: "
+        + getContext().getSourceVertexName() + " tasks: " + getContext().getSourceVertexNumTasks());
+    stateChecked.set(true);
+  }
 
 }

http://git-wip-us.apache.org/repos/asf/tez/blob/05f77fe2/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/ScatterGatherEdgeManager.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/ScatterGatherEdgeManager.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/ScatterGatherEdgeManager.java
index e2608cd..3b66b8f 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/ScatterGatherEdgeManager.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/ScatterGatherEdgeManager.java
@@ -18,18 +18,29 @@
 
 package org.apache.tez.dag.app.dag.impl;
 
+import java.util.ArrayList;
 import java.util.Collections;
 import java.util.List;
 import java.util.Map;
+import java.util.concurrent.atomic.AtomicReference;
+
+import javax.annotation.Nullable;
 
-import org.apache.tez.dag.api.EdgeManagerPlugin;
 import org.apache.tez.dag.api.EdgeManagerPluginContext;
+import org.apache.tez.dag.api.EdgeManagerPluginOnDemand;
 import org.apache.tez.runtime.api.events.DataMovementEvent;
 import org.apache.tez.runtime.api.events.InputReadErrorEvent;
 
 import com.google.common.base.Preconditions;
+import com.google.common.collect.Lists;
 
-public class ScatterGatherEdgeManager extends EdgeManagerPlugin {
+public class ScatterGatherEdgeManager extends EdgeManagerPluginOnDemand {
+  
+  private AtomicReference<ArrayList<EventRouteMetadata>> commonRouteMeta = 
+      new AtomicReference<ArrayList<EventRouteMetadata>>();
+  private Object commonRouteMetaLock = new Object();
+  private int[][] sourceIndices;
+  private int[][] targetIndices;
 
   public ScatterGatherEdgeManager(EdgeManagerPluginContext context) {
     super(context);
@@ -53,6 +64,69 @@ public class ScatterGatherEdgeManager extends EdgeManagerPlugin {
     return physicalOutputs;
   }
 
+  private ArrayList<EventRouteMetadata> getOrCreateCommonRouteMeta() {
+    ArrayList<EventRouteMetadata> metaData = commonRouteMeta.get();
+    if (metaData == null) {
+      synchronized (commonRouteMetaLock) {
+        metaData = commonRouteMeta.get();
+        if (metaData == null) {
+          int numSourceTasks = getContext().getSourceVertexNumTasks();
+          ArrayList<EventRouteMetadata> localEventMeta = Lists
+              .newArrayListWithCapacity(numSourceTasks);
+          for (int i=0; i<numSourceTasks; ++i) {
+            localEventMeta.add(EventRouteMetadata.create(1, new int[]{i}, new int[]{0}));
+          }
+          Preconditions.checkState(commonRouteMeta.compareAndSet(null, localEventMeta));
+          metaData = commonRouteMeta.get();
+        }
+      }
+    }
+    return metaData;
+  }
+
+  private void createIndices() {
+    // source indices derive from num dest tasks (==partitions)
+    int numTargetTasks = getContext().getDestinationVertexNumTasks();
+    sourceIndices = new int[numTargetTasks][];
+    for (int i=0; i<numTargetTasks; ++i) {
+      sourceIndices[i] = new int[]{i};
+    }
+    // target indices derive from num src tasks
+    int numSourceTasks = getContext().getSourceVertexNumTasks();
+    targetIndices = new int[numSourceTasks][];
+    for (int i=0; i<numSourceTasks; ++i) {
+      targetIndices[i] = new int[]{i};
+    }
+  }
+  
+  @Override
+  public void prepareForRouting() throws Exception {
+    createIndices();    
+  }
+
+  @Override
+  public EventRouteMetadata routeDataMovementEventToDestination(
+      int sourceTaskIndex, int sourceOutputIndex, int destinationTaskIndex) throws Exception {
+    if (sourceOutputIndex == destinationTaskIndex) {
+      return getOrCreateCommonRouteMeta().get(sourceTaskIndex);
+    }
+    return null;
+  }
+  
+  @Override
+  public @Nullable EventRouteMetadata routeCompositeDataMovementEventToDestination(
+      int sourceTaskIndex, int destinationTaskIndex)
+      throws Exception {
+    return EventRouteMetadata.create(1, targetIndices[sourceTaskIndex], 
+        sourceIndices[destinationTaskIndex]);
+  }
+
+  @Override
+  public EventRouteMetadata routeInputSourceTaskFailedEventToDestination(
+      int sourceTaskIndex, int destinationTaskIndex) throws Exception {
+    return getOrCreateCommonRouteMeta().get(sourceTaskIndex);
+  }
+
   @Override
   public void routeDataMovementEventToDestination(DataMovementEvent event,
       int sourceTaskIndex, int sourceOutputIndex, Map<Integer, List<Integer>> destinationTaskAndInputIndices) {
@@ -76,6 +150,11 @@ public class ScatterGatherEdgeManager extends EdgeManagerPlugin {
   }
 
   @Override
+  public int routeInputErrorEventToSource(int destinationTaskIndex, int destinationFailedInputIndex) {
+    return destinationFailedInputIndex;
+  }
+
+  @Override
   public int getNumDestinationConsumerTasks(int sourceTaskIndex) {
     return getContext().getDestinationVertexNumTasks();
   }

http://git-wip-us.apache.org/repos/asf/tez/blob/05f77fe2/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TaskImpl.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TaskImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TaskImpl.java
index 15382a8..2e884e7 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TaskImpl.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TaskImpl.java
@@ -1493,12 +1493,6 @@ public class TaskImpl implements Task, EventHandler<TaskEvent> {
     }
   }
   
-  @Private
-  @VisibleForTesting
-  public List<TezEvent> getTaskEvents() {
-    return tezEventsForTaskAttempts;
-  }
-
   private static class KillTransition
     implements SingleArcTransition<TaskImpl, TaskEvent> {
     @Override


[31/43] tez git commit: TEZ-2125. Create a task communicator for local mode. Allow tasks to run in the AM. (sseth)

Posted by ss...@apache.org.
TEZ-2125. Create a task communicator for local mode. Allow tasks to run
in the AM. (sseth)


Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/a45ef858
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/a45ef858
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/a45ef858

Branch: refs/heads/TEZ-2003
Commit: a45ef85833df44750296430f9b85f42574795578
Parents: c47951a
Author: Siddharth Seth <ss...@apache.org>
Authored: Fri Feb 20 16:12:52 2015 -0800
Committer: Siddharth Seth <ss...@apache.org>
Committed: Fri May 8 14:41:30 2015 -0700

----------------------------------------------------------------------
 TEZ-2003-CHANGES.txt                            |  1 +
 .../org/apache/tez/dag/app/DAGAppMaster.java    | 25 +++++---
 .../dag/app/TaskAttemptListenerImpTezDag.java   | 18 ++++--
 .../dag/app/TezLocalTaskCommunicatorImpl.java   | 46 ++++++++++++++
 .../tez/dag/app/TezTaskCommunicatorImpl.java    | 67 ++++++++------------
 .../app/launcher/ContainerLauncherRouter.java   | 17 +++--
 .../app/launcher/LocalContainerLauncher.java    | 31 ++++++---
 .../dag/app/rm/TaskSchedulerEventHandler.java   |  2 +
 .../apache/tez/dag/app/MockDAGAppMaster.java    |  3 +-
 .../app/TestTaskAttemptListenerImplTezDag.java  |  2 +-
 .../tez/service/impl/ContainerRunnerImpl.java   |  2 +-
 .../tez/tests/TestExternalTezServices.java      | 57 +++++++++++++----
 .../org/apache/tez/runtime/task/TezChild.java   | 34 +++++-----
 13 files changed, 204 insertions(+), 101 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tez/blob/a45ef858/TEZ-2003-CHANGES.txt
----------------------------------------------------------------------
diff --git a/TEZ-2003-CHANGES.txt b/TEZ-2003-CHANGES.txt
index 1a2264c..76496c9 100644
--- a/TEZ-2003-CHANGES.txt
+++ b/TEZ-2003-CHANGES.txt
@@ -5,5 +5,6 @@ ALL CHANGES:
   TEZ-2117. Add a manager for ContainerLaunchers running in the AM.
   TEZ-2122. Setup pluggable components at AM/Vertex level.
   TEZ-2123. Fix component managers to use pluggable components. (Enable hybrid mode)
+  TEZ-2125. Create a task communicator for local mode. Allow tasks to run in the AM.
 
 INCOMPATIBLE CHANGES:

http://git-wip-us.apache.org/repos/asf/tez/blob/a45ef858/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java b/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java
index 89b6506..701eca8 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java
@@ -465,7 +465,7 @@ public class DAGAppMaster extends AbstractService {
 
     //service to handle requests to TaskUmbilicalProtocol
     taskAttemptListener = createTaskAttemptListener(context,
-        taskHeartbeatHandler, containerHeartbeatHandler, taskCommunicatorClassIdentifiers);
+        taskHeartbeatHandler, containerHeartbeatHandler, taskCommunicatorClassIdentifiers, isLocal);
     addIfService(taskAttemptListener, true);
 
     containerSignatureMatcher = createContainerSignatureMatcher();
@@ -531,7 +531,7 @@ public class DAGAppMaster extends AbstractService {
         taskSchedulerEventHandler);
     addIfServiceDependency(taskSchedulerEventHandler, clientRpcServer);
 
-    this.containerLauncherRouter = createContainerLauncherRouter(conf, containerLauncherClassIdentifiers);
+    this.containerLauncherRouter = createContainerLauncherRouter(conf, containerLauncherClassIdentifiers, isLocal);
     addIfService(containerLauncherRouter, true);
     dispatcher.register(NMCommunicatorEventType.class, containerLauncherRouter);
 
@@ -1038,9 +1038,13 @@ public class DAGAppMaster extends AbstractService {
   }
 
   protected TaskAttemptListener createTaskAttemptListener(AppContext context,
-      TaskHeartbeatHandler thh, ContainerHeartbeatHandler chh, String[] taskCommunicatorClasses) {
+                                                          TaskHeartbeatHandler thh,
+                                                          ContainerHeartbeatHandler chh,
+                                                          String[] taskCommunicatorClasses,
+                                                          boolean isLocal) {
     TaskAttemptListener lis =
-        new TaskAttemptListenerImpTezDag(context, thh, chh,jobTokenSecretManager, taskCommunicatorClasses);
+        new TaskAttemptListenerImpTezDag(context, thh, chh, jobTokenSecretManager,
+            taskCommunicatorClasses, isLocal);
     return lis;
   }
 
@@ -1061,10 +1065,12 @@ public class DAGAppMaster extends AbstractService {
     return chh;
   }
 
-  protected ContainerLauncherRouter createContainerLauncherRouter(Configuration conf, String []containerLauncherClasses) throws
+  protected ContainerLauncherRouter createContainerLauncherRouter(Configuration conf,
+                                                                  String[] containerLauncherClasses,
+                                                                  boolean isLocal) throws
       UnknownHostException {
-    return  new ContainerLauncherRouter(conf, context, taskAttemptListener, workingDirectory, containerLauncherClasses);
-
+    return new ContainerLauncherRouter(conf, context, taskAttemptListener, workingDirectory,
+        containerLauncherClasses, isLocal);
   }
 
   public ApplicationId getAppID() {
@@ -2331,9 +2337,8 @@ public class DAGAppMaster extends AbstractService {
     StringBuilder sb = new StringBuilder();
     sb.append("AM Level configured ").append(component).append(": ");
     for (int i = 0; i < classIdentifiers.length; i++) {
-      sb.append("[").append(i).append(":").append(map.inverse().get(i)).append(":")
-          .append(taskSchedulers.inverse().get(i)).append(
-          "]");
+      sb.append("[").append(i).append(":").append(map.inverse().get(i))
+          .append(":").append(classIdentifiers[i]).append("]");
       if (i != classIdentifiers.length - 1) {
         sb.append(",");
       }

http://git-wip-us.apache.org/repos/asf/tez/blob/a45ef858/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java b/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
index d21b7d0..8346839 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
@@ -99,13 +99,20 @@ public class TaskAttemptListenerImpTezDag extends AbstractService implements
                                       TaskHeartbeatHandler thh, ContainerHeartbeatHandler chh,
                                       // TODO TEZ-2003 pre-merge. Remove reference to JobTokenSecretManager.
                                       JobTokenSecretManager jobTokenSecretManager,
-                                      String [] taskCommunicatorClassIdentifiers) {
+                                      String [] taskCommunicatorClassIdentifiers,
+                                      boolean isPureLocalMode) {
     super(TaskAttemptListenerImpTezDag.class.getName());
     this.context = context;
     this.taskHeartbeatHandler = thh;
     this.containerHeartbeatHandler = chh;
     if (taskCommunicatorClassIdentifiers == null || taskCommunicatorClassIdentifiers.length == 0) {
-      taskCommunicatorClassIdentifiers = new String[] {TezConstants.TEZ_AM_SERVICE_PLUGINS_NAME_DEFAULT};
+      if (isPureLocalMode) {
+        taskCommunicatorClassIdentifiers =
+            new String[]{TezConstants.TEZ_AM_SERVICE_PLUGINS_LOCAL_MODE_NAME_DEFAULT};
+      } else {
+        taskCommunicatorClassIdentifiers =
+            new String[]{TezConstants.TEZ_AM_SERVICE_PLUGINS_NAME_DEFAULT};
+      }
     }
     this.taskCommunicators = new TaskCommunicator[taskCommunicatorClassIdentifiers.length];
     for (int i = 0 ; i < taskCommunicatorClassIdentifiers.length ; i++) {
@@ -131,11 +138,12 @@ public class TaskAttemptListenerImpTezDag extends AbstractService implements
   }
 
   private TaskCommunicator createTaskCommunicator(String taskCommClassIdentifier) {
-    if (taskCommClassIdentifier.equals(TezConstants.TEZ_AM_SERVICE_PLUGINS_NAME_DEFAULT) ||
-        taskCommClassIdentifier
-            .equals(TezConstants.TEZ_AM_SERVICE_PLUGINS_LOCAL_MODE_NAME_DEFAULT)) {
+    if (taskCommClassIdentifier.equals(TezConstants.TEZ_AM_SERVICE_PLUGINS_NAME_DEFAULT)) {
       LOG.info("Using Default Task Communicator");
       return new TezTaskCommunicatorImpl(this);
+    } else if (taskCommClassIdentifier.equals(TezConstants.TEZ_AM_SERVICE_PLUGINS_LOCAL_MODE_NAME_DEFAULT)) {
+      LOG.info("Using Default Local Task Communicator");
+      return new TezLocalTaskCommunicatorImpl(this);
     } else {
       LOG.info("Using TaskCommunicator: " + taskCommClassIdentifier);
       Class<? extends TaskCommunicator> taskCommClazz = (Class<? extends TaskCommunicator>) ReflectionUtils

http://git-wip-us.apache.org/repos/asf/tez/blob/a45ef858/tez-dag/src/main/java/org/apache/tez/dag/app/TezLocalTaskCommunicatorImpl.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/TezLocalTaskCommunicatorImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/TezLocalTaskCommunicatorImpl.java
new file mode 100644
index 0000000..3704cc4
--- /dev/null
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/TezLocalTaskCommunicatorImpl.java
@@ -0,0 +1,46 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.dag.app;
+
+import java.net.InetAddress;
+import java.net.InetSocketAddress;
+import java.net.UnknownHostException;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.tez.dag.api.TaskCommunicatorContext;
+import org.apache.tez.dag.api.TezUncheckedException;
+
+public class TezLocalTaskCommunicatorImpl extends TezTaskCommunicatorImpl {
+
+  private static final Log LOG = LogFactory.getLog(TezLocalTaskCommunicatorImpl.class);
+
+  public TezLocalTaskCommunicatorImpl(
+      TaskCommunicatorContext taskCommunicatorContext) {
+    super(taskCommunicatorContext);
+  }
+
+  @Override
+  protected void startRpcServer() {
+    try {
+      this.address = new InetSocketAddress(InetAddress.getLocalHost(), 0);
+    } catch (UnknownHostException e) {
+      throw new TezUncheckedException(e);
+    }
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("Not starting TaskAttemptListener RPC in LocalMode");
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/tez/blob/a45ef858/tez-dag/src/main/java/org/apache/tez/dag/app/TezTaskCommunicatorImpl.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/TezTaskCommunicatorImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/TezTaskCommunicatorImpl.java
index 258c927..0bf1b5d 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/TezTaskCommunicatorImpl.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/TezTaskCommunicatorImpl.java
@@ -15,10 +15,8 @@
 package org.apache.tez.dag.app;
 
 import java.io.IOException;
-import java.net.InetAddress;
 import java.net.InetSocketAddress;
 import java.net.URISyntaxException;
-import java.net.UnknownHostException;
 import java.util.Map;
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.ConcurrentMap;
@@ -76,7 +74,7 @@ public class TezTaskCommunicatorImpl extends TaskCommunicator {
   private final TezTaskUmbilicalProtocol taskUmbilical;
   private final String tokenIdentifier;
   private final Token<JobTokenIdentifier> sessionToken;
-  private InetSocketAddress address;
+  protected InetSocketAddress address;
   private Server server;
 
   public static final class ContainerInfo {
@@ -120,10 +118,8 @@ public class TezTaskCommunicatorImpl extends TaskCommunicator {
     this.sessionToken = TokenCache.getSessionToken(taskCommunicatorContext.getCredentials());
   }
 
-
   @Override
   public void serviceStart() {
-
     startRpcServer();
   }
 
@@ -134,43 +130,32 @@ public class TezTaskCommunicatorImpl extends TaskCommunicator {
 
   protected void startRpcServer() {
     Configuration conf = getConfig();
-    if (!conf.getBoolean(TezConfiguration.TEZ_LOCAL_MODE, TezConfiguration.TEZ_LOCAL_MODE_DEFAULT)) {
-      try {
-        JobTokenSecretManager jobTokenSecretManager =
-            new JobTokenSecretManager();
-        jobTokenSecretManager.addTokenForJob(tokenIdentifier, sessionToken);
-
-        server = new RPC.Builder(conf)
-            .setProtocol(TezTaskUmbilicalProtocol.class)
-            .setBindAddress("0.0.0.0")
-            .setPort(0)
-            .setInstance(taskUmbilical)
-            .setNumHandlers(
-                conf.getInt(TezConfiguration.TEZ_AM_TASK_LISTENER_THREAD_COUNT,
-                    TezConfiguration.TEZ_AM_TASK_LISTENER_THREAD_COUNT_DEFAULT))
-            .setSecretManager(jobTokenSecretManager).build();
-
-        // Enable service authorization?
-        if (conf.getBoolean(
-            CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHORIZATION,
-            false)) {
-          refreshServiceAcls(conf, new TezAMPolicyProvider());
-        }
-
-        server.start();
-        this.address = NetUtils.getConnectAddress(server);
-      } catch (IOException e) {
-        throw new TezUncheckedException(e);
-      }
-    } else {
-      try {
-        this.address = new InetSocketAddress(InetAddress.getLocalHost(), 0);
-      } catch (UnknownHostException e) {
-        throw new TezUncheckedException(e);
-      }
-      if (LOG.isDebugEnabled()) {
-        LOG.debug("Not starting TaskAttemptListener RPC in LocalMode");
+    try {
+      JobTokenSecretManager jobTokenSecretManager =
+          new JobTokenSecretManager();
+      jobTokenSecretManager.addTokenForJob(tokenIdentifier, sessionToken);
+
+      server = new RPC.Builder(conf)
+          .setProtocol(TezTaskUmbilicalProtocol.class)
+          .setBindAddress("0.0.0.0")
+          .setPort(0)
+          .setInstance(taskUmbilical)
+          .setNumHandlers(
+              conf.getInt(TezConfiguration.TEZ_AM_TASK_LISTENER_THREAD_COUNT,
+                  TezConfiguration.TEZ_AM_TASK_LISTENER_THREAD_COUNT_DEFAULT))
+          .setSecretManager(jobTokenSecretManager).build();
+
+      // Enable service authorization?
+      if (conf.getBoolean(
+          CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHORIZATION,
+          false)) {
+        refreshServiceAcls(conf, new TezAMPolicyProvider());
       }
+
+      server.start();
+      this.address = NetUtils.getConnectAddress(server);
+    } catch (IOException e) {
+      throw new TezUncheckedException(e);
     }
   }
 

http://git-wip-us.apache.org/repos/asf/tez/blob/a45ef858/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncherRouter.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncherRouter.java b/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncherRouter.java
index 4f9b5bf..70b0cbc 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncherRouter.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncherRouter.java
@@ -25,7 +25,6 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.service.AbstractService;
 import org.apache.hadoop.yarn.event.EventHandler;
 import org.apache.tez.common.ReflectionUtils;
-import org.apache.tez.dag.api.TezConfiguration;
 import org.apache.tez.dag.api.TezConstants;
 import org.apache.tez.dag.api.TezUncheckedException;
 import org.apache.tez.dag.app.AppContext;
@@ -49,17 +48,24 @@ public class ContainerLauncherRouter extends AbstractService
   public ContainerLauncherRouter(Configuration conf, AppContext context,
                                  TaskAttemptListener taskAttemptListener,
                                  String workingDirectory,
-                                 String[] containerLauncherClassIdentifiers) throws UnknownHostException {
+                                 String[] containerLauncherClassIdentifiers,
+                                 boolean isPureLocalMode) throws UnknownHostException {
     super(ContainerLauncherRouter.class.getName());
 
     if (containerLauncherClassIdentifiers == null || containerLauncherClassIdentifiers.length == 0) {
-      containerLauncherClassIdentifiers = new String[] {TezConstants.TEZ_AM_SERVICE_PLUGINS_NAME_DEFAULT};
+      if (isPureLocalMode) {
+        containerLauncherClassIdentifiers =
+            new String[]{TezConstants.TEZ_AM_SERVICE_PLUGINS_LOCAL_MODE_NAME_DEFAULT};
+      } else {
+        containerLauncherClassIdentifiers =
+            new String[]{TezConstants.TEZ_AM_SERVICE_PLUGINS_NAME_DEFAULT};
+      }
     }
     containerLaunchers = new ContainerLauncher[containerLauncherClassIdentifiers.length];
 
     for (int i = 0; i < containerLauncherClassIdentifiers.length; i++) {
       containerLaunchers[i] = createContainerLauncher(containerLauncherClassIdentifiers[i], context,
-          taskAttemptListener, workingDirectory, conf);
+          taskAttemptListener, workingDirectory, isPureLocalMode, conf);
     }
   }
 
@@ -67,6 +73,7 @@ public class ContainerLauncherRouter extends AbstractService
                                                     AppContext context,
                                                     TaskAttemptListener taskAttemptListener,
                                                     String workingDirectory,
+                                                    boolean isPureLocalMode,
                                                     Configuration conf) throws
       UnknownHostException {
     if (containerLauncherClassIdentifier.equals(TezConstants.TEZ_AM_SERVICE_PLUGINS_NAME_DEFAULT)) {
@@ -76,7 +83,7 @@ public class ContainerLauncherRouter extends AbstractService
         .equals(TezConstants.TEZ_AM_SERVICE_PLUGINS_LOCAL_MODE_NAME_DEFAULT)) {
       LOG.info("Creating LocalContainerLauncher");
       return
-          new LocalContainerLauncher(context, taskAttemptListener, workingDirectory);
+          new LocalContainerLauncher(context, taskAttemptListener, workingDirectory, isPureLocalMode);
     } else {
       LOG.info("Creating container launcher : " + containerLauncherClassIdentifier);
       Class<? extends ContainerLauncher> containerLauncherClazz =

http://git-wip-us.apache.org/repos/asf/tez/blob/a45ef858/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/LocalContainerLauncher.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/LocalContainerLauncher.java b/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/LocalContainerLauncher.java
index 9a38732..18b2e35 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/LocalContainerLauncher.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/LocalContainerLauncher.java
@@ -36,6 +36,7 @@ import java.util.concurrent.RejectedExecutionException;
 import java.util.concurrent.atomic.AtomicBoolean;
 
 import com.google.common.base.Preconditions;
+import com.google.common.collect.Maps;
 import com.google.common.util.concurrent.FutureCallback;
 import com.google.common.util.concurrent.Futures;
 import com.google.common.util.concurrent.ListenableFuture;
@@ -90,9 +91,10 @@ public class LocalContainerLauncher extends AbstractService implements
   private final AtomicBoolean serviceStopped = new AtomicBoolean(false);
   private final String workingDirectory;
   private final TaskAttemptListener tal;
-  private final Map<String, String> localEnv = new HashMap<String, String>();
+  private final Map<String, String> localEnv;
   private final ExecutionContext executionContext;
   private int numExecutors;
+  private final boolean isPureLocalMode;
 
   private final ConcurrentHashMap<ContainerId, ListenableFuture<TezChild.ContainerExecutionResult>>
       runningContainers =
@@ -112,16 +114,26 @@ public class LocalContainerLauncher extends AbstractService implements
 
   public LocalContainerLauncher(AppContext context,
                                 TaskAttemptListener taskAttemptListener,
-                                String workingDirectory) throws UnknownHostException {
+                                String workingDirectory,
+                                boolean isPureLocalMode) throws UnknownHostException {
     super(LocalContainerLauncher.class.getName());
     this.context = context;
     this.tal = taskAttemptListener;
 
     this.workingDirectory = workingDirectory;
-    AuxiliaryServiceHelper.setServiceDataIntoEnv(
-        ShuffleUtils.SHUFFLE_HANDLER_SERVICE_ID, ByteBuffer.allocate(4).putInt(0), localEnv);
-    executionContext = new ExecutionContextImpl(InetAddress.getLocalHost().getHostName());
-    // User cannot be set here since it isn't available till a DAG is running.
+    this.isPureLocalMode = isPureLocalMode;
+    if (isPureLocalMode) {
+      localEnv = Maps.newHashMap();
+      AuxiliaryServiceHelper.setServiceDataIntoEnv(
+          ShuffleUtils.SHUFFLE_HANDLER_SERVICE_ID, ByteBuffer.allocate(4).putInt(0), localEnv);
+    } else {
+      localEnv = System.getenv();
+    }
+
+    // Check if the hostname is set in the environment before overriding it.
+    String host = isPureLocalMode ? InetAddress.getLocalHost().getHostName() :
+        System.getenv(Environment.NM_HOST.name());
+    executionContext = new ExecutionContextImpl(host);
   }
 
   @Override
@@ -338,7 +350,9 @@ public class LocalContainerLauncher extends AbstractService implements
       InterruptedException, TezException, IOException {
     Map<String, String> containerEnv = new HashMap<String, String>();
     containerEnv.putAll(localEnv);
-    containerEnv.put(Environment.USER.name(), context.getUser());
+    // Use the user from env if it's available.
+    String user = isPureLocalMode ? System.getenv(Environment.USER.name()) : context.getUser();
+    containerEnv.put(Environment.USER.name(), user);
 
     long memAvailable;
     synchronized (this) { // needed to fix findbugs Inconsistent synchronization warning
@@ -347,8 +361,7 @@ public class LocalContainerLauncher extends AbstractService implements
     TezChild tezChild =
         TezChild.newTezChild(defaultConf, null, 0, containerId.toString(), tokenIdentifier,
             attemptNumber, localDirs, workingDirectory, containerEnv, "", executionContext, credentials,
-            memAvailable, context.getUser());
-    tezChild.setUmbilical(tezTaskUmbilicalProtocol);
+            memAvailable, context.getUser(), tezTaskUmbilicalProtocol);
     return tezChild;
   }
 

http://git-wip-us.apache.org/repos/asf/tez/blob/a45ef858/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerEventHandler.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerEventHandler.java b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerEventHandler.java
index 5a0ace8..5a8e9fe 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerEventHandler.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerEventHandler.java
@@ -434,6 +434,8 @@ public class TaskSchedulerEventHandler extends AbstractService
       } else {
         customAppIdIdentifier = SCHEDULER_APP_ID_BASE + (j++ * SCHEDULER_APP_ID_INCREMENT);
       }
+      LOG.info("ClusterIdentifier for TaskScheduler [" + i + ":" + taskSchedulerClasses[i] + "]=" +
+          customAppIdIdentifier);
       taskSchedulers[i] = createTaskScheduler(host, port,
           trackingUrl, appContext, taskSchedulerClasses[i], customAppIdIdentifier);
     }

http://git-wip-us.apache.org/repos/asf/tez/blob/a45ef858/tez-dag/src/test/java/org/apache/tez/dag/app/MockDAGAppMaster.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/MockDAGAppMaster.java b/tez-dag/src/test/java/org/apache/tez/dag/app/MockDAGAppMaster.java
index 59efb87..4f014a4 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/MockDAGAppMaster.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/MockDAGAppMaster.java
@@ -511,7 +511,8 @@ public class MockDAGAppMaster extends DAGAppMaster {
   // use mock container launcher for tests
   @Override
   protected ContainerLauncherRouter createContainerLauncherRouter(final Configuration conf,
-                                                                  String[] containerLaunchers)
+                                                                  String[] containerLaunchers,
+                                                                  boolean isLocal)
       throws UnknownHostException {
     return new ContainerLauncherRouter(containerLauncher);
   }

http://git-wip-us.apache.org/repos/asf/tez/blob/a45ef858/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskAttemptListenerImplTezDag.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskAttemptListenerImplTezDag.java b/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskAttemptListenerImplTezDag.java
index 98fcddc..0cf1959 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskAttemptListenerImplTezDag.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskAttemptListenerImplTezDag.java
@@ -119,7 +119,7 @@ public class TestTaskAttemptListenerImplTezDag {
     doReturn(container).when(amContainer).getContainer();
 
     taskAttemptListener = new TaskAttemptListenerImplForTest(appContext,
-        mock(TaskHeartbeatHandler.class), mock(ContainerHeartbeatHandler.class), null, null);
+        mock(TaskHeartbeatHandler.class), mock(ContainerHeartbeatHandler.class), null, null, false);
 
     taskSpec = mock(TaskSpec.class);
     doReturn(taskAttemptID).when(taskSpec).getTaskAttemptID();

http://git-wip-us.apache.org/repos/asf/tez/blob/a45ef858/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/ContainerRunnerImpl.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/ContainerRunnerImpl.java b/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/ContainerRunnerImpl.java
index 4a6ce33..25d6030 100644
--- a/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/ContainerRunnerImpl.java
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/ContainerRunnerImpl.java
@@ -282,7 +282,7 @@ public class ContainerRunnerImpl extends AbstractService implements ContainerRun
               request.getContainerIdString(),
               request.getTokenIdentifier(), request.getAppAttemptNumber(), workingDir, localDirs,
               envMap, objectRegistry, pid,
-              executionContext, credentials, memoryAvailable, request.getUser());
+              executionContext, credentials, memoryAvailable, request.getUser(), null);
       ContainerExecutionResult result = tezChild.run();
       LOG.info("ExecutionTime for Container: " + request.getContainerIdString() + "=" +
           sw.stop().elapsedMillis());

http://git-wip-us.apache.org/repos/asf/tez/blob/a45ef858/tez-ext-service-tests/src/test/java/org/apache/tez/tests/TestExternalTezServices.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/tests/TestExternalTezServices.java b/tez-ext-service-tests/src/test/java/org/apache/tez/tests/TestExternalTezServices.java
index 9c149c6..01c2080 100644
--- a/tez-ext-service-tests/src/test/java/org/apache/tez/tests/TestExternalTezServices.java
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/tests/TestExternalTezServices.java
@@ -40,6 +40,7 @@ import org.apache.tez.service.MiniTezTestServiceCluster;
 import org.apache.tez.test.MiniTezCluster;
 import org.junit.AfterClass;
 import org.junit.BeforeClass;
+import org.junit.Ignore;
 import org.junit.Test;
 
 public class TestExternalTezServices {
@@ -120,26 +121,23 @@ public class TestExternalTezServices {
 
     confForJobs.setStrings(TezConfiguration.TEZ_AM_TASK_SCHEDULERS,
         TezConstants.TEZ_AM_SERVICE_PLUGINS_NAME_DEFAULT,
-//        TezConstants.TEZ_AM_SERVICE_PLUGINS_LOCAL_MODE_NAME_DEFAULT,
+        TezConstants.TEZ_AM_SERVICE_PLUGINS_LOCAL_MODE_NAME_DEFAULT,
         EXT_PUSH_ENTITY_NAME + ":" + TezTestServiceTaskSchedulerService.class.getName());
 
     confForJobs.setStrings(TezConfiguration.TEZ_AM_CONTAINER_LAUNCHERS,
         TezConstants.TEZ_AM_SERVICE_PLUGINS_NAME_DEFAULT,
-//        TezConstants.TEZ_AM_SERVICE_PLUGINS_LOCAL_MODE_NAME_DEFAULT,
+        TezConstants.TEZ_AM_SERVICE_PLUGINS_LOCAL_MODE_NAME_DEFAULT,
         EXT_PUSH_ENTITY_NAME + ":" + TezTestServiceNoOpContainerLauncher.class.getName());
 
     confForJobs.setStrings(TezConfiguration.TEZ_AM_TASK_COMMUNICATORS,
         TezConstants.TEZ_AM_SERVICE_PLUGINS_NAME_DEFAULT,
-//        TezConstants.TEZ_AM_SERVICE_PLUGINS_LOCAL_MODE_NAME_DEFAULT,
+        TezConstants.TEZ_AM_SERVICE_PLUGINS_LOCAL_MODE_NAME_DEFAULT,
         EXT_PUSH_ENTITY_NAME + ":" + TezTestServiceTaskCommunicatorImpl.class.getName());
 
     // Default all jobs to run via the service. Individual tests override this on a per vertex/dag level.
-    confForJobs.set(TezConfiguration.TEZ_AM_VERTEX_TASK_SCHEDULER_NAME,
-        TezConstants.TEZ_AM_SERVICE_PLUGINS_NAME_DEFAULT);
-    confForJobs.set(TezConfiguration.TEZ_AM_VERTEX_CONTAINER_LAUNCHER_NAME,
-        TezConstants.TEZ_AM_SERVICE_PLUGINS_NAME_DEFAULT);
-    confForJobs.set(TezConfiguration.TEZ_AM_VERTEX_TASK_COMMUNICATOR_NAME,
-        TezConstants.TEZ_AM_SERVICE_PLUGINS_NAME_DEFAULT);
+    confForJobs.set(TezConfiguration.TEZ_AM_VERTEX_TASK_SCHEDULER_NAME, EXT_PUSH_ENTITY_NAME);
+    confForJobs.set(TezConfiguration.TEZ_AM_VERTEX_CONTAINER_LAUNCHER_NAME, EXT_PUSH_ENTITY_NAME);
+    confForJobs.set(TezConfiguration.TEZ_AM_VERTEX_TASK_COMMUNICATOR_NAME, EXT_PUSH_ENTITY_NAME);
 
     // Setup various executor sets
     PROPS_REGULAR_CONTAINERS.put(TezConfiguration.TEZ_AM_VERTEX_TASK_SCHEDULER_NAME,
@@ -232,18 +230,55 @@ public class TestExternalTezServices {
 
   @Test(timeout = 60000)
   public void testMixed1() throws Exception { // M-ExtService, R-containers
-    int expectedExternalSubmissions = 4 + 0; //4 for 4 src files, 3 for num reducers.
+    int expectedExternalSubmissions = 4 + 0; //4 for 4 src files, 0 for num reducers.
     runJoinValidate("Mixed1", expectedExternalSubmissions, PROPS_EXT_SERVICE_PUSH,
         PROPS_EXT_SERVICE_PUSH, PROPS_REGULAR_CONTAINERS);
   }
 
   @Test(timeout = 60000)
   public void testMixed2() throws Exception { // M-Containers, R-ExtService
-    int expectedExternalSubmissions = 0 + 3; //4 for 4 src files, 3 for num reducers.
+    int expectedExternalSubmissions = 0 + 3; // 3 for num reducers.
     runJoinValidate("Mixed2", expectedExternalSubmissions, PROPS_REGULAR_CONTAINERS,
         PROPS_REGULAR_CONTAINERS, PROPS_EXT_SERVICE_PUSH);
   }
 
+  @Test(timeout = 60000)
+  public void testMixed3() throws Exception { // M - service, R-AM
+    int expectedExternalSubmissions = 4 + 0; //4 for 4 src files, 0 for num reducers (in-AM).
+    runJoinValidate("Mixed3", expectedExternalSubmissions, PROPS_EXT_SERVICE_PUSH,
+        PROPS_EXT_SERVICE_PUSH, PROPS_IN_AM);
+  }
+
+  @Test(timeout = 60000)
+  public void testMixed4() throws Exception { // M - containers, R-AM
+    int expectedExternalSubmissions = 0 + 0; // Nothing in external service.
+    runJoinValidate("Mixed4", expectedExternalSubmissions, PROPS_REGULAR_CONTAINERS,
+        PROPS_REGULAR_CONTAINERS, PROPS_IN_AM);
+  }
+
+  @Test(timeout = 60000)
+  public void testMixed5() throws Exception { // M1 - containers, M2-extservice, R-AM
+    int expectedExternalSubmissions = 2 + 0; // 2 for M2
+    runJoinValidate("Mixed5", expectedExternalSubmissions, PROPS_REGULAR_CONTAINERS,
+        PROPS_EXT_SERVICE_PUSH, PROPS_IN_AM);
+  }
+
+
+  @Ignore // Re-activate this after the AM registers the shuffle token with the launcher.
+  @Test(timeout = 60000)
+  public void testMixed6() throws Exception { // M - AM, R - Service
+    int expectedExternalSubmissions = 0 + 3; // 3 for R in service
+    runJoinValidate("Mixed6", expectedExternalSubmissions, PROPS_IN_AM,
+        PROPS_IN_AM, PROPS_EXT_SERVICE_PUSH);
+  }
+
+  @Test(timeout = 60000)
+  public void testMixed7() throws Exception { // M - AM, R - Containers
+    int expectedExternalSubmissions = 0; // Nothing in ext service
+    runJoinValidate("Mixed7", expectedExternalSubmissions, PROPS_IN_AM,
+        PROPS_IN_AM, PROPS_REGULAR_CONTAINERS);
+  }
+
 
   private void runJoinValidate(String name, int extExpectedCount, Map<String, String> lhsProps,
                                Map<String, String> rhsProps,

http://git-wip-us.apache.org/repos/asf/tez/blob/a45ef858/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TezChild.java
----------------------------------------------------------------------
diff --git a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TezChild.java b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TezChild.java
index 3cba3ce..7615f08 100644
--- a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TezChild.java
+++ b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TezChild.java
@@ -64,6 +64,7 @@ import org.apache.tez.dag.utils.RelocalizationUtils;
 import org.apache.tez.runtime.api.ExecutionContext;
 import org.apache.tez.runtime.api.impl.ExecutionContextImpl;
 import org.apache.tez.runtime.api.impl.TaskSpec;
+import org.apache.tez.runtime.api.impl.TezUmbilical;
 import org.apache.tez.runtime.common.objectregistry.ObjectRegistryImpl;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -93,7 +94,6 @@ public class TezChild {
   private final int amHeartbeatInterval;
   private final long sendCounterInterval;
   private final int maxEventsToGet;
-  private final boolean isLocal;
   private final String workingDir;
 
   private final ListeningExecutorService executor;
@@ -108,9 +108,10 @@ public class TezChild {
   private final String user;
 
   private Multimap<String, String> startedInputsMap = HashMultimap.create();
+  private final boolean ownUmbilical;
 
+  private final TezTaskUmbilicalProtocol umbilical;
   private TaskReporter taskReporter;
-  private TezTaskUmbilicalProtocol umbilical;
   private int taskCount = 0;
   private TezVertexID lastVertexID;
 
@@ -119,7 +120,7 @@ public class TezChild {
       Map<String, String> serviceProviderEnvMap,
       ObjectRegistryImpl objectRegistry, String pid,
       ExecutionContext executionContext,
-      Credentials credentials, long memAvailable, String user)
+      Credentials credentials, long memAvailable, String user, TezTaskUmbilicalProtocol umbilical)
       throws IOException, InterruptedException {
     this.defaultConf = conf;
     this.containerIdString = containerIdentifier;
@@ -133,6 +134,8 @@ public class TezChild {
     this.memAvailable = memAvailable;
     this.user = user;
 
+    LOG.info("TezChild created with umbilical: " + umbilical);
+
     getTaskMaxSleepTime = defaultConf.getInt(
         TezConfiguration.TEZ_TASK_GET_TASK_SLEEP_INTERVAL_MS_MAX,
         TezConfiguration.TEZ_TASK_GET_TASK_SLEEP_INTERVAL_MS_MAX_DEFAULT);
@@ -161,25 +164,27 @@ public class TezChild {
       }
     }
 
-    this.isLocal = defaultConf.getBoolean(TezConfiguration.TEZ_LOCAL_MODE,
-        TezConfiguration.TEZ_LOCAL_MODE_DEFAULT);
     UserGroupInformation taskOwner = UserGroupInformation.createRemoteUser(tokenIdentifier);
     Token<JobTokenIdentifier> jobToken = TokenCache.getSessionToken(credentials);
 
     serviceConsumerMetadata.put(TezConstants.TEZ_SHUFFLE_HANDLER_SERVICE_ID,
         TezCommonUtils.convertJobTokenToBytes(jobToken));
 
-    if (!isLocal) {
+    if (umbilical == null) {
       final InetSocketAddress address = NetUtils.createSocketAddrForHost(host, port);
       SecurityUtil.setTokenService(jobToken, address);
       taskOwner.addToken(jobToken);
-      umbilical = taskOwner.doAs(new PrivilegedExceptionAction<TezTaskUmbilicalProtocol>() {
+      this.umbilical = taskOwner.doAs(new PrivilegedExceptionAction<TezTaskUmbilicalProtocol>() {
         @Override
         public TezTaskUmbilicalProtocol run() throws Exception {
           return RPC.getProxy(TezTaskUmbilicalProtocol.class,
               TezTaskUmbilicalProtocol.versionID, address, defaultConf);
         }
       });
+      ownUmbilical = true;
+    } else {
+      this.umbilical = umbilical;
+      ownUmbilical = false;
     }
   }
   
@@ -353,7 +358,7 @@ public class TezChild {
       if (taskReporter != null) {
         taskReporter.shutdown();
       }
-      if (!isLocal) {
+      if (ownUmbilical) {
         RPC.stopProxy(umbilical);
         // TODO Temporary change. Revert. Ideally, move this over to the main method in TezChild if possible.
 //        LogManager.shutdown();
@@ -361,12 +366,6 @@ public class TezChild {
     }
   }
 
-  public void setUmbilical(TezTaskUmbilicalProtocol tezTaskUmbilicalProtocol){
-    if(tezTaskUmbilicalProtocol != null){
-      this.umbilical = tezTaskUmbilicalProtocol;
-    }
-  }
-
   public static class ContainerExecutionResult {
     public static enum ExitStatus {
       SUCCESS(0),
@@ -412,7 +411,8 @@ public class TezChild {
   public static TezChild newTezChild(Configuration conf, String host, int port, String containerIdentifier,
       String tokenIdentifier, int attemptNumber, String[] localDirs, String workingDirectory,
       Map<String, String> serviceProviderEnvMap, @Nullable String pid,
-      ExecutionContext executionContext, Credentials credentials, long memAvailable, String user)
+      ExecutionContext executionContext, Credentials credentials, long memAvailable, String user,
+      TezTaskUmbilicalProtocol tezUmbilical)
       throws IOException, InterruptedException, TezException {
 
     // Pull in configuration specified for the session.
@@ -425,7 +425,7 @@ public class TezChild {
 
     return new TezChild(conf, host, port, containerIdentifier, tokenIdentifier,
         attemptNumber, workingDirectory, localDirs, serviceProviderEnvMap, objectRegistry, pid,
-        executionContext, credentials, memAvailable, user);
+        executionContext, credentials, memAvailable, user, tezUmbilical);
   }
 
   public static void main(String[] args) throws IOException, InterruptedException, TezException {
@@ -459,7 +459,7 @@ public class TezChild {
         tokenIdentifier, attemptNumber, localDirs, System.getenv(Environment.PWD.name()),
         System.getenv(), pid, new ExecutionContextImpl(System.getenv(Environment.NM_HOST.name())),
         credentials, Runtime.getRuntime().maxMemory(), System
-            .getenv(ApplicationConstants.Environment.USER.toString()));
+            .getenv(ApplicationConstants.Environment.USER.toString()), null);
     tezChild.run();
   }
 


[30/43] tez git commit: TEZ-2175. Task priority should be available to the TaskCommunicator plugin. (sseth)

Posted by ss...@apache.org.
TEZ-2175. Task priority should be available to the TaskCommunicator plugin. (sseth)


Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/99a1b85e
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/99a1b85e
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/99a1b85e

Branch: refs/heads/TEZ-2003
Commit: 99a1b85e2275c4635b2bddbdfd258c43116ebd9e
Parents: 6014c96
Author: Siddharth Seth <ss...@apache.org>
Authored: Tue Mar 10 00:47:07 2015 -0700
Committer: Siddharth Seth <ss...@apache.org>
Committed: Fri May 8 14:41:30 2015 -0700

----------------------------------------------------------------------
 TEZ-2003-CHANGES.txt                                            | 1 +
 .../src/main/java/org/apache/tez/dag/api/TaskCommunicator.java  | 2 +-
 .../org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java    | 2 +-
 .../java/org/apache/tez/dag/app/TezTaskCommunicatorImpl.java    | 3 ++-
 .../dag/app/taskcomm/TezTestServiceTaskCommunicatorImpl.java    | 5 +++--
 5 files changed, 8 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tez/blob/99a1b85e/TEZ-2003-CHANGES.txt
----------------------------------------------------------------------
diff --git a/TEZ-2003-CHANGES.txt b/TEZ-2003-CHANGES.txt
index 5c5fd8e..7726815 100644
--- a/TEZ-2003-CHANGES.txt
+++ b/TEZ-2003-CHANGES.txt
@@ -9,5 +9,6 @@ ALL CHANGES:
   TEZ-2131. Add additional tests for tasks running in the AM.
   TEZ-2138. Fix minor bugs in adding default scheduler, getting launchers.
   TEZ-2139. Update tez version to 0.7.0-TEZ-2003-SNAPSHOT.
+  TEZ-2175. Task priority should be available to the TaskCommunicator plugin.
 
 INCOMPATIBLE CHANGES:

http://git-wip-us.apache.org/repos/asf/tez/blob/99a1b85e/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicator.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicator.java b/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicator.java
index c9f85e0..82eed20 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicator.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicator.java
@@ -43,7 +43,7 @@ public abstract class TaskCommunicator extends AbstractService {
   public abstract void registerRunningTaskAttempt(ContainerId containerId, TaskSpec taskSpec,
                                                   Map<String, LocalResource> additionalResources,
                                                   Credentials credentials,
-                                                  boolean credentialsChanged);
+                                                  boolean credentialsChanged, int priority);
 
   // TODO TEZ-2003 Remove reference to TaskAttemptID
   public abstract void unregisterRunningTaskAttempt(TezTaskAttemptID taskAttemptID);

http://git-wip-us.apache.org/repos/asf/tez/blob/99a1b85e/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java b/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
index 8346839..b570301 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
@@ -360,7 +360,7 @@ public class TaskAttemptListenerImpTezDag extends AbstractService implements
     }
     taskCommunicators[taskCommId].registerRunningTaskAttempt(containerId, amContainerTask.getTask(),
         amContainerTask.getAdditionalResources(), amContainerTask.getCredentials(),
-        amContainerTask.haveCredentialsChanged());
+        amContainerTask.haveCredentialsChanged(), amContainerTask.getPriority());
   }
 
   @Override

http://git-wip-us.apache.org/repos/asf/tez/blob/99a1b85e/tez-dag/src/main/java/org/apache/tez/dag/app/TezTaskCommunicatorImpl.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/TezTaskCommunicatorImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/TezTaskCommunicatorImpl.java
index 0bf1b5d..f288748 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/TezTaskCommunicatorImpl.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/TezTaskCommunicatorImpl.java
@@ -194,7 +194,8 @@ public class TezTaskCommunicatorImpl extends TaskCommunicator {
   @Override
   public void registerRunningTaskAttempt(ContainerId containerId, TaskSpec taskSpec,
                                          Map<String, LocalResource> additionalResources,
-                                         Credentials credentials, boolean credentialsChanged) {
+                                         Credentials credentials, boolean credentialsChanged,
+                                         int priority) {
 
     ContainerInfo containerInfo = registeredContainers.get(containerId);
     Preconditions.checkNotNull(containerInfo,

http://git-wip-us.apache.org/repos/asf/tez/blob/99a1b85e/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/taskcomm/TezTestServiceTaskCommunicatorImpl.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/taskcomm/TezTestServiceTaskCommunicatorImpl.java b/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/taskcomm/TezTestServiceTaskCommunicatorImpl.java
index 78cdcde..a327caf 100644
--- a/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/taskcomm/TezTestServiceTaskCommunicatorImpl.java
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/taskcomm/TezTestServiceTaskCommunicatorImpl.java
@@ -100,9 +100,10 @@ public class TezTestServiceTaskCommunicatorImpl extends TezTaskCommunicatorImpl
   public void registerRunningTaskAttempt(final ContainerId containerId, final TaskSpec taskSpec,
                                          Map<String, LocalResource> additionalResources,
                                          Credentials credentials,
-                                         boolean credentialsChanged)  {
+                                         boolean credentialsChanged,
+                                         int priority)  {
     super.registerRunningTaskAttempt(containerId, taskSpec, additionalResources, credentials,
-        credentialsChanged);
+        credentialsChanged, priority);
     SubmitWorkRequestProto requestProto = null;
     try {
       requestProto = constructSubmitWorkRequest(containerId, taskSpec);


[19/43] tez git commit: TEZ-2006. Task communication plane needs to be pluggable. (sseth)

Posted by ss...@apache.org.
TEZ-2006. Task communication plane needs to be pluggable. (sseth)


Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/7b71d3b7
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/7b71d3b7
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/7b71d3b7

Branch: refs/heads/TEZ-2003
Commit: 7b71d3b762f9bf84d05caaa6c6cc48028478a07f
Parents: 44bea93
Author: Siddharth Seth <ss...@apache.org>
Authored: Thu Feb 12 11:25:45 2015 -0800
Committer: Siddharth Seth <ss...@apache.org>
Committed: Fri May 8 14:40:12 2015 -0700

----------------------------------------------------------------------
 TEZ-2003-CHANGES.txt                            |   1 +
 .../apache/tez/dag/api/TaskCommunicator.java    |  54 ++
 .../tez/dag/api/TaskCommunicatorContext.java    |  48 ++
 .../tez/dag/api/TaskHeartbeatRequest.java       |  63 +++
 .../tez/dag/api/TaskHeartbeatResponse.java      |  39 ++
 .../java/org/apache/tez/dag/app/AppContext.java |   3 +
 .../org/apache/tez/dag/app/DAGAppMaster.java    |   5 +
 .../dag/app/TaskAttemptListenerImpTezDag.java   | 522 +++++++------------
 .../tez/dag/app/TezTaskCommunicatorImpl.java    | 474 +++++++++++++++++
 .../app/launcher/LocalContainerLauncher.java    |  10 +-
 .../tez/dag/app/rm/container/AMContainer.java   |   3 +-
 .../rm/container/AMContainerEventAssignTA.java  |   2 +
 .../dag/app/rm/container/AMContainerImpl.java   |   1 +
 .../apache/tez/dag/app/MockDAGAppMaster.java    |  27 +-
 .../app/TestTaskAttemptListenerImplTezDag.java  |  82 +--
 15 files changed, 967 insertions(+), 367 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tez/blob/7b71d3b7/TEZ-2003-CHANGES.txt
----------------------------------------------------------------------
diff --git a/TEZ-2003-CHANGES.txt b/TEZ-2003-CHANGES.txt
index 1822fcb..d7e4be5 100644
--- a/TEZ-2003-CHANGES.txt
+++ b/TEZ-2003-CHANGES.txt
@@ -1,4 +1,5 @@
 ALL CHANGES:
   TEZ-2019. Temporarily allow the scheduler and launcher to be specified via configuration.
+  TEZ-2006. Task communication plane needs to be pluggable.
 
 INCOMPATIBLE CHANGES:

http://git-wip-us.apache.org/repos/asf/tez/blob/7b71d3b7/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicator.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicator.java b/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicator.java
new file mode 100644
index 0000000..97f9c16
--- /dev/null
+++ b/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicator.java
@@ -0,0 +1,54 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.dag.api;
+
+import java.net.InetSocketAddress;
+import java.util.Map;
+
+import org.apache.hadoop.security.Credentials;
+import org.apache.hadoop.service.AbstractService;
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.api.records.LocalResource;
+import org.apache.tez.dag.records.TezTaskAttemptID;
+import org.apache.tez.runtime.api.impl.TaskSpec;
+
+// TODO TEZ-2003 Move this into the tez-api module
+public abstract class TaskCommunicator extends AbstractService {
+  public TaskCommunicator(String name) {
+    super(name);
+  }
+
+  // TODO TEZ-2003 Ideally, don't expose YARN containerId; instead expose a Tez specific construct.
+  // TODO When talking to an external service, this plugin implementer may need access to a host:port
+  public abstract void registerRunningContainer(ContainerId containerId, String hostname, int port);
+
+  // TODO TEZ-2003 Ideally, don't expose YARN containerId; instead expose a Tez specific construct.
+  public abstract void registerContainerEnd(ContainerId containerId);
+
+  // TODO TEZ-2003 TaskSpec breakup into a clean interface
+  // TODO TEZ-2003 Add support for priority
+  public abstract void registerRunningTaskAttempt(ContainerId containerId, TaskSpec taskSpec,
+                                                  Map<String, LocalResource> additionalResources,
+                                                  Credentials credentials,
+                                                  boolean credentialsChanged);
+
+  // TODO TEZ-2003 Remove reference to TaskAttemptID
+  public abstract void unregisterRunningTaskAttempt(TezTaskAttemptID taskAttemptID);
+
+  // TODO TEZ-2003 This doesn't necessarily belong here. A server may not start within the AM.
+  public abstract InetSocketAddress getAddress();
+
+  // TODO Eventually. Add methods here to support preemption of tasks.
+}

http://git-wip-us.apache.org/repos/asf/tez/blob/7b71d3b7/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicatorContext.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicatorContext.java b/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicatorContext.java
new file mode 100644
index 0000000..9b2d889
--- /dev/null
+++ b/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicatorContext.java
@@ -0,0 +1,48 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.dag.api;
+
+import java.io.IOException;
+
+import org.apache.hadoop.security.Credentials;
+import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.tez.dag.records.TezTaskAttemptID;
+
+
+// Do not make calls into this from within a held lock.
+
+// TODO TEZ-2003 Move this into the tez-api module
+public interface TaskCommunicatorContext {
+
+  // TODO TEZ-2003 Add signalling back into this to indicate errors - e.g. Container unregsitered, task no longer running, etc.
+
+  // TODO TEZ-2003 Maybe add book-keeping as a helper library, instead of each impl tracking container to task etc.
+
+  ApplicationAttemptId getApplicationAttemptId();
+  Credentials getCredentials();
+
+  // TODO TEZ-2003 Move to vertex, taskIndex, version
+  boolean canCommit(TezTaskAttemptID taskAttemptId) throws IOException;
+
+  TaskHeartbeatResponse heartbeat(TaskHeartbeatRequest request) throws IOException, TezException;
+
+  boolean isKnownContainer(ContainerId containerId);
+
+  // TODO TEZ-2003 Move to vertex, taskIndex, version
+  void taskStartedRemotely(TezTaskAttemptID taskAttemptID, ContainerId containerId);
+
+  // TODO Eventually Add methods to report availability stats to the scheduler.
+}

http://git-wip-us.apache.org/repos/asf/tez/blob/7b71d3b7/tez-dag/src/main/java/org/apache/tez/dag/api/TaskHeartbeatRequest.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/api/TaskHeartbeatRequest.java b/tez-dag/src/main/java/org/apache/tez/dag/api/TaskHeartbeatRequest.java
new file mode 100644
index 0000000..f6bc8f0
--- /dev/null
+++ b/tez-dag/src/main/java/org/apache/tez/dag/api/TaskHeartbeatRequest.java
@@ -0,0 +1,63 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.dag.api;
+
+import java.util.List;
+
+import org.apache.tez.dag.records.TezTaskAttemptID;
+import org.apache.tez.runtime.api.impl.TezEvent;
+
+// TODO TEZ-2003 Move this into the tez-api module
+public class TaskHeartbeatRequest {
+
+  // TODO TEZ-2003 Ideally containerIdentifier should not be part of the request.
+  // Replace with a task lookup - vertex name + task index
+  private final String containerIdentifier;
+  // TODO TEZ-2003 Get rid of the task attemptId reference if possible
+  private final TezTaskAttemptID taskAttemptId;
+  private final List<TezEvent> events;
+  private final int startIndex;
+  private final int maxEvents;
+
+
+  public TaskHeartbeatRequest(String containerIdentifier, TezTaskAttemptID taskAttemptId, List<TezEvent> events, int startIndex,
+                              int maxEvents) {
+    this.containerIdentifier = containerIdentifier;
+    this.taskAttemptId = taskAttemptId;
+    this.events = events;
+    this.startIndex = startIndex;
+    this.maxEvents = maxEvents;
+  }
+
+  public String getContainerIdentifier() {
+    return containerIdentifier;
+  }
+
+  public TezTaskAttemptID getTaskAttemptId() {
+    return taskAttemptId;
+  }
+
+  public List<TezEvent> getEvents() {
+    return events;
+  }
+
+  public int getStartIndex() {
+    return startIndex;
+  }
+
+  public int getMaxEvents() {
+    return maxEvents;
+  }
+}

http://git-wip-us.apache.org/repos/asf/tez/blob/7b71d3b7/tez-dag/src/main/java/org/apache/tez/dag/api/TaskHeartbeatResponse.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/api/TaskHeartbeatResponse.java b/tez-dag/src/main/java/org/apache/tez/dag/api/TaskHeartbeatResponse.java
new file mode 100644
index 0000000..c82a743
--- /dev/null
+++ b/tez-dag/src/main/java/org/apache/tez/dag/api/TaskHeartbeatResponse.java
@@ -0,0 +1,39 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.dag.api;
+
+import java.util.List;
+
+import org.apache.tez.runtime.api.impl.TezEvent;
+
+// TODO TEZ-2003 Move this into the tez-api module
+public class TaskHeartbeatResponse {
+
+  private final boolean shouldDie;
+  private List<TezEvent> events;
+
+  public TaskHeartbeatResponse(boolean shouldDie, List<TezEvent> events) {
+    this.shouldDie = shouldDie;
+    this.events = events;
+  }
+
+  public boolean isShouldDie() {
+    return shouldDie;
+  }
+
+  public List<TezEvent> getEvents() {
+    return events;
+  }
+}

http://git-wip-us.apache.org/repos/asf/tez/blob/7b71d3b7/tez-dag/src/main/java/org/apache/tez/dag/app/AppContext.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/AppContext.java b/tez-dag/src/main/java/org/apache/tez/dag/app/AppContext.java
index 4781784..37f7624 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/AppContext.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/AppContext.java
@@ -24,6 +24,7 @@ import java.util.Set;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.security.Credentials;
 import org.apache.hadoop.yarn.api.records.ApplicationAccessType;
 import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
 import org.apache.hadoop.yarn.api.records.ApplicationId;
@@ -106,4 +107,6 @@ public interface AppContext {
   String[] getLocalDirs();
 
   String getAMUser();
+
+  Credentials getAppCredentials();
 }

http://git-wip-us.apache.org/repos/asf/tez/blob/7b71d3b7/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java b/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java
index 73ee56e..bfc2d58 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java
@@ -1486,6 +1486,11 @@ public class DAGAppMaster extends AbstractService {
     }
 
     @Override
+    public Credentials getAppCredentials() {
+      return amCredentials;
+    }
+
+    @Override
     public Map<ApplicationAccessType, String> getApplicationACLs() {
       if (getServiceState() != STATE.STARTED) {
         throw new TezUncheckedException(

http://git-wip-us.apache.org/repos/asf/tez/blob/7b71d3b7/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java b/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
index 970489d..0d9dc31 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
@@ -18,15 +18,14 @@
 package org.apache.tez.dag.app;
 
 import java.io.IOException;
-import java.net.InetAddress;
+import java.lang.reflect.Constructor;
+import java.lang.reflect.InvocationTargetException;
 import java.net.InetSocketAddress;
 import java.net.URISyntaxException;
 import java.net.UnknownHostException;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.List;
-import java.util.Map;
-import java.util.Map.Entry;
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.ConcurrentMap;
 
@@ -38,216 +37,212 @@ import org.apache.tez.runtime.api.events.TaskStatusUpdateEvent;
 import org.apache.tez.runtime.api.impl.EventType;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
+import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
-import org.apache.hadoop.ipc.ProtocolSignature;
-import org.apache.hadoop.ipc.RPC;
-import org.apache.hadoop.ipc.Server;
-import org.apache.hadoop.net.NetUtils;
-import org.apache.hadoop.security.authorize.PolicyProvider;
+import org.apache.hadoop.security.Credentials;
 import org.apache.hadoop.service.AbstractService;
+import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
+import org.apache.hadoop.yarn.api.records.NodeId;
+import org.apache.tez.common.ReflectionUtils;
+import org.apache.tez.dag.api.TaskCommunicator;
+import org.apache.tez.dag.api.TaskCommunicatorContext;
+import org.apache.tez.dag.api.TaskHeartbeatResponse;
+import org.apache.tez.dag.api.TezConfiguration;
 import org.apache.tez.dag.api.TezException;
 import org.apache.tez.dag.api.TezUncheckedException;
 import org.apache.hadoop.yarn.api.records.ContainerId;
-import org.apache.hadoop.yarn.api.records.LocalResource;
 import org.apache.hadoop.yarn.util.ConverterUtils;
-import org.apache.tez.common.ContainerContext;
-import org.apache.tez.common.ContainerTask;
-import org.apache.tez.common.TezConverterUtils;
-import org.apache.tez.common.TezLocalResource;
-import org.apache.tez.common.TezTaskUmbilicalProtocol;
-import org.apache.tez.dag.api.TezConfiguration;
+import org.apache.tez.dag.api.TaskHeartbeatRequest;
 import org.apache.tez.dag.app.dag.DAG;
 import org.apache.tez.dag.app.dag.Task;
 import org.apache.tez.dag.app.dag.event.TaskAttemptEventStartedRemotely;
 import org.apache.tez.dag.app.dag.event.VertexEventRouteEvent;
+import org.apache.tez.dag.app.rm.TaskSchedulerService;
 import org.apache.tez.dag.app.rm.container.AMContainerTask;
-import org.apache.tez.dag.app.security.authorize.TezAMPolicyProvider;
 import org.apache.tez.dag.records.TezTaskAttemptID;
 import org.apache.tez.dag.records.TezVertexID;
 import org.apache.tez.runtime.api.impl.TezEvent;
-import org.apache.tez.runtime.api.impl.TezHeartbeatRequest;
-import org.apache.tez.runtime.api.impl.TezHeartbeatResponse;
 import org.apache.tez.common.security.JobTokenSecretManager;
 
-import com.google.common.collect.Maps;
 
 @SuppressWarnings("unchecked")
+@InterfaceAudience.Private
 public class TaskAttemptListenerImpTezDag extends AbstractService implements
-    TezTaskUmbilicalProtocol, TaskAttemptListener {
-
-  private static final ContainerTask TASK_FOR_INVALID_JVM = new ContainerTask(
-      null, true, null, null, false);
+    TaskAttemptListener, TaskCommunicatorContext {
 
   private static final Logger LOG = LoggerFactory
       .getLogger(TaskAttemptListenerImpTezDag.class);
 
   private final AppContext context;
+  private TaskCommunicator taskCommunicator;
 
   protected final TaskHeartbeatHandler taskHeartbeatHandler;
   protected final ContainerHeartbeatHandler containerHeartbeatHandler;
-  private final JobTokenSecretManager jobTokenSecretManager;
-  private InetSocketAddress address;
-  private Server server;
-
-  static class ContainerInfo {
-    ContainerInfo() {
-      this.lastReponse = null;
-      this.lastRequestId = 0;
-      this.amContainerTask = null;
-      this.taskPulled = false;
+
+  private final TaskHeartbeatResponse RESPONSE_SHOULD_DIE = new TaskHeartbeatResponse(true, null);
+
+  private final ConcurrentMap<TezTaskAttemptID, ContainerId> registeredAttempts =
+      new ConcurrentHashMap<TezTaskAttemptID, ContainerId>();
+  private final ConcurrentMap<ContainerId, ContainerInfo> registeredContainers =
+      new ConcurrentHashMap<ContainerId, ContainerInfo>();
+
+  // Defined primarily to work around ConcurrentMaps not accepting null values
+  private static final class ContainerInfo {
+    TezTaskAttemptID taskAttemptId;
+    ContainerInfo(TezTaskAttemptID taskAttemptId) {
+      this.taskAttemptId = taskAttemptId;
     }
-    long lastRequestId;
-    TezHeartbeatResponse lastReponse;
-    AMContainerTask amContainerTask;
-    boolean taskPulled;
   }
 
-  private ConcurrentMap<TezTaskAttemptID, ContainerId> attemptToInfoMap =
-      new ConcurrentHashMap<TezTaskAttemptID, ContainerId>();
+  private static final ContainerInfo NULL_CONTAINER_INFO = new ContainerInfo(null);
 
-  private ConcurrentHashMap<ContainerId, ContainerInfo> registeredContainers =
-      new ConcurrentHashMap<ContainerId, ContainerInfo>();
 
   public TaskAttemptListenerImpTezDag(AppContext context,
-      TaskHeartbeatHandler thh, ContainerHeartbeatHandler chh,
-      JobTokenSecretManager jobTokenSecretManager) {
+                                      TaskHeartbeatHandler thh, ContainerHeartbeatHandler chh,
+                                      // TODO TEZ-2003 pre-merge. Remove reference to JobTokenSecretManager.
+                                      JobTokenSecretManager jobTokenSecretManager) {
     super(TaskAttemptListenerImpTezDag.class.getName());
     this.context = context;
-    this.jobTokenSecretManager = jobTokenSecretManager;
     this.taskHeartbeatHandler = thh;
     this.containerHeartbeatHandler = chh;
+    this.taskCommunicator = new TezTaskCommunicatorImpl(this);
   }
 
   @Override
-  public void serviceStart() {
-    startRpcServer();
-  }
-
-  protected void startRpcServer() {
-    Configuration conf = getConfig();
-    if (!conf.getBoolean(TezConfiguration.TEZ_LOCAL_MODE, TezConfiguration.TEZ_LOCAL_MODE_DEFAULT)) {
-      try {
-        server = new RPC.Builder(conf)
-            .setProtocol(TezTaskUmbilicalProtocol.class)
-            .setBindAddress("0.0.0.0")
-            .setPort(0)
-            .setInstance(this)
-            .setNumHandlers(
-                conf.getInt(TezConfiguration.TEZ_AM_TASK_LISTENER_THREAD_COUNT,
-                    TezConfiguration.TEZ_AM_TASK_LISTENER_THREAD_COUNT_DEFAULT))
-            .setSecretManager(jobTokenSecretManager).build();
-
-        // Enable service authorization?
-        if (conf.getBoolean(
-            CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHORIZATION,
-            false)) {
-          refreshServiceAcls(conf, new TezAMPolicyProvider());
-        }
-
-        server.start();
-        this.address = NetUtils.getConnectAddress(server);
-      } catch (IOException e) {
-        throw new TezUncheckedException(e);
-      }
+  public void serviceInit(Configuration conf) {
+    String taskCommClassName = conf.get(TezConfiguration.TEZ_AM_TASK_COMMUNICATOR_CLASS);
+    if (taskCommClassName == null) {
+      LOG.info("Using Default Task Communicator");
+      this.taskCommunicator = new TezTaskCommunicatorImpl(this);
     } else {
+      LOG.info("Using TaskCommunicator: " + taskCommClassName);
+      Class<? extends TaskCommunicator> taskCommClazz = (Class<? extends TaskCommunicator>) ReflectionUtils
+          .getClazz(taskCommClassName);
       try {
-        this.address = new InetSocketAddress(InetAddress.getLocalHost(), 0);
-      } catch (UnknownHostException e) {
+        Constructor<? extends TaskCommunicator> ctor = taskCommClazz.getConstructor(TaskCommunicatorContext.class);
+        ctor.setAccessible(true);
+        this.taskCommunicator = ctor.newInstance(this);
+      } catch (NoSuchMethodException e) {
+        throw new TezUncheckedException(e);
+      } catch (InvocationTargetException e) {
+        throw new TezUncheckedException(e);
+      } catch (InstantiationException e) {
+        throw new TezUncheckedException(e);
+      } catch (IllegalAccessException e) {
         throw new TezUncheckedException(e);
-      }
-      if (LOG.isDebugEnabled()) {
-        LOG.debug("Not starting TaskAttemptListener RPC in LocalMode");
       }
     }
   }
 
-  void refreshServiceAcls(Configuration configuration,
-      PolicyProvider policyProvider) {
-    this.server.refreshServiceAcl(configuration, policyProvider);
+  @Override
+  public void serviceStart() {
+    taskCommunicator.init(getConfig());
+    taskCommunicator.start();
   }
 
   @Override
   public void serviceStop() {
-    stopRpcServer();
-  }
-
-  protected void stopRpcServer() {
-    if (server != null) {
-      server.stop();
+    if (taskCommunicator != null) {
+      taskCommunicator.stop();
+      taskCommunicator = null;
     }
   }
 
-  public InetSocketAddress getAddress() {
-    return address;
-  }
-
   @Override
-  public long getProtocolVersion(String protocol, long clientVersion)
-      throws IOException {
-    return versionID;
+  public ApplicationAttemptId getApplicationAttemptId() {
+    return context.getApplicationAttemptId();
   }
 
   @Override
-  public ProtocolSignature getProtocolSignature(String protocol,
-      long clientVersion, int clientMethodsHash) throws IOException {
-    return ProtocolSignature.getProtocolSignature(this, protocol,
-        clientVersion, clientMethodsHash);
+  public Credentials getCredentials() {
+    return context.getAppCredentials();
   }
 
   @Override
-  public ContainerTask getTask(ContainerContext containerContext)
-      throws IOException {
+  public TaskHeartbeatResponse heartbeat(TaskHeartbeatRequest request)
+      throws IOException, TezException {
+    ContainerId containerId = ConverterUtils.toContainerId(request
+        .getContainerIdentifier());
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("Received heartbeat from container"
+          + ", request=" + request);
+    }
 
-    ContainerTask task = null;
+    if (!registeredContainers.containsKey(containerId)) {
+      LOG.warn("Received task heartbeat from unknown container with id: " + containerId +
+          ", asking it to die");
+      return RESPONSE_SHOULD_DIE;
+    }
 
-    if (containerContext == null || containerContext.getContainerIdentifier() == null) {
-      LOG.info("Invalid task request with an empty containerContext or containerId");
-      task = TASK_FOR_INVALID_JVM;
-    } else {
-      ContainerId containerId = ConverterUtils.toContainerId(containerContext
-          .getContainerIdentifier());
+    // A heartbeat can come in anytime. The AM may have made a decision to kill a running task/container
+    // meanwhile. If the decision is processed through the pipeline before the heartbeat is processed,
+    // the heartbeat will be dropped. Otherwise the heartbeat will be processed - and the system
+    // know how to handle this - via FailedInputEvents for example (relevant only if the heartbeat has events).
+    // So - avoiding synchronization.
+
+    pingContainerHeartbeatHandler(containerId);
+    List<TezEvent> outEvents = null;
+    TezTaskAttemptID taskAttemptID = request.getTaskAttemptId();
+    if (taskAttemptID != null) {
+      ContainerId containerIdFromMap = registeredAttempts.get(taskAttemptID);
+      if (containerIdFromMap == null || !containerIdFromMap.equals(containerId)) {
+        // This can happen when a task heartbeats. Meanwhile the container is unregistered.
+        // The information will eventually make it through to the plugin via a corresponding unregister.
+        // There's a race in that case between the unregister making it through, and this method returning.
+        // TODO TEZ-2003. An exception back is likely a better approach than sending a shouldDie = true,
+        // so that the plugin can handle the scenario. Alternately augment the response with error codes.
+        // Error codes would be better than exceptions.
+        LOG.info("Attempt: " + taskAttemptID + " is not recognized for heartbeats");
+        return RESPONSE_SHOULD_DIE;
+      }
+
+      List<TezEvent> inEvents = request.getEvents();
       if (LOG.isDebugEnabled()) {
-        LOG.debug("Container with id: " + containerId + " asked for a task");
+        LOG.debug("Ping from " + taskAttemptID.toString() +
+            " events: " + (inEvents != null ? inEvents.size() : -1));
       }
-      if (!registeredContainers.containsKey(containerId)) {
-        if(context.getAllContainers().get(containerId) == null) {
-          LOG.info("Container with id: " + containerId
-              + " is invalid and will be killed");
-        } else {
-          LOG.info("Container with id: " + containerId
-              + " is valid, but no longer registered, and will be killed");
-        }
-        task = TASK_FOR_INVALID_JVM;
-      } else {
-        pingContainerHeartbeatHandler(containerId);
-        task = getContainerTask(containerId);
-        if (task == null) {
-          if (LOG.isDebugEnabled()) {
-            LOG.debug("No task current assigned to Container with id: " + containerId);
-          }
-        } else if (task == TASK_FOR_INVALID_JVM) { 
-          LOG.info("Container with id: " + containerId
-              + " is valid, but no longer registered, and will be killed. Race condition.");          
+
+      List<TezEvent> otherEvents = new ArrayList<TezEvent>();
+      for (TezEvent tezEvent : ListUtils.emptyIfNull(inEvents)) {
+        final EventType eventType = tezEvent.getEventType();
+        if (eventType == EventType.TASK_STATUS_UPDATE_EVENT ||
+            eventType == EventType.TASK_ATTEMPT_COMPLETED_EVENT) {
+          context.getEventHandler()
+              .handle(getTaskAttemptEventFromTezEvent(taskAttemptID, tezEvent));
         } else {
-          context.getEventHandler().handle(
-              new TaskAttemptEventStartedRemotely(task.getTaskSpec()
-                  .getTaskAttemptID(), containerId, context
-                  .getApplicationACLs()));
-          LOG.info("Container with id: " + containerId + " given task: "
-              + task.getTaskSpec().getTaskAttemptID());
+          otherEvents.add(tezEvent);
         }
       }
+      if(!otherEvents.isEmpty()) {
+        TezVertexID vertexId = taskAttemptID.getTaskID().getVertexID();
+        context.getEventHandler().handle(
+            new VertexEventRouteEvent(vertexId, Collections.unmodifiableList(otherEvents)));
+      }
+      taskHeartbeatHandler.pinged(taskAttemptID);
+      outEvents = context
+          .getCurrentDAG()
+          .getVertex(taskAttemptID.getTaskID().getVertexID())
+          .getTask(taskAttemptID.getTaskID())
+          .getTaskAttemptTezEvents(taskAttemptID, request.getStartIndex(),
+              request.getMaxEvents());
     }
-    if (LOG.isDebugEnabled()) {
-      LOG.debug("getTask returning task: " + task);
-    }
-    return task;
+    return new TaskHeartbeatResponse(false, outEvents);
+  }
+
+  @Override
+  public boolean isKnownContainer(ContainerId containerId) {
+    return context.getAllContainers().get(containerId) != null;
+  }
+
+  @Override
+  public void taskStartedRemotely(TezTaskAttemptID taskAttemptID, ContainerId containerId) {
+    context.getEventHandler().handle(new TaskAttemptEventStartedRemotely(taskAttemptID, containerId, null));
+    pingContainerHeartbeatHandler(containerId);
   }
 
   /**
    * Child checking whether it can commit.
-   *
+   * <p/>
    * <br/>
    * Repeatedly polls the ApplicationMaster whether it
    * {@link Task#canCommit(TezTaskAttemptID)} This is * a legacy from the
@@ -270,25 +265,12 @@ public class TaskAttemptListenerImpTezDag extends AbstractService implements
   }
 
   @Override
-  public void unregisterTaskAttempt(TezTaskAttemptID attemptId) {
-    ContainerId containerId = attemptToInfoMap.get(attemptId);
-    if(containerId == null) {
-      LOG.warn("Unregister task attempt: " + attemptId + " from unknown container");
-      return;
-    }
-    ContainerInfo containerInfo = registeredContainers.get(containerId);
-    if(containerInfo == null) {
-      LOG.warn("Unregister task attempt: " + attemptId +
-          " from non-registered container: " + containerId);
-      return;
-    }
-    synchronized (containerInfo) {
-      containerInfo.amContainerTask = null;
-      attemptToInfoMap.remove(attemptId);
-    }
-
+  public InetSocketAddress getAddress() {
+    return taskCommunicator.getAddress();
   }
 
+  // The TaskAttemptListener register / unregister methods in this class are not thread safe.
+  // The Tez framework should not invoke these methods from multiple threads.
   @Override
   public void dagComplete(DAG dag) {
     // TODO TEZ-2335. Cleanup TaskHeartbeat handler structures.
@@ -308,50 +290,82 @@ public class TaskAttemptListenerImpTezDag extends AbstractService implements
   @Override
   public void registerRunningContainer(ContainerId containerId) {
     if (LOG.isDebugEnabled()) {
-      LOG.debug("ContainerId: " + containerId
-          + " registered with TaskAttemptListener");
+      LOG.debug("ContainerId: " + containerId + " registered with TaskAttemptListener");
     }
-    ContainerInfo oldInfo = registeredContainers.put(containerId, new ContainerInfo());
-    if(oldInfo != null) {
+    ContainerInfo oldInfo = registeredContainers.put(containerId, NULL_CONTAINER_INFO);
+    if (oldInfo != null) {
       throw new TezUncheckedException(
           "Multiple registrations for containerId: " + containerId);
     }
+    NodeId nodeId = context.getAllContainers().get(containerId).getContainer().getNodeId();
+    taskCommunicator.registerRunningContainer(containerId, nodeId.getHost(), nodeId.getPort());
+  }
+
+  @Override
+  public void unregisterRunningContainer(ContainerId containerId) {
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("Unregistering Container from TaskAttemptListener: " + containerId);
+    }
+    ContainerInfo containerInfo = registeredContainers.remove(containerId);
+    if (containerInfo.taskAttemptId != null) {
+      registeredAttempts.remove(containerInfo.taskAttemptId);
+    }
+    taskCommunicator.registerContainerEnd(containerId);
   }
 
   @Override
   public void registerTaskAttempt(AMContainerTask amContainerTask,
-      ContainerId containerId) {
+                                  ContainerId containerId) {
     ContainerInfo containerInfo = registeredContainers.get(containerId);
-    if(containerInfo == null) {
+    if (containerInfo == null) {
       throw new TezUncheckedException("Registering task attempt: "
           + amContainerTask.getTask().getTaskAttemptID() + " to unknown container: " + containerId);
     }
-    synchronized (containerInfo) {
-      if(containerInfo.amContainerTask != null) {
-        throw new TezUncheckedException("Registering task attempt: "
-            + amContainerTask.getTask().getTaskAttemptID() + " to container: " + containerId
-            + " with existing assignment to: " + containerInfo.amContainerTask.getTask().getTaskAttemptID());
-      }
-      containerInfo.amContainerTask = amContainerTask;
-      containerInfo.taskPulled = false;
-
-      ContainerId containerIdFromMap =
-          attemptToInfoMap.put(amContainerTask.getTask().getTaskAttemptID(), containerId);
-      if(containerIdFromMap != null) {
-        throw new TezUncheckedException("Registering task attempt: "
-            + amContainerTask.getTask().getTaskAttemptID() + " to container: " + containerId
-            + " when already assigned to: " + containerIdFromMap);
-      }
+    if (containerInfo.taskAttemptId != null) {
+      throw new TezUncheckedException("Registering task attempt: "
+          + amContainerTask.getTask().getTaskAttemptID() + " to container: " + containerId
+          + " with existing assignment to: " +
+          containerInfo.taskAttemptId);
     }
+
+    if (containerInfo.taskAttemptId != null) {
+      throw new TezUncheckedException("Registering task attempt: "
+          + amContainerTask.getTask().getTaskAttemptID() + " to container: " + containerId
+          + " with existing assignment to: " +
+          containerInfo.taskAttemptId);
+    }
+
+    // Explicitly putting in a new entry so that synchronization is not required on the existing element in the map.
+    registeredContainers.put(containerId, new ContainerInfo(amContainerTask.getTask().getTaskAttemptID()));
+
+    ContainerId containerIdFromMap = registeredAttempts.put(
+        amContainerTask.getTask().getTaskAttemptID(), containerId);
+    if (containerIdFromMap != null) {
+      throw new TezUncheckedException("Registering task attempt: "
+          + amContainerTask.getTask().getTaskAttemptID() + " to container: " + containerId
+          + " when already assigned to: " + containerIdFromMap);
+    }
+    taskCommunicator.registerRunningTaskAttempt(containerId, amContainerTask.getTask(),
+        amContainerTask.getAdditionalResources(), amContainerTask.getCredentials(),
+        amContainerTask.haveCredentialsChanged());
   }
 
   @Override
-  public void unregisterRunningContainer(ContainerId containerId) {
-    if (LOG.isDebugEnabled()) {
-      LOG.debug("Unregistering Container from TaskAttemptListener: "
-          + containerId);
+  public void unregisterTaskAttempt(TezTaskAttemptID attemptId) {
+    ContainerId containerId = registeredAttempts.remove(attemptId);
+    if (containerId == null) {
+      LOG.warn("Unregister task attempt: " + attemptId + " from unknown container");
+      return;
+    }
+    ContainerInfo containerInfo = registeredContainers.get(containerId);
+    if (containerInfo == null) {
+      LOG.warn("Unregister task attempt: " + attemptId +
+          " from non-registered container: " + containerId);
+      return;
     }
-    registeredContainers.remove(containerId);
+    // Explicitly putting in a new entry so that synchronization is not required on the existing element in the map.
+    registeredContainers.put(containerId, NULL_CONTAINER_INFO);
+    taskCommunicator.unregisterRunningTaskAttempt(attemptId);
   }
 
   private void pingContainerHeartbeatHandler(ContainerId containerId) {
@@ -359,7 +373,7 @@ public class TaskAttemptListenerImpTezDag extends AbstractService implements
   }
 
   private void pingContainerHeartbeatHandler(TezTaskAttemptID taskAttemptId) {
-    ContainerId containerId = attemptToInfoMap.get(taskAttemptId);
+    ContainerId containerId = registeredAttempts.get(taskAttemptId);
     if (containerId != null) {
       containerHeartbeatHandler.pinged(containerId);
     } else {
@@ -368,141 +382,7 @@ public class TaskAttemptListenerImpTezDag extends AbstractService implements
     }
   }
 
-  @Override
-  public TezHeartbeatResponse heartbeat(TezHeartbeatRequest request)
-      throws IOException, TezException {
-    ContainerId containerId = ConverterUtils.toContainerId(request
-        .getContainerIdentifier());
-    long requestId = request.getRequestId();
-    if (LOG.isDebugEnabled()) {
-      LOG.debug("Received heartbeat from container"
-          + ", request=" + request);
-    }
-
-    ContainerInfo containerInfo = registeredContainers.get(containerId);
-    if(containerInfo == null) {
-      LOG.warn("Received task heartbeat from unknown container with id: " + containerId +
-          ", asking it to die");
-      TezHeartbeatResponse response = new TezHeartbeatResponse();
-      response.setLastRequestId(requestId);
-      response.setShouldDie();
-      return response;
-    }
-
-    synchronized (containerInfo) {
-      pingContainerHeartbeatHandler(containerId);
-
-      if(containerInfo.lastRequestId == requestId) {
-        LOG.warn("Old sequenceId received: " + requestId
-            + ", Re-sending last response to client");
-        return containerInfo.lastReponse;
-      }
-
-      TezHeartbeatResponse response = new TezHeartbeatResponse();
-      response.setLastRequestId(requestId);
-
-      TezTaskAttemptID taskAttemptID = request.getCurrentTaskAttemptID();
-      if (taskAttemptID != null) {
-        ContainerId containerIdFromMap = attemptToInfoMap.get(taskAttemptID);
-        if(containerIdFromMap == null || !containerIdFromMap.equals(containerId)) {
-          throw new TezException("Attempt " + taskAttemptID
-            + " is not recognized for heartbeat");
-        }
-
-        if(containerInfo.lastRequestId+1 != requestId) {
-          throw new TezException("Container " + containerId
-              + " has invalid request id. Expected: "
-              + containerInfo.lastRequestId+1
-              + " and actual: " + requestId);
-        }
-
-        List<TezEvent> inEvents = request.getEvents();
-        if (LOG.isDebugEnabled()) {
-          LOG.debug("Ping from " + taskAttemptID.toString() +
-              " events: " + (inEvents != null? inEvents.size() : -1));
-        }
-
-        List<TezEvent> otherEvents = new ArrayList<TezEvent>();
-        // route TASK_STATUS_UPDATE_EVENT directly to TaskAttempt and route other events
-        // (DATA_MOVEMENT_EVENT, TASK_ATTEMPT_COMPLETED_EVENT, TASK_ATTEMPT_FAILED_EVENT)
-        // to VertexImpl to ensure the events ordering
-        //  1. DataMovementEvent is logged as RecoveryEvent before TaskAttemptFinishedEvent
-        //  2. TaskStatusEvent is handled before TaskAttemptFinishedEvent
-        for (TezEvent tezEvent : ListUtils.emptyIfNull(inEvents)) {
-          final EventType eventType = tezEvent.getEventType();
-          if (eventType == EventType.TASK_STATUS_UPDATE_EVENT) {
-            TaskAttemptEvent taskAttemptEvent = new TaskAttemptEventStatusUpdate(taskAttemptID,
-                    (TaskStatusUpdateEvent) tezEvent.getEvent());
-            context.getEventHandler().handle(taskAttemptEvent);
-          } else {
-            otherEvents.add(tezEvent);
-          }
-        }
-        if(!otherEvents.isEmpty()) {
-          TezVertexID vertexId = taskAttemptID.getTaskID().getVertexID();
-          context.getEventHandler().handle(
-              new VertexEventRouteEvent(vertexId, Collections.unmodifiableList(otherEvents)));
-        }
-        taskHeartbeatHandler.pinged(taskAttemptID);
-        TaskAttemptEventInfo eventInfo = context
-            .getCurrentDAG()
-            .getVertex(taskAttemptID.getTaskID().getVertexID())
-            .getTaskAttemptTezEvents(taskAttemptID, request.getStartIndex(),
-                request.getMaxEvents());
-        response.setEvents(eventInfo.getEvents());
-        response.setNextFromEventId(eventInfo.getNextFromEventId());
-      }
-      containerInfo.lastRequestId = requestId;
-      containerInfo.lastReponse = response;
-      return response;
-    }
-  }
-
-  private Map<String, TezLocalResource> convertLocalResourceMap(Map<String, LocalResource> ylrs)
-      throws IOException {
-    Map<String, TezLocalResource> tlrs = Maps.newHashMap();
-    if (ylrs != null) {
-      for (Entry<String, LocalResource> ylrEntry : ylrs.entrySet()) {
-        TezLocalResource tlr;
-        try {
-          tlr = TezConverterUtils.convertYarnLocalResourceToTez(ylrEntry.getValue());
-        } catch (URISyntaxException e) {
-         throw new IOException(e);
-        }
-        tlrs.put(ylrEntry.getKey(), tlr);
-      }
-    }
-    return tlrs;
-  }
-
-  private ContainerTask getContainerTask(ContainerId containerId) throws IOException {
-    ContainerTask containerTask = null;
-    ContainerInfo containerInfo = registeredContainers.get(containerId);
-    if (containerInfo == null) {
-      // This can happen if an unregisterTask comes in after we've done the initial checks for
-      // registered containers. (Race between getTask from the container, and a potential STOP_CONTAINER
-      // from somewhere within the AM)
-      // Implies that an un-registration has taken place and the container needs to be asked to die.
-      LOG.info("Container with id: " + containerId
-          + " is valid, but no longer registered, and will be killed");
-      containerTask = TASK_FOR_INVALID_JVM;
-    } else {
-      synchronized (containerInfo) {
-        if (containerInfo.amContainerTask != null) {
-          if (!containerInfo.taskPulled) {
-            containerInfo.taskPulled = true;
-            AMContainerTask amContainerTask = containerInfo.amContainerTask;
-            containerTask = new ContainerTask(amContainerTask.getTask(), false,
-                convertLocalResourceMap(amContainerTask.getAdditionalResources()),
-                amContainerTask.getCredentials(), amContainerTask.haveCredentialsChanged());
-          } else {
-            containerTask = null;
-          }
-        } else {
-          containerTask = null;
-        }
-      }
-    }
-    return containerTask;
+  public TaskCommunicator getTaskCommunicator() {
+    return taskCommunicator;
   }
 }

http://git-wip-us.apache.org/repos/asf/tez/blob/7b71d3b7/tez-dag/src/main/java/org/apache/tez/dag/app/TezTaskCommunicatorImpl.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/TezTaskCommunicatorImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/TezTaskCommunicatorImpl.java
new file mode 100644
index 0000000..5652937
--- /dev/null
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/TezTaskCommunicatorImpl.java
@@ -0,0 +1,474 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.dag.app;
+
+import java.io.IOException;
+import java.net.InetAddress;
+import java.net.InetSocketAddress;
+import java.net.URISyntaxException;
+import java.net.UnknownHostException;
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ConcurrentMap;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.Maps;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
+import org.apache.hadoop.ipc.ProtocolSignature;
+import org.apache.hadoop.ipc.RPC;
+import org.apache.hadoop.ipc.Server;
+import org.apache.hadoop.net.NetUtils;
+import org.apache.hadoop.security.Credentials;
+import org.apache.hadoop.security.authorize.PolicyProvider;
+import org.apache.hadoop.security.token.Token;
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.api.records.LocalResource;
+import org.apache.hadoop.yarn.util.ConverterUtils;
+import org.apache.tez.common.*;
+import org.apache.tez.common.ContainerContext;
+import org.apache.tez.common.security.JobTokenIdentifier;
+import org.apache.tez.common.security.JobTokenSecretManager;
+import org.apache.tez.common.security.TokenCache;
+import org.apache.tez.dag.api.TaskCommunicator;
+import org.apache.tez.dag.api.TaskCommunicatorContext;
+import org.apache.tez.dag.api.TaskHeartbeatRequest;
+import org.apache.tez.dag.api.TaskHeartbeatResponse;
+import org.apache.tez.dag.api.TezConfiguration;
+import org.apache.tez.dag.api.TezException;
+import org.apache.tez.dag.api.TezUncheckedException;
+import org.apache.tez.dag.app.security.authorize.TezAMPolicyProvider;
+import org.apache.tez.dag.records.TezTaskAttemptID;
+import org.apache.tez.runtime.api.impl.TaskSpec;
+import org.apache.tez.runtime.api.impl.TezHeartbeatRequest;
+import org.apache.tez.runtime.api.impl.TezHeartbeatResponse;
+
+@InterfaceAudience.Private
+public class TezTaskCommunicatorImpl extends TaskCommunicator {
+
+  private static final Log LOG = LogFactory.getLog(TezTaskCommunicatorImpl.class);
+
+  private static final ContainerTask TASK_FOR_INVALID_JVM = new ContainerTask(
+      null, true, null, null, false);
+
+  private final TaskCommunicatorContext taskCommunicatorContext;
+
+  private final ConcurrentMap<ContainerId, ContainerInfo> registeredContainers =
+      new ConcurrentHashMap<ContainerId, ContainerInfo>();
+  private final ConcurrentMap<TaskAttempt, ContainerId> attemptToContainerMap =
+      new ConcurrentHashMap<TaskAttempt, ContainerId>();
+
+  private final TezTaskUmbilicalProtocol taskUmbilical;
+  private InetSocketAddress address;
+  private Server server;
+
+  private static final class ContainerInfo {
+
+    ContainerInfo(ContainerId containerId) {
+      this.containerId = containerId;
+    }
+
+    ContainerId containerId;
+    TezHeartbeatResponse lastResponse = null;
+    TaskSpec taskSpec = null;
+    long lastRequestId = 0;
+    Map<String, LocalResource> additionalLRs = null;
+    Credentials credentials = null;
+    boolean credentialsChanged = false;
+    boolean taskPulled = false;
+
+    void reset() {
+      taskSpec = null;
+      additionalLRs = null;
+      credentials = null;
+      credentialsChanged = false;
+      taskPulled = false;
+    }
+  }
+
+
+
+  /**
+   * Construct the service.
+   */
+  public TezTaskCommunicatorImpl(TaskCommunicatorContext taskCommunicatorContext) {
+    super(TezTaskCommunicatorImpl.class.getName());
+    this.taskCommunicatorContext = taskCommunicatorContext;
+    this.taskUmbilical = new TezTaskUmbilicalProtocolImpl();
+  }
+
+
+  @Override
+  public void serviceStart() {
+
+    startRpcServer();
+  }
+
+  @Override
+  public void serviceStop() {
+    stopRpcServer();
+  }
+
+  protected void startRpcServer() {
+    Configuration conf = getConfig();
+    if (!conf.getBoolean(TezConfiguration.TEZ_LOCAL_MODE, TezConfiguration.TEZ_LOCAL_MODE_DEFAULT)) {
+      try {
+        JobTokenSecretManager jobTokenSecretManager =
+            new JobTokenSecretManager();
+        Token<JobTokenIdentifier> sessionToken = TokenCache.getSessionToken(taskCommunicatorContext.getCredentials());
+        jobTokenSecretManager.addTokenForJob(
+            taskCommunicatorContext.getApplicationAttemptId().getApplicationId().toString(), sessionToken);
+
+        server = new RPC.Builder(conf)
+            .setProtocol(TezTaskUmbilicalProtocol.class)
+            .setBindAddress("0.0.0.0")
+            .setPort(0)
+            .setInstance(taskUmbilical)
+            .setNumHandlers(
+                conf.getInt(TezConfiguration.TEZ_AM_TASK_LISTENER_THREAD_COUNT,
+                    TezConfiguration.TEZ_AM_TASK_LISTENER_THREAD_COUNT_DEFAULT))
+            .setSecretManager(jobTokenSecretManager).build();
+
+        // Enable service authorization?
+        if (conf.getBoolean(
+            CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHORIZATION,
+            false)) {
+          refreshServiceAcls(conf, new TezAMPolicyProvider());
+        }
+
+        server.start();
+        this.address = NetUtils.getConnectAddress(server);
+      } catch (IOException e) {
+        throw new TezUncheckedException(e);
+      }
+    } else {
+      try {
+        this.address = new InetSocketAddress(InetAddress.getLocalHost(), 0);
+      } catch (UnknownHostException e) {
+        throw new TezUncheckedException(e);
+      }
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("Not starting TaskAttemptListener RPC in LocalMode");
+      }
+    }
+  }
+
+  protected void stopRpcServer() {
+    if (server != null) {
+      server.stop();
+      server = null;
+    }
+  }
+
+  private void refreshServiceAcls(Configuration configuration,
+                                  PolicyProvider policyProvider) {
+    this.server.refreshServiceAcl(configuration, policyProvider);
+  }
+
+  @Override
+  public void registerRunningContainer(ContainerId containerId, String host, int port) {
+    ContainerInfo oldInfo = registeredContainers.putIfAbsent(containerId, new ContainerInfo(containerId));
+    if (oldInfo != null) {
+      throw new TezUncheckedException("Multiple registrations for containerId: " + containerId);
+    }
+  }
+
+  @Override
+  public void registerContainerEnd(ContainerId containerId) {
+    ContainerInfo containerInfo = registeredContainers.remove(containerId);
+    if (containerInfo != null) {
+      synchronized(containerInfo) {
+        if (containerInfo.taskSpec != null && containerInfo.taskSpec.getTaskAttemptID() != null) {
+          attemptToContainerMap.remove(containerInfo.taskSpec.getTaskAttemptID());
+        }
+      }
+    }
+  }
+
+  @Override
+  public void registerRunningTaskAttempt(ContainerId containerId, TaskSpec taskSpec,
+                                         Map<String, LocalResource> additionalResources,
+                                         Credentials credentials, boolean credentialsChanged) {
+
+    ContainerInfo containerInfo = registeredContainers.get(containerId);
+    Preconditions.checkNotNull(containerInfo,
+        "Cannot register task attempt: " + taskSpec.getTaskAttemptID() + " to unknown container: " +
+            containerId);
+    synchronized (containerInfo) {
+      if (containerInfo.taskSpec != null) {
+        throw new TezUncheckedException(
+            "Cannot register task: " + taskSpec.getTaskAttemptID() + " to container: " +
+                containerId + " , with pre-existing assignment: " +
+                containerInfo.taskSpec.getTaskAttemptID());
+      }
+      containerInfo.taskSpec = taskSpec;
+      containerInfo.additionalLRs = additionalResources;
+      containerInfo.credentials = credentials;
+      containerInfo.credentialsChanged = credentialsChanged;
+      containerInfo.taskPulled = false;
+
+      ContainerId oldId = attemptToContainerMap.putIfAbsent(new TaskAttempt(taskSpec.getTaskAttemptID()), containerId);
+      if (oldId != null) {
+        throw new TezUncheckedException(
+            "Attempting to register an already registered taskAttempt with id: " +
+                taskSpec.getTaskAttemptID() + " to containerId: " + containerId +
+                ". Already registered to containerId: " + oldId);
+      }
+    }
+
+  }
+
+  @Override
+  public void unregisterRunningTaskAttempt(TezTaskAttemptID taskAttemptID) {
+    TaskAttempt taskAttempt = new TaskAttempt(taskAttemptID);
+    ContainerId containerId = attemptToContainerMap.remove(taskAttempt);
+    if(containerId == null) {
+      LOG.warn("Unregister task attempt: " + taskAttempt + " from unknown container");
+      return;
+    }
+    ContainerInfo containerInfo = registeredContainers.get(containerId);
+    if (containerInfo == null) {
+      LOG.warn("Unregister task attempt: " + taskAttempt +
+          " from non-registered container: " + containerId);
+      return;
+    }
+    synchronized (containerInfo) {
+      containerInfo.reset();
+      attemptToContainerMap.remove(taskAttempt);
+    }
+  }
+
+  @Override
+  public InetSocketAddress getAddress() {
+    return address;
+  }
+
+  public TezTaskUmbilicalProtocol getUmbilical() {
+    return this.taskUmbilical;
+  }
+
+  private class TezTaskUmbilicalProtocolImpl implements TezTaskUmbilicalProtocol {
+
+    @Override
+    public ContainerTask getTask(ContainerContext containerContext) throws IOException {
+      ContainerTask task = null;
+      if (containerContext == null || containerContext.getContainerIdentifier() == null) {
+        LOG.info("Invalid task request with an empty containerContext or containerId");
+        task = TASK_FOR_INVALID_JVM;
+      } else {
+        ContainerId containerId = ConverterUtils.toContainerId(containerContext
+            .getContainerIdentifier());
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("Container with id: " + containerId + " asked for a task");
+        }
+        task = getContainerTask(containerId);
+        if (task != null && !task.shouldDie()) {
+          taskCommunicatorContext
+              .taskStartedRemotely(task.getTaskSpec().getTaskAttemptID(), containerId);
+        }
+      }
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("getTask returning task: " + task);
+      }
+      return task;
+    }
+
+    @Override
+    public boolean canCommit(TezTaskAttemptID taskAttemptId) throws IOException {
+      return taskCommunicatorContext.canCommit(taskAttemptId);
+    }
+
+    @Override
+    public TezHeartbeatResponse heartbeat(TezHeartbeatRequest request) throws IOException,
+        TezException {
+      ContainerId containerId = ConverterUtils.toContainerId(request.getContainerIdentifier());
+      long requestId = request.getRequestId();
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("Received heartbeat from container"
+            + ", request=" + request);
+      }
+
+      ContainerInfo containerInfo = registeredContainers.get(containerId);
+      if (containerInfo == null) {
+        LOG.warn("Received task heartbeat from unknown container with id: " + containerId +
+            ", asking it to die");
+        TezHeartbeatResponse response = new TezHeartbeatResponse();
+        response.setLastRequestId(requestId);
+        response.setShouldDie();
+        return response;
+      }
+
+      synchronized (containerInfo) {
+        if (containerInfo.lastRequestId == requestId) {
+          LOG.warn("Old sequenceId received: " + requestId
+              + ", Re-sending last response to client");
+          return containerInfo.lastResponse;
+        }
+      }
+
+      TaskHeartbeatResponse tResponse = null;
+
+
+      TezTaskAttemptID taskAttemptID = request.getCurrentTaskAttemptID();
+      if (taskAttemptID != null) {
+        synchronized (containerInfo) {
+          ContainerId containerIdFromMap = attemptToContainerMap.get(new TaskAttempt(taskAttemptID));
+          if (containerIdFromMap == null || !containerIdFromMap.equals(containerId)) {
+            throw new TezException("Attempt " + taskAttemptID
+                + " is not recognized for heartbeat");
+          }
+
+          if (containerInfo.lastRequestId + 1 != requestId) {
+            throw new TezException("Container " + containerId
+                + " has invalid request id. Expected: "
+                + containerInfo.lastRequestId + 1
+                + " and actual: " + requestId);
+          }
+        }
+        TaskHeartbeatRequest tRequest = new TaskHeartbeatRequest(request.getContainerIdentifier(),
+            request.getCurrentTaskAttemptID(), request.getEvents(), request.getStartIndex(),
+            request.getMaxEvents());
+        tResponse = taskCommunicatorContext.heartbeat(tRequest);
+      }
+      TezHeartbeatResponse response;
+      if (tResponse == null) {
+        response = new TezHeartbeatResponse();
+      } else {
+        response = new TezHeartbeatResponse(tResponse.getEvents());
+      }
+      response.setLastRequestId(requestId);
+      containerInfo.lastRequestId = requestId;
+      containerInfo.lastResponse = response;
+      return response;
+    }
+
+
+    // TODO Remove this method once we move to the Protobuf RPC engine
+    @Override
+    public long getProtocolVersion(String protocol, long clientVersion) throws IOException {
+      return versionID;
+    }
+
+    // TODO Remove this method once we move to the Protobuf RPC engine
+    @Override
+    public ProtocolSignature getProtocolSignature(String protocol, long clientVersion,
+                                                  int clientMethodsHash) throws IOException {
+      return ProtocolSignature.getProtocolSignature(this, protocol,
+          clientVersion, clientMethodsHash);
+    }
+  }
+
+  private ContainerTask getContainerTask(ContainerId containerId) throws IOException {
+    ContainerInfo containerInfo = registeredContainers.get(containerId);
+    ContainerTask task = null;
+    if (containerInfo == null) {
+      if (taskCommunicatorContext.isKnownContainer(containerId)) {
+        LOG.info("Container with id: " + containerId
+            + " is valid, but no longer registered, and will be killed");
+      } else {
+        LOG.info("Container with id: " + containerId
+            + " is invalid and will be killed");
+      }
+      task = TASK_FOR_INVALID_JVM;
+    } else {
+      synchronized (containerInfo) {
+        if (containerInfo.taskSpec != null) {
+          if (!containerInfo.taskPulled) {
+            containerInfo.taskPulled = true;
+            task = constructContainerTask(containerInfo);
+          } else {
+            if (LOG.isDebugEnabled()) {
+              LOG.debug("Task " + containerInfo.taskSpec.getTaskAttemptID() +
+                  " already sent to container: " + containerId);
+            }
+            task = null;
+          }
+        } else {
+          task = null;
+          if (LOG.isDebugEnabled()) {
+            LOG.debug("No task assigned yet for running container: " + containerId);
+          }
+        }
+      }
+    }
+    return task;
+  }
+
+  private ContainerTask constructContainerTask(ContainerInfo containerInfo) throws IOException {
+    return new ContainerTask(containerInfo.taskSpec, false,
+        convertLocalResourceMap(containerInfo.additionalLRs), containerInfo.credentials,
+        containerInfo.credentialsChanged);
+  }
+
+  private Map<String, TezLocalResource> convertLocalResourceMap(Map<String, LocalResource> ylrs)
+      throws IOException {
+    Map<String, TezLocalResource> tlrs = Maps.newHashMap();
+    if (ylrs != null) {
+      for (Map.Entry<String, LocalResource> ylrEntry : ylrs.entrySet()) {
+        TezLocalResource tlr;
+        try {
+          tlr = TezConverterUtils.convertYarnLocalResourceToTez(ylrEntry.getValue());
+        } catch (URISyntaxException e) {
+          throw new IOException(e);
+        }
+        tlrs.put(ylrEntry.getKey(), tlr);
+      }
+    }
+    return tlrs;
+  }
+
+
+  // Holder for Task information, which eventually will likely be VertexImplm taskIndex, attemptIndex
+  private static class TaskAttempt {
+    // TODO TEZ-2003 Change this to work with VertexName, int id, int version
+    // TODO TEZ-2003 Avoid constructing this unit all over the place
+    private TezTaskAttemptID taskAttemptId;
+
+    TaskAttempt(TezTaskAttemptID taskAttemptId) {
+      this.taskAttemptId = taskAttemptId;
+    }
+
+    @Override
+    public boolean equals(Object o) {
+      if (this == o) {
+        return true;
+      }
+      if (!(o instanceof TaskAttempt)) {
+        return false;
+      }
+
+      TaskAttempt that = (TaskAttempt) o;
+
+      if (!taskAttemptId.equals(that.taskAttemptId)) {
+        return false;
+      }
+
+      return true;
+    }
+
+    @Override
+    public int hashCode() {
+      return taskAttemptId.hashCode();
+    }
+
+    @Override
+    public String toString() {
+      return "TaskAttempt{" + "taskAttemptId=" + taskAttemptId + '}';
+    }
+  }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tez/blob/7b71d3b7/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/LocalContainerLauncher.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/LocalContainerLauncher.java b/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/LocalContainerLauncher.java
index 9faf8c0..e9ba9d7 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/LocalContainerLauncher.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/LocalContainerLauncher.java
@@ -59,6 +59,8 @@ import org.apache.tez.dag.api.TezException;
 import org.apache.tez.dag.api.TezUncheckedException;
 import org.apache.tez.dag.app.AppContext;
 import org.apache.tez.dag.app.TaskAttemptListener;
+import org.apache.tez.dag.app.TaskAttemptListenerImpTezDag;
+import org.apache.tez.dag.app.TezTaskCommunicatorImpl;
 import org.apache.tez.dag.app.rm.NMCommunicatorEvent;
 import org.apache.tez.dag.app.rm.NMCommunicatorLaunchRequestEvent;
 import org.apache.tez.dag.app.rm.NMCommunicatorStopRequestEvent;
@@ -86,7 +88,7 @@ public class LocalContainerLauncher extends AbstractService implements
 
   private static final Logger LOG = LoggerFactory.getLogger(LocalContainerLauncher.class);
   private final AppContext context;
-  private final TaskAttemptListener taskAttemptListener;
+  private final TezTaskUmbilicalProtocol taskUmbilicalProtocol;
   private final AtomicBoolean serviceStopped = new AtomicBoolean(false);
   private final String workingDirectory;
   private final Map<String, String> localEnv = new HashMap<String, String>();
@@ -114,7 +116,9 @@ public class LocalContainerLauncher extends AbstractService implements
                                 String workingDirectory) throws UnknownHostException {
     super(LocalContainerLauncher.class.getName());
     this.context = context;
-    this.taskAttemptListener = taskAttemptListener;
+    TaskAttemptListenerImpTezDag taListener = (TaskAttemptListenerImpTezDag)taskAttemptListener;
+    TezTaskCommunicatorImpl taskComm = (TezTaskCommunicatorImpl) taListener.getTaskCommunicator();
+    this.taskUmbilicalProtocol = taskComm.getUmbilical();
     this.workingDirectory = workingDirectory;
     AuxiliaryServiceHelper.setServiceDataIntoEnv(
         ShuffleUtils.SHUFFLE_HANDLER_SERVICE_ID, ByteBuffer.allocate(4).putInt(0), localEnv);
@@ -215,7 +219,7 @@ public class LocalContainerLauncher extends AbstractService implements
         tezChild =
             createTezChild(context.getAMConf(), event.getContainerId(), tokenIdentifier,
                 context.getApplicationAttemptId().getAttemptId(), context.getLocalDirs(),
-                (TezTaskUmbilicalProtocol) taskAttemptListener,
+                taskUmbilicalProtocol,
                 TezCommonUtils.parseCredentialsBytes(event.getContainerLaunchContext().getTokens().array()));
       } catch (InterruptedException e) {
         handleLaunchFailed(e, event.getContainerId());

http://git-wip-us.apache.org/repos/asf/tez/blob/7b71d3b7/tez-dag/src/main/java/org/apache/tez/dag/app/rm/container/AMContainer.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/container/AMContainer.java b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/container/AMContainer.java
index a6b403d..0fc2e12 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/container/AMContainer.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/container/AMContainer.java
@@ -22,6 +22,7 @@ import java.util.List;
 
 import org.apache.hadoop.yarn.api.records.Container;
 import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.api.records.NodeId;
 import org.apache.hadoop.yarn.event.EventHandler;
 import org.apache.tez.dag.records.TezTaskAttemptID;
 
@@ -32,5 +33,5 @@ public interface AMContainer extends EventHandler<AMContainerEvent>{
   public Container getContainer();
   public List<TezTaskAttemptID> getAllTaskAttempts();
   public TezTaskAttemptID getCurrentTaskAttempt();
-  
+
 }

http://git-wip-us.apache.org/repos/asf/tez/blob/7b71d3b7/tez-dag/src/main/java/org/apache/tez/dag/app/rm/container/AMContainerEventAssignTA.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/container/AMContainerEventAssignTA.java b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/container/AMContainerEventAssignTA.java
index 682cd02..0398882 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/container/AMContainerEventAssignTA.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/container/AMContainerEventAssignTA.java
@@ -27,6 +27,8 @@ import org.apache.tez.runtime.api.impl.TaskSpec;
 
 public class AMContainerEventAssignTA extends AMContainerEvent {
 
+  // TODO TEZ-2003. Add the task priority to this event.
+
   private final TezTaskAttemptID attemptId;
   // TODO Maybe have tht TAL pull the remoteTask from the TaskAttempt itself ?
   private final TaskSpec remoteTaskSpec;

http://git-wip-us.apache.org/repos/asf/tez/blob/7b71d3b7/tez-dag/src/main/java/org/apache/tez/dag/app/rm/container/AMContainerImpl.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/container/AMContainerImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/container/AMContainerImpl.java
index 330f2b7..1acec9c 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/container/AMContainerImpl.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/container/AMContainerImpl.java
@@ -35,6 +35,7 @@ import org.apache.hadoop.yarn.api.records.ContainerExitStatus;
 import org.apache.hadoop.yarn.api.records.ContainerId;
 import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
 import org.apache.hadoop.yarn.api.records.LocalResource;
+import org.apache.hadoop.yarn.api.records.NodeId;
 import org.apache.hadoop.yarn.event.Event;
 import org.apache.hadoop.yarn.event.EventHandler;
 import org.apache.hadoop.yarn.state.InvalidStateTransitonException;

http://git-wip-us.apache.org/repos/asf/tez/blob/7b71d3b7/tez-dag/src/test/java/org/apache/tez/dag/app/MockDAGAppMaster.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/MockDAGAppMaster.java b/tez-dag/src/test/java/org/apache/tez/dag/app/MockDAGAppMaster.java
index 5cd487c..b846922 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/MockDAGAppMaster.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/MockDAGAppMaster.java
@@ -37,6 +37,7 @@ import java.util.concurrent.atomic.AtomicLong;
 import org.apache.tez.dag.app.dag.DAG;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
+import com.google.common.annotations.VisibleForTesting;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.security.Credentials;
 import org.apache.hadoop.service.AbstractService;
@@ -50,7 +51,10 @@ import org.apache.tez.client.TezApiVersionInfo;
 import org.apache.tez.common.ContainerContext;
 import org.apache.tez.common.ContainerTask;
 import org.apache.tez.common.counters.TezCounters;
+import org.apache.tez.dag.api.TaskHeartbeatRequest;
+import org.apache.tez.dag.api.TaskHeartbeatResponse;
 import org.apache.tez.dag.api.TezConfiguration;
+import org.apache.tez.dag.api.TaskCommunicator;
 import org.apache.tez.dag.api.TezUncheckedException;
 import org.apache.tez.dag.app.launcher.ContainerLauncher;
 import org.apache.tez.dag.app.rm.NMCommunicatorEvent;
@@ -72,8 +76,6 @@ import org.apache.tez.runtime.api.impl.TaskSpec;
 import org.apache.tez.runtime.api.impl.TaskStatistics;
 import org.apache.tez.runtime.api.impl.TezEvent;
 import org.apache.tez.runtime.api.impl.EventMetaData.EventProducerConsumerType;
-import org.apache.tez.runtime.api.impl.TezHeartbeatRequest;
-import org.apache.tez.runtime.api.impl.TezHeartbeatResponse;
 
 import com.google.common.base.Preconditions;
 import com.google.common.collect.Lists;
@@ -130,6 +132,7 @@ public class MockDAGAppMaster extends DAGAppMaster {
     Map<ContainerId, ContainerData> containers = Maps.newConcurrentMap();
     ArrayBlockingQueue<Worker> workers;
     TaskAttemptListenerImpTezDag taListener;
+    TezTaskCommunicatorImpl taskCommunicator;
     
     AtomicBoolean startScheduling = new AtomicBoolean(true);
     AtomicBoolean goFlag;
@@ -192,6 +195,7 @@ public class MockDAGAppMaster extends DAGAppMaster {
     @Override
     public void serviceStart() throws Exception {
       taListener = (TaskAttemptListenerImpTezDag) getTaskAttemptListener();
+      taskCommunicator = (TezTaskCommunicatorImpl) taListener.getTaskCommunicator();
       eventHandlingThread = new Thread(this);
       eventHandlingThread.start();
       ExecutorService rawExecutor = Executors.newFixedThreadPool(handlerConcurrency,
@@ -331,10 +335,10 @@ public class MockDAGAppMaster extends DAGAppMaster {
       }
     }
     
-    private void doHeartbeat(TezHeartbeatRequest request, ContainerData cData) throws Exception {
+    private void doHeartbeat(TaskHeartbeatRequest request, ContainerData cData) throws Exception {
       long startTime = System.nanoTime();
       long startCpuTime = threadMxBean.getCurrentThreadCpuTime();
-      TezHeartbeatResponse response = taListener.heartbeat(request);
+      TaskHeartbeatResponse response = taListener.heartbeat(request);
       if (response.shouldDie()) {
         cData.remove();
       } else {
@@ -385,7 +389,8 @@ public class MockDAGAppMaster extends DAGAppMaster {
         try {
           if (cData.taId == null) {
             // if container is not assigned a task, ask for a task
-            ContainerTask cTask = taListener.getTask(new ContainerContext(cData.cIdStr));
+            ContainerTask cTask =
+                taskCommunicator.getUmbilical().getTask(new ContainerContext(cData.cIdStr));
             if (cTask != null) {
               if (cTask.shouldDie()) {
                 cData.remove();
@@ -420,8 +425,11 @@ public class MockDAGAppMaster extends DAGAppMaster {
               float progress = updateProgress ? cData.numUpdates/maxUpdates : 0f;
               events.add(new TezEvent(new TaskStatusUpdateEvent(counters, progress, stats), new EventMetaData(
                   EventProducerConsumerType.SYSTEM, cData.vName, "", cData.taId)));
-              TezHeartbeatRequest request = new TezHeartbeatRequest(cData.numUpdates, events,
-                  cData.cIdStr, cData.taId, cData.nextFromEventId, 50000);
+//              TezHeartbeatRequest request = new TezHeartbeatRequest(cData.numUpdates, events,
+//                  cData.cIdStr, cData.taId, cData.nextFromEventId, 50000);
+              TaskHeartbeatRequest request =
+                  new TaskHeartbeatRequest(cData.cIdStr, cData.taId, events, cData.nextFromEventId,
+                      50000);
               doHeartbeat(request, cData);
             } else if (version != null && cData.taId.getId() <= version.intValue()) {
               preemptContainer(cData);
@@ -431,8 +439,9 @@ public class MockDAGAppMaster extends DAGAppMaster {
               List<TezEvent> events = Collections.singletonList(new TezEvent(
                   new TaskAttemptCompletedEvent(), new EventMetaData(
                       EventProducerConsumerType.SYSTEM, cData.vName, "", cData.taId)));
-              TezHeartbeatRequest request = new TezHeartbeatRequest(++cData.numUpdates, events,
-                  cData.cIdStr, cData.taId, cData.nextFromEventId, 10000);
+              TaskHeartbeatRequest request =
+                  new TaskHeartbeatRequest(cData.cIdStr, cData.taId, events, cData.nextFromEventId,
+                      10000);
               doHeartbeat(request, cData);
               cData.clear();
             }

http://git-wip-us.apache.org/repos/asf/tez/blob/7b71d3b7/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskAttemptListenerImplTezDag.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskAttemptListenerImplTezDag.java b/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskAttemptListenerImplTezDag.java
index db8eff1..46c412e 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskAttemptListenerImplTezDag.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskAttemptListenerImplTezDag.java
@@ -1,16 +1,16 @@
 /*
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
 
 package org.apache.tez.dag.app;
 
@@ -18,6 +18,7 @@ import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertNull;
 import static org.junit.Assert.assertTrue;
+import static org.mockito.Matchers.any;
 import static org.mockito.Mockito.doReturn;
 import static org.mockito.Mockito.mock;
 import static org.mockito.Mockito.times;
@@ -34,6 +35,7 @@ import java.util.Map;
 import org.apache.hadoop.yarn.api.records.ApplicationAccessType;
 import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
 import org.apache.hadoop.yarn.api.records.ApplicationId;
+import org.apache.hadoop.yarn.api.records.Container;
 import org.apache.hadoop.yarn.api.records.ContainerId;
 import org.apache.hadoop.yarn.event.Event;
 import org.apache.hadoop.yarn.event.EventHandler;
@@ -41,6 +43,12 @@ import org.apache.tez.common.ContainerContext;
 import org.apache.tez.common.ContainerTask;
 import org.apache.tez.common.security.JobTokenSecretManager;
 import org.apache.tez.dag.api.TezException;
+import org.apache.hadoop.yarn.api.records.NodeId;
+import org.apache.hadoop.yarn.event.EventHandler;
+import org.apache.tez.common.ContainerContext;
+import org.apache.tez.common.ContainerTask;
+import org.apache.tez.common.TezTaskUmbilicalProtocol;
+import org.apache.tez.dag.api.TaskCommunicatorContext;
 import org.apache.tez.dag.app.dag.DAG;
 import org.apache.tez.dag.app.dag.Vertex;
 import org.apache.tez.dag.app.dag.event.TaskAttemptEventType;
@@ -101,9 +109,18 @@ public class TestTaskAttemptListenerImplTezDag {
     doReturn(dag).when(appContext).getCurrentDAG();
     doReturn(appAcls).when(appContext).getApplicationACLs();
     doReturn(amContainerMap).when(appContext).getAllContainers();
-
-    taskAttemptListener = new TaskAttemptListenerImplForTest(appContext,
-        mock(TaskHeartbeatHandler.class), mock(ContainerHeartbeatHandler.class), null);
+    NodeId nodeId = NodeId.newInstance("localhost", 0);
+    AMContainer amContainer = mock(AMContainer.class);
+    Container container = mock(Container.class);
+    doReturn(nodeId).when(container).getNodeId();
+    doReturn(amContainer).when(amContainerMap).get(any(ContainerId.class));
+    doReturn(container).when(amContainer).getContainer();
+
+    taskAttemptListener =
+        new TaskAttemptListenerImpTezDag(appContext, mock(TaskHeartbeatHandler.class),
+            mock(ContainerHeartbeatHandler.class), null);
+    TezTaskCommunicatorImpl taskCommunicator = (TezTaskCommunicatorImpl)taskAttemptListener.getTaskCommunicator();
+    TezTaskUmbilicalProtocol tezUmbilical = taskCommunicator.getUmbilical();
 
     taskSpec = mock(TaskSpec.class);
     doReturn(taskAttemptID).when(taskSpec).getTaskAttemptID();
@@ -115,32 +132,30 @@ public class TestTaskAttemptListenerImplTezDag {
   public void testGetTask() throws IOException {
 
     ContainerId containerId1 = createContainerId(appId, 1);
-    doReturn(mock(AMContainer.class)).when(amContainerMap).get(containerId1);
     ContainerContext containerContext1 = new ContainerContext(containerId1.toString());
-    containerTask = taskAttemptListener.getTask(containerContext1);
+    containerTask = tezUmbilical.getTask(containerContext1);
     assertTrue(containerTask.shouldDie());
 
     ContainerId containerId2 = createContainerId(appId, 2);
-    doReturn(mock(AMContainer.class)).when(amContainerMap).get(containerId2);
     ContainerContext containerContext2 = new ContainerContext(containerId2.toString());
     taskAttemptListener.registerRunningContainer(containerId2);
-    containerTask = taskAttemptListener.getTask(containerContext2);
+    containerTask = tezUmbilical.getTask(containerContext2);
     assertNull(containerTask);
 
     // Valid task registered
     taskAttemptListener.registerTaskAttempt(amContainerTask, containerId2);
-    containerTask = taskAttemptListener.getTask(containerContext2);
+    containerTask = tezUmbilical.getTask(containerContext2);
     assertFalse(containerTask.shouldDie());
     assertEquals(taskSpec, containerTask.getTaskSpec());
 
     // Task unregistered. Should respond to heartbeats
-    taskAttemptListener.unregisterTaskAttempt(taskAttemptID);
-    containerTask = taskAttemptListener.getTask(containerContext2);
+    taskAttemptListener.unregisterTaskAttempt(taskAttemptId);
+    containerTask = tezUmbilical.getTask(containerContext2);
     assertNull(containerTask);
 
     // Container unregistered. Should send a shouldDie = true
     taskAttemptListener.unregisterRunningContainer(containerId2);
-    containerTask = taskAttemptListener.getTask(containerContext2);
+    containerTask = tezUmbilical.getTask(containerContext2);
     assertTrue(containerTask.shouldDie());
 
     ContainerId containerId3 = createContainerId(appId, 3);
@@ -154,27 +169,30 @@ public class TestTaskAttemptListenerImplTezDag {
     AMContainerTask amContainerTask2 = new AMContainerTask(taskSpec, null, null, false, 0);
     taskAttemptListener.registerTaskAttempt(amContainerTask2, containerId3);
     taskAttemptListener.unregisterRunningContainer(containerId3);
-    containerTask = taskAttemptListener.getTask(containerContext3);
+    containerTask = tezUmbilical.getTask(containerContext3);
     assertTrue(containerTask.shouldDie());
   }
 
   @Test(timeout = 5000)
   public void testGetTaskMultiplePulls() throws IOException {
+    TezTaskCommunicatorImpl taskCommunicator = (TezTaskCommunicatorImpl)taskAttemptListener.getTaskCommunicator();
+    TezTaskUmbilicalProtocol tezUmbilical = taskCommunicator.getUmbilical();
+
     ContainerId containerId1 = createContainerId(appId, 1);
     doReturn(mock(AMContainer.class)).when(amContainerMap).get(containerId1);
     ContainerContext containerContext1 = new ContainerContext(containerId1.toString());
     taskAttemptListener.registerRunningContainer(containerId1);
-    containerTask = taskAttemptListener.getTask(containerContext1);
+    containerTask = tezUmbilical.getTask(containerContext1);
     assertNull(containerTask);
 
     // Register task
     taskAttemptListener.registerTaskAttempt(amContainerTask, containerId1);
-    containerTask = taskAttemptListener.getTask(containerContext1);
+    containerTask = tezUmbilical.getTask(containerContext1);
     assertFalse(containerTask.shouldDie());
     assertEquals(taskSpec, containerTask.getTaskSpec());
 
     // Try pulling again - simulates re-use pull
-    containerTask = taskAttemptListener.getTask(containerContext1);
+    containerTask = tezUmbilical.getTask(containerContext1);
     assertNull(containerTask);
   }
 
@@ -266,13 +284,11 @@ public class TestTaskAttemptListenerImplTezDag {
     return ContainerId.newInstance(appAttemptId, containerIdx);
   }
 
-  private static class TaskAttemptListenerImplForTest extends TaskAttemptListenerImpTezDag {
+  private static class TezTaskCommunicatorImplForTest extends TezTaskCommunicatorImpl {
 
-    public TaskAttemptListenerImplForTest(AppContext context,
-                                          TaskHeartbeatHandler thh,
-                                          ContainerHeartbeatHandler chh,
-                                          JobTokenSecretManager jobTokenSecretManager) {
-      super(context, thh, chh, jobTokenSecretManager);
+    public TezTaskCommunicatorImplForTest(
+        TaskCommunicatorContext taskCommunicatorContext) {
+      super(taskCommunicatorContext);
     }
 
     @Override


[28/43] tez git commit: TEZ-2139. Update version to 0.7.0-TEZ-2003-SNAPSHOT. (sseth)

Posted by ss...@apache.org.
TEZ-2139. Update version to 0.7.0-TEZ-2003-SNAPSHOT. (sseth)


Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/6014c965
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/6014c965
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/6014c965

Branch: refs/heads/TEZ-2003
Commit: 6014c965aa25255d25a8a5538888a5ff9d01fbf5
Parents: 39295ca
Author: Siddharth Seth <ss...@apache.org>
Authored: Mon Feb 23 21:59:39 2015 -0800
Committer: Siddharth Seth <ss...@apache.org>
Committed: Fri May 8 14:41:30 2015 -0700

----------------------------------------------------------------------
 TEZ-2003-CHANGES.txt                                    | 1 +
 docs/pom.xml                                            | 2 +-
 pom.xml                                                 | 2 +-
 tez-api/pom.xml                                         | 2 +-
 tez-common/pom.xml                                      | 2 +-
 tez-dag/pom.xml                                         | 2 +-
 tez-dist/pom.xml                                        | 2 +-
 tez-examples/pom.xml                                    | 2 +-
 tez-ext-service-tests/pom.xml                           | 2 +-
 tez-mapreduce/pom.xml                                   | 2 +-
 tez-plugins/pom.xml                                     | 2 +-
 tez-plugins/tez-mbeans-resource-calculator/pom.xml      | 2 +-
 tez-plugins/tez-yarn-timeline-history-with-acls/pom.xml | 2 +-
 tez-plugins/tez-yarn-timeline-history/pom.xml           | 2 +-
 tez-runtime-internals/pom.xml                           | 2 +-
 tez-runtime-library/pom.xml                             | 2 +-
 tez-tests/pom.xml                                       | 2 +-
 tez-ui/pom.xml                                          | 2 +-
 18 files changed, 18 insertions(+), 17 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tez/blob/6014c965/TEZ-2003-CHANGES.txt
----------------------------------------------------------------------
diff --git a/TEZ-2003-CHANGES.txt b/TEZ-2003-CHANGES.txt
index 4377f57..5c5fd8e 100644
--- a/TEZ-2003-CHANGES.txt
+++ b/TEZ-2003-CHANGES.txt
@@ -8,5 +8,6 @@ ALL CHANGES:
   TEZ-2125. Create a task communicator for local mode. Allow tasks to run in the AM.
   TEZ-2131. Add additional tests for tasks running in the AM.
   TEZ-2138. Fix minor bugs in adding default scheduler, getting launchers.
+  TEZ-2139. Update tez version to 0.7.0-TEZ-2003-SNAPSHOT.
 
 INCOMPATIBLE CHANGES:

http://git-wip-us.apache.org/repos/asf/tez/blob/6014c965/docs/pom.xml
----------------------------------------------------------------------
diff --git a/docs/pom.xml b/docs/pom.xml
index a475c58..ded04a8 100644
--- a/docs/pom.xml
+++ b/docs/pom.xml
@@ -27,7 +27,7 @@
     <parent>
       <groupId>org.apache.tez</groupId>
       <artifactId>tez</artifactId>
-      <version>0.8.0-SNAPSHOT</version>
+      <version>0.8.0-TEZ-2003-SNAPSHOT</version>
     </parent>
     <artifactId>tez-docs</artifactId>
     <packaging>pom</packaging>

http://git-wip-us.apache.org/repos/asf/tez/blob/6014c965/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index ca9db11..8799f0a 100644
--- a/pom.xml
+++ b/pom.xml
@@ -19,7 +19,7 @@
   <groupId>org.apache.tez</groupId>
   <artifactId>tez</artifactId>
   <packaging>pom</packaging>
-  <version>0.8.0-SNAPSHOT</version>
+  <version>0.8.0-TEZ-2003-SNAPSHOT</version>
   <name>tez</name>
 
   <licenses>

http://git-wip-us.apache.org/repos/asf/tez/blob/6014c965/tez-api/pom.xml
----------------------------------------------------------------------
diff --git a/tez-api/pom.xml b/tez-api/pom.xml
index 0a5d570..46e8dd3 100644
--- a/tez-api/pom.xml
+++ b/tez-api/pom.xml
@@ -20,7 +20,7 @@
   <parent>
     <groupId>org.apache.tez</groupId>
     <artifactId>tez</artifactId>
-    <version>0.8.0-SNAPSHOT</version>
+    <version>0.8.0-TEZ-2003-SNAPSHOT</version>
   </parent>
   <artifactId>tez-api</artifactId>
 

http://git-wip-us.apache.org/repos/asf/tez/blob/6014c965/tez-common/pom.xml
----------------------------------------------------------------------
diff --git a/tez-common/pom.xml b/tez-common/pom.xml
index c6922be..5e0ceef 100644
--- a/tez-common/pom.xml
+++ b/tez-common/pom.xml
@@ -20,7 +20,7 @@
   <parent>
     <groupId>org.apache.tez</groupId>
     <artifactId>tez</artifactId>
-    <version>0.8.0-SNAPSHOT</version>
+    <version>0.8.0-TEZ-2003-SNAPSHOT</version>
   </parent>
   <artifactId>tez-common</artifactId>
 

http://git-wip-us.apache.org/repos/asf/tez/blob/6014c965/tez-dag/pom.xml
----------------------------------------------------------------------
diff --git a/tez-dag/pom.xml b/tez-dag/pom.xml
index cc005bb..4d2e9f5 100644
--- a/tez-dag/pom.xml
+++ b/tez-dag/pom.xml
@@ -19,7 +19,7 @@
   <parent>
     <groupId>org.apache.tez</groupId>
     <artifactId>tez</artifactId>
-    <version>0.8.0-SNAPSHOT</version>
+    <version>0.8.0-TEZ-2003-SNAPSHOT</version>
   </parent>
   <properties>
     <tez.component>tez-dag</tez.component>

http://git-wip-us.apache.org/repos/asf/tez/blob/6014c965/tez-dist/pom.xml
----------------------------------------------------------------------
diff --git a/tez-dist/pom.xml b/tez-dist/pom.xml
index e78cb8e..e188fe2 100644
--- a/tez-dist/pom.xml
+++ b/tez-dist/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.tez</groupId>
     <artifactId>tez</artifactId>
-    <version>0.8.0-SNAPSHOT</version>
+    <version>0.8.0-TEZ-2003-SNAPSHOT</version>
   </parent>
   <artifactId>tez-dist</artifactId>
 

http://git-wip-us.apache.org/repos/asf/tez/blob/6014c965/tez-examples/pom.xml
----------------------------------------------------------------------
diff --git a/tez-examples/pom.xml b/tez-examples/pom.xml
index cd2df5c..d15e966 100644
--- a/tez-examples/pom.xml
+++ b/tez-examples/pom.xml
@@ -20,7 +20,7 @@
   <parent>
     <groupId>org.apache.tez</groupId>
     <artifactId>tez</artifactId>
-    <version>0.8.0-SNAPSHOT</version>
+    <version>0.8.0-TEZ-2003-SNAPSHOT</version>
   </parent>
 
   <artifactId>tez-examples</artifactId>

http://git-wip-us.apache.org/repos/asf/tez/blob/6014c965/tez-ext-service-tests/pom.xml
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/pom.xml b/tez-ext-service-tests/pom.xml
index 37f68b1..1113341 100644
--- a/tez-ext-service-tests/pom.xml
+++ b/tez-ext-service-tests/pom.xml
@@ -20,7 +20,7 @@
   <parent>
     <artifactId>tez</artifactId>
     <groupId>org.apache.tez</groupId>
-    <version>0.7.0-SNAPSHOT</version>
+    <version>0.8.0-TEZ-2003-SNAPSHOT</version>
   </parent>
 
   <!-- TODO TEZ-2003 Merge this into the tez-tests module -->

http://git-wip-us.apache.org/repos/asf/tez/blob/6014c965/tez-mapreduce/pom.xml
----------------------------------------------------------------------
diff --git a/tez-mapreduce/pom.xml b/tez-mapreduce/pom.xml
index 300f781..af8bc8b 100644
--- a/tez-mapreduce/pom.xml
+++ b/tez-mapreduce/pom.xml
@@ -20,7 +20,7 @@
   <parent>
     <groupId>org.apache.tez</groupId>
     <artifactId>tez</artifactId>
-    <version>0.8.0-SNAPSHOT</version>
+    <version>0.8.0-TEZ-2003-SNAPSHOT</version>
   </parent>
   <artifactId>tez-mapreduce</artifactId>
 

http://git-wip-us.apache.org/repos/asf/tez/blob/6014c965/tez-plugins/pom.xml
----------------------------------------------------------------------
diff --git a/tez-plugins/pom.xml b/tez-plugins/pom.xml
index 9b2a4cb..0b148ec 100644
--- a/tez-plugins/pom.xml
+++ b/tez-plugins/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.tez</groupId>
     <artifactId>tez</artifactId>
-    <version>0.8.0-SNAPSHOT</version>
+    <version>0.8.0-TEZ-2003-SNAPSHOT</version>
   </parent>
   <artifactId>tez-plugins</artifactId>
   <packaging>pom</packaging>

http://git-wip-us.apache.org/repos/asf/tez/blob/6014c965/tez-plugins/tez-mbeans-resource-calculator/pom.xml
----------------------------------------------------------------------
diff --git a/tez-plugins/tez-mbeans-resource-calculator/pom.xml b/tez-plugins/tez-mbeans-resource-calculator/pom.xml
index 4be7f28..5c828a4 100644
--- a/tez-plugins/tez-mbeans-resource-calculator/pom.xml
+++ b/tez-plugins/tez-mbeans-resource-calculator/pom.xml
@@ -19,7 +19,7 @@
   <parent>
     <groupId>org.apache.tez</groupId>
     <artifactId>tez-plugins</artifactId>
-    <version>0.8.0-SNAPSHOT</version>
+    <version>0.8.0-TEZ-2003-SNAPSHOT</version>
   </parent>
   <artifactId>tez-mbeans-resource-calculator</artifactId>
 

http://git-wip-us.apache.org/repos/asf/tez/blob/6014c965/tez-plugins/tez-yarn-timeline-history-with-acls/pom.xml
----------------------------------------------------------------------
diff --git a/tez-plugins/tez-yarn-timeline-history-with-acls/pom.xml b/tez-plugins/tez-yarn-timeline-history-with-acls/pom.xml
index aeae5cf..761bc10 100644
--- a/tez-plugins/tez-yarn-timeline-history-with-acls/pom.xml
+++ b/tez-plugins/tez-yarn-timeline-history-with-acls/pom.xml
@@ -19,7 +19,7 @@
   <parent>
     <groupId>org.apache.tez</groupId>
     <artifactId>tez-plugins</artifactId>
-    <version>0.8.0-SNAPSHOT</version>
+    <version>0.8.0-TEZ-2003-SNAPSHOT</version>
   </parent>
   <artifactId>tez-yarn-timeline-history-with-acls</artifactId>
 

http://git-wip-us.apache.org/repos/asf/tez/blob/6014c965/tez-plugins/tez-yarn-timeline-history/pom.xml
----------------------------------------------------------------------
diff --git a/tez-plugins/tez-yarn-timeline-history/pom.xml b/tez-plugins/tez-yarn-timeline-history/pom.xml
index ca76988..1e40329 100644
--- a/tez-plugins/tez-yarn-timeline-history/pom.xml
+++ b/tez-plugins/tez-yarn-timeline-history/pom.xml
@@ -19,7 +19,7 @@
   <parent>
     <groupId>org.apache.tez</groupId>
     <artifactId>tez-plugins</artifactId>
-    <version>0.8.0-SNAPSHOT</version>
+    <version>0.8.0-TEZ-2003-SNAPSHOT</version>
   </parent>
   <artifactId>tez-yarn-timeline-history</artifactId>
 

http://git-wip-us.apache.org/repos/asf/tez/blob/6014c965/tez-runtime-internals/pom.xml
----------------------------------------------------------------------
diff --git a/tez-runtime-internals/pom.xml b/tez-runtime-internals/pom.xml
index 3756fa9..b6dee8c 100644
--- a/tez-runtime-internals/pom.xml
+++ b/tez-runtime-internals/pom.xml
@@ -20,7 +20,7 @@
   <parent>
     <groupId>org.apache.tez</groupId>
     <artifactId>tez</artifactId>
-    <version>0.8.0-SNAPSHOT</version>
+    <version>0.8.0-TEZ-2003-SNAPSHOT</version>
   </parent>
   <artifactId>tez-runtime-internals</artifactId>
 

http://git-wip-us.apache.org/repos/asf/tez/blob/6014c965/tez-runtime-library/pom.xml
----------------------------------------------------------------------
diff --git a/tez-runtime-library/pom.xml b/tez-runtime-library/pom.xml
index 03e0ec3..8b566df 100644
--- a/tez-runtime-library/pom.xml
+++ b/tez-runtime-library/pom.xml
@@ -20,7 +20,7 @@
   <parent>
     <groupId>org.apache.tez</groupId>
     <artifactId>tez</artifactId>
-    <version>0.8.0-SNAPSHOT</version>
+    <version>0.8.0-TEZ-2003-SNAPSHOT</version>
   </parent>
   <artifactId>tez-runtime-library</artifactId>
 

http://git-wip-us.apache.org/repos/asf/tez/blob/6014c965/tez-tests/pom.xml
----------------------------------------------------------------------
diff --git a/tez-tests/pom.xml b/tez-tests/pom.xml
index 91bc753..530ab77 100644
--- a/tez-tests/pom.xml
+++ b/tez-tests/pom.xml
@@ -20,7 +20,7 @@
   <parent>
     <groupId>org.apache.tez</groupId>
     <artifactId>tez</artifactId>
-    <version>0.8.0-SNAPSHOT</version>
+    <version>0.8.0-TEZ-2003-SNAPSHOT</version>
   </parent>
   <artifactId>tez-tests</artifactId>
 

http://git-wip-us.apache.org/repos/asf/tez/blob/6014c965/tez-ui/pom.xml
----------------------------------------------------------------------
diff --git a/tez-ui/pom.xml b/tez-ui/pom.xml
index bfc6f01..8ecf7d0 100644
--- a/tez-ui/pom.xml
+++ b/tez-ui/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.tez</groupId>
     <artifactId>tez</artifactId>
-    <version>0.8.0-SNAPSHOT</version>
+    <version>0.8.0-TEZ-2003-SNAPSHOT</version>
   </parent>
   <artifactId>tez-ui</artifactId>
   <packaging>war</packaging>


[32/43] tez git commit: TEZ-2131. Add additional tests for tasks running in the AM. (sseth)

Posted by ss...@apache.org.
TEZ-2131. Add additional tests for tasks running in the AM. (sseth)


Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/67e1643c
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/67e1643c
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/67e1643c

Branch: refs/heads/TEZ-2003
Commit: 67e1643c727103bb9fc04d94909a0dfea980d22f
Parents: a45ef85
Author: Siddharth Seth <ss...@apache.org>
Authored: Fri Feb 20 17:23:18 2015 -0800
Committer: Siddharth Seth <ss...@apache.org>
Committed: Fri May 8 14:41:30 2015 -0700

----------------------------------------------------------------------
 TEZ-2003-CHANGES.txt                                      |  1 +
 .../org/apache/tez/tests/TestExternalTezServices.java     | 10 +++++++---
 2 files changed, 8 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tez/blob/67e1643c/TEZ-2003-CHANGES.txt
----------------------------------------------------------------------
diff --git a/TEZ-2003-CHANGES.txt b/TEZ-2003-CHANGES.txt
index 76496c9..4b0a12b 100644
--- a/TEZ-2003-CHANGES.txt
+++ b/TEZ-2003-CHANGES.txt
@@ -6,5 +6,6 @@ ALL CHANGES:
   TEZ-2122. Setup pluggable components at AM/Vertex level.
   TEZ-2123. Fix component managers to use pluggable components. (Enable hybrid mode)
   TEZ-2125. Create a task communicator for local mode. Allow tasks to run in the AM.
+  TEZ-2131. Add additional tests for tasks running in the AM.
 
 INCOMPATIBLE CHANGES:

http://git-wip-us.apache.org/repos/asf/tez/blob/67e1643c/tez-ext-service-tests/src/test/java/org/apache/tez/tests/TestExternalTezServices.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/tests/TestExternalTezServices.java b/tez-ext-service-tests/src/test/java/org/apache/tez/tests/TestExternalTezServices.java
index 01c2080..0ec972b 100644
--- a/tez-ext-service-tests/src/test/java/org/apache/tez/tests/TestExternalTezServices.java
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/tests/TestExternalTezServices.java
@@ -40,7 +40,6 @@ import org.apache.tez.service.MiniTezTestServiceCluster;
 import org.apache.tez.test.MiniTezCluster;
 import org.junit.AfterClass;
 import org.junit.BeforeClass;
-import org.junit.Ignore;
 import org.junit.Test;
 
 public class TestExternalTezServices {
@@ -229,6 +228,13 @@ public class TestExternalTezServices {
   }
 
   @Test(timeout = 60000)
+  public void testAllInAM() throws Exception {
+    int expectedExternalSubmissions = 0; // All in AM
+    runJoinValidate("AllInAM", expectedExternalSubmissions, PROPS_IN_AM,
+        PROPS_IN_AM, PROPS_IN_AM);
+  }
+
+  @Test(timeout = 60000)
   public void testMixed1() throws Exception { // M-ExtService, R-containers
     int expectedExternalSubmissions = 4 + 0; //4 for 4 src files, 0 for num reducers.
     runJoinValidate("Mixed1", expectedExternalSubmissions, PROPS_EXT_SERVICE_PUSH,
@@ -263,8 +269,6 @@ public class TestExternalTezServices {
         PROPS_EXT_SERVICE_PUSH, PROPS_IN_AM);
   }
 
-
-  @Ignore // Re-activate this after the AM registers the shuffle token with the launcher.
   @Test(timeout = 60000)
   public void testMixed6() throws Exception { // M - AM, R - Service
     int expectedExternalSubmissions = 0 + 3; // 3 for R in service


[08/43] tez git commit: TEZ-1970. Fix javadoc warnings in SortMergeJoinExample. (Mit Desai via hitesh)

Posted by ss...@apache.org.
TEZ-1970. Fix javadoc warnings in SortMergeJoinExample. (Mit Desai via hitesh)


Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/d5a0f39d
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/d5a0f39d
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/d5a0f39d

Branch: refs/heads/TEZ-2003
Commit: d5a0f39d98be9ae06be9008c1deee36827c204e6
Parents: 78ca7af
Author: Hitesh Shah <hi...@apache.org>
Authored: Wed May 6 16:16:30 2015 -0700
Committer: Hitesh Shah <hi...@apache.org>
Committed: Wed May 6 16:16:30 2015 -0700

----------------------------------------------------------------------
 CHANGES.txt                                                        | 1 +
 .../main/java/org/apache/tez/examples/SortMergeJoinExample.java    | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tez/blob/d5a0f39d/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 73f8fda..58648e4 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -9,6 +9,7 @@ INCOMPATIBLE CHANGES
 ALL CHANGES:
   TEZ-2419. Inputs/Outputs should inform the Processor about Interrupts when interrupted during a blocking Op.
   TEZ-1752. Inputs / Outputs in the Runtime library should be interruptable.
+  TEZ-1970. Fix javadoc warnings in SortMergeJoinExample.
 
 Release 0.7.0: Unreleased
 

http://git-wip-us.apache.org/repos/asf/tez/blob/d5a0f39d/tez-examples/src/main/java/org/apache/tez/examples/SortMergeJoinExample.java
----------------------------------------------------------------------
diff --git a/tez-examples/src/main/java/org/apache/tez/examples/SortMergeJoinExample.java b/tez-examples/src/main/java/org/apache/tez/examples/SortMergeJoinExample.java
index 1d59a29..5f4de18 100644
--- a/tez-examples/src/main/java/org/apache/tez/examples/SortMergeJoinExample.java
+++ b/tez-examples/src/main/java/org/apache/tez/examples/SortMergeJoinExample.java
@@ -138,7 +138,7 @@ public class SortMergeJoinExample extends TezExampleBase {
    * @param inputPath2
    * @param outPath
    * @param numPartitions
-   * @return
+   * @return dag
    * @throws IOException
    */
   private DAG createDag(TezConfiguration tezConf, Path inputPath1,


[25/43] tez git commit: TEZ-2123. Fix component managers to use pluggable components. Enable hybrid mode. (sseth)

Posted by ss...@apache.org.
http://git-wip-us.apache.org/repos/asf/tez/blob/c47951ab/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestContainerReuse.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestContainerReuse.java b/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestContainerReuse.java
index 54b9adb..c1169ef 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestContainerReuse.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestContainerReuse.java
@@ -223,7 +223,7 @@ public class TestContainerReuse {
 
     taskSchedulerEventHandler.handleEvent(
       new AMSchedulerEventTAEnded(
-        ta11, containerHost1.getId(), TaskAttemptState.SUCCEEDED));
+        ta11, containerHost1.getId(), TaskAttemptState.SUCCEEDED, 0));
     drainableAppCallback.drain();
     verify(taskScheduler).deallocateTask(eq(ta11), eq(true));
     verify(taskSchedulerEventHandler, times(1)).taskAllocated(
@@ -235,7 +235,7 @@ public class TestContainerReuse {
 
     taskSchedulerEventHandler.handleEvent(
       new AMSchedulerEventTAEnded(ta21, containerHost2.getId(),
-        TaskAttemptState.SUCCEEDED));
+        TaskAttemptState.SUCCEEDED, 0));
 
     long currentTs = System.currentTimeMillis();
     Throwable exception = null;
@@ -356,7 +356,7 @@ public class TestContainerReuse {
 
     taskSchedulerEventHandler.handleEvent(
       new AMSchedulerEventTAEnded(ta21, containerHost2.getId(),
-        TaskAttemptState.SUCCEEDED));
+        TaskAttemptState.SUCCEEDED, 0));
     drainableAppCallback.drain();
     verify(taskScheduler).deallocateTask(eq(ta21), eq(true));
     verify(taskSchedulerEventHandler, times(0)).taskAllocated(
@@ -459,7 +459,7 @@ public class TestContainerReuse {
     verify(taskSchedulerEventHandler).taskAllocated(eq(ta11), any(Object.class), eq(container1));
 
     // Task assigned to container completed successfully. Container should be re-used.
-    taskSchedulerEventHandler.handleEvent(new AMSchedulerEventTAEnded(ta11, container1.getId(), TaskAttemptState.SUCCEEDED));
+    taskSchedulerEventHandler.handleEvent(new AMSchedulerEventTAEnded(ta11, container1.getId(), TaskAttemptState.SUCCEEDED, 0));
     drainableAppCallback.drain();
     verify(taskScheduler).deallocateTask(eq(ta11), eq(true));
     verify(taskSchedulerEventHandler).taskAllocated(eq(ta12), any(Object.class), eq(container1));
@@ -469,7 +469,7 @@ public class TestContainerReuse {
 
     // Task assigned to container completed successfully.
     // Verify reuse across hosts.
-    taskSchedulerEventHandler.handleEvent(new AMSchedulerEventTAEnded(ta12, container1.getId(), TaskAttemptState.SUCCEEDED));
+    taskSchedulerEventHandler.handleEvent(new AMSchedulerEventTAEnded(ta12, container1.getId(), TaskAttemptState.SUCCEEDED, 0));
     drainableAppCallback.drain();
     verify(taskScheduler).deallocateTask(eq(ta12), eq(true));
     verify(taskSchedulerEventHandler).taskAllocated(eq(ta13), any(Object.class), eq(container1));
@@ -478,7 +478,7 @@ public class TestContainerReuse {
     eventHandler.reset();
 
     // Verify no re-use if a previous task fails.
-    taskSchedulerEventHandler.handleEvent(new AMSchedulerEventTAEnded(ta13, container1.getId(), TaskAttemptState.FAILED));
+    taskSchedulerEventHandler.handleEvent(new AMSchedulerEventTAEnded(ta13, container1.getId(), TaskAttemptState.FAILED, 0));
     drainableAppCallback.drain();
     verify(taskSchedulerEventHandler, times(0)).taskAllocated(eq(ta14), any(Object.class), eq(container1));
     verify(taskScheduler).deallocateTask(eq(ta13), eq(false));
@@ -496,7 +496,7 @@ public class TestContainerReuse {
     verify(taskSchedulerEventHandler).taskAllocated(eq(ta14), any(Object.class), eq(container2));
 
     // Task assigned to container completed successfully. No pending requests. Container should be released.
-    taskSchedulerEventHandler.handleEvent(new AMSchedulerEventTAEnded(ta14, container2.getId(), TaskAttemptState.SUCCEEDED));
+    taskSchedulerEventHandler.handleEvent(new AMSchedulerEventTAEnded(ta14, container2.getId(), TaskAttemptState.SUCCEEDED, 0));
     drainableAppCallback.drain();
     verify(taskScheduler).deallocateTask(eq(ta14), eq(true));
     verify(rmClient).releaseAssignedContainer(eq(container2.getId()));
@@ -607,7 +607,7 @@ public class TestContainerReuse {
 
     // First task had profiling on. This container can not be reused further.
     taskSchedulerEventHandler.handleEvent(
-        new AMSchedulerEventTAEnded(ta11, container1.getId(), TaskAttemptState.SUCCEEDED));
+        new AMSchedulerEventTAEnded(ta11, container1.getId(), TaskAttemptState.SUCCEEDED, 0));
     drainableAppCallback.drain();
     verify(taskScheduler).deallocateTask(eq(ta11), eq(true));
     verify(taskSchedulerEventHandler, times(0)).taskAllocated(eq(ta12), any(Object.class),
@@ -653,7 +653,7 @@ public class TestContainerReuse {
 
     // Verify that the container can not be reused when profiling option is turned on
     // Even for 2 tasks having same profiling option can have container reusability.
-    taskSchedulerEventHandler.handleEvent(new AMSchedulerEventTAEnded(ta13, container2.getId(), TaskAttemptState.SUCCEEDED));
+    taskSchedulerEventHandler.handleEvent(new AMSchedulerEventTAEnded(ta13, container2.getId(), TaskAttemptState.SUCCEEDED, 0));
     drainableAppCallback.drain();
     verify(taskScheduler).deallocateTask(eq(ta13), eq(true));
     verify(taskSchedulerEventHandler, times(0)).taskAllocated(eq(ta14), any(Object.class),
@@ -698,7 +698,7 @@ public class TestContainerReuse {
     verify(taskSchedulerEventHandler).taskAllocated(eq(ta15), any(Object.class), eq(container3));
 
     //Ensure task 6 (of vertex 1) is allocated to same container
-    taskSchedulerEventHandler.handleEvent(new AMSchedulerEventTAEnded(ta15, container3.getId(), TaskAttemptState.SUCCEEDED));
+    taskSchedulerEventHandler.handleEvent(new AMSchedulerEventTAEnded(ta15, container3.getId(), TaskAttemptState.SUCCEEDED, 0));
     drainableAppCallback.drain();
     verify(taskScheduler).deallocateTask(eq(ta15), eq(true));
     verify(taskSchedulerEventHandler).taskAllocated(eq(ta16), any(Object.class), eq(container3));
@@ -811,7 +811,7 @@ public class TestContainerReuse {
     // until delay expires.
     taskSchedulerEventHandler.handleEvent(
       new AMSchedulerEventTAEnded(ta11, container1.getId(),
-        TaskAttemptState.SUCCEEDED));
+        TaskAttemptState.SUCCEEDED, 0));
     drainableAppCallback.drain();
     verify(taskScheduler).deallocateTask(eq(ta11), eq(true));
     verify(taskSchedulerEventHandler, times(0)).taskAllocated(
@@ -828,7 +828,7 @@ public class TestContainerReuse {
     // TA12 completed.
     taskSchedulerEventHandler.handleEvent(
       new AMSchedulerEventTAEnded(ta12, container1.getId(),
-        TaskAttemptState.SUCCEEDED));
+        TaskAttemptState.SUCCEEDED, 0));
     drainableAppCallback.drain();
     LOG.info("Sleeping to ensure that the scheduling loop runs");
     Thread.sleep(3000l);
@@ -946,7 +946,7 @@ public class TestContainerReuse {
     // Container should  be assigned to task21.
     taskSchedulerEventHandler.handleEvent(
       new AMSchedulerEventTAEnded(ta11, container1.getId(),
-        TaskAttemptState.SUCCEEDED));
+        TaskAttemptState.SUCCEEDED, 0));
     drainableAppCallback.drain();
     verify(taskScheduler).deallocateTask(eq(ta11), eq(true));
     verify(taskSchedulerEventHandler).taskAllocated(
@@ -956,7 +956,7 @@ public class TestContainerReuse {
     // Task 2 completes.
     taskSchedulerEventHandler.handleEvent(
       new AMSchedulerEventTAEnded(ta21, container1.getId(),
-        TaskAttemptState.SUCCEEDED));
+        TaskAttemptState.SUCCEEDED, 0));
     verify(rmClient, times(0)).releaseAssignedContainer(eq(container1.getId()));
 
     LOG.info("Sleeping to ensure that the scheduling loop runs");
@@ -1065,7 +1065,7 @@ public class TestContainerReuse {
     assertEquals(1, assignEvent.getRemoteTaskLocalResources().size());
     
     // Task assigned to container completed successfully. Container should be re-used.
-    taskSchedulerEventHandler.handleEvent(new AMSchedulerEventTAEnded(ta111, container1.getId(), TaskAttemptState.SUCCEEDED));
+    taskSchedulerEventHandler.handleEvent(new AMSchedulerEventTAEnded(ta111, container1.getId(), TaskAttemptState.SUCCEEDED, 0));
     drainableAppCallback.drain();
     verify(taskScheduler).deallocateTask(eq(ta111), eq(true));
     verify(taskSchedulerEventHandler).taskAllocated(eq(ta112), any(Object.class), eq(container1));
@@ -1077,7 +1077,7 @@ public class TestContainerReuse {
 
     // Task assigned to container completed successfully.
     // Verify reuse across hosts.
-    taskSchedulerEventHandler.handleEvent(new AMSchedulerEventTAEnded(ta112, container1.getId(), TaskAttemptState.SUCCEEDED));
+    taskSchedulerEventHandler.handleEvent(new AMSchedulerEventTAEnded(ta112, container1.getId(), TaskAttemptState.SUCCEEDED, 0));
     drainableAppCallback.drain();
     verify(taskScheduler).deallocateTask(eq(ta112), eq(true));
     verify(rmClient, times(0)).releaseAssignedContainer(eq(container1.getId()));
@@ -1118,7 +1118,7 @@ public class TestContainerReuse {
     assertEquals(2, assignEvent.getRemoteTaskLocalResources().size());
     eventHandler.reset();
 
-    taskSchedulerEventHandler.handleEvent(new AMSchedulerEventTAEnded(ta211, container1.getId(), TaskAttemptState.SUCCEEDED));
+    taskSchedulerEventHandler.handleEvent(new AMSchedulerEventTAEnded(ta211, container1.getId(), TaskAttemptState.SUCCEEDED, 0));
     drainableAppCallback.drain();
     verify(taskScheduler).deallocateTask(eq(ta211), eq(true));
     verify(taskSchedulerEventHandler).taskAllocated(eq(ta212), any(Object.class), eq(container1));

http://git-wip-us.apache.org/repos/asf/tez/blob/c47951ab/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestLocalTaskScheduler.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestLocalTaskScheduler.java b/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestLocalTaskScheduler.java
index 60782e6..12390b2 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestLocalTaskScheduler.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestLocalTaskScheduler.java
@@ -59,7 +59,7 @@ public class TestLocalTaskScheduler {
     TezConfiguration tezConf = new TezConfiguration();
     tezConf.setInt(TezConfiguration.TEZ_AM_INLINE_TASK_EXECUTION_MAX_TASKS, MAX_TASKS);
 
-    LocalContainerFactory containerFactory = new LocalContainerFactory(createMockAppContext());
+    LocalContainerFactory containerFactory = new LocalContainerFactory(createMockAppContext(), 1000);
     HashMap<Object, Container> taskAllocations = new LinkedHashMap<Object, Container>();
     PriorityBlockingQueue<TaskRequest> taskRequestQueue = new PriorityBlockingQueue<TaskRequest>();
     TaskSchedulerAppCallback appClientDelegate = mock(TaskSchedulerAppCallback.class);

http://git-wip-us.apache.org/repos/asf/tez/blob/c47951ab/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestLocalTaskSchedulerService.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestLocalTaskSchedulerService.java b/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestLocalTaskSchedulerService.java
index 3cf4f6c..25cf4b5 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestLocalTaskSchedulerService.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestLocalTaskSchedulerService.java
@@ -22,6 +22,8 @@ import java.util.HashMap;
 import java.util.concurrent.BlockingQueue;
 
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
+import org.apache.hadoop.yarn.api.records.ApplicationId;
 import org.apache.hadoop.yarn.api.records.Container;
 import org.apache.hadoop.yarn.api.records.Priority;
 import org.apache.hadoop.yarn.api.records.Resource;
@@ -81,8 +83,12 @@ public class TestLocalTaskSchedulerService {
    */
   @Test(timeout = 5000)
   public void testDeallocationBeforeAllocation() {
+    AppContext appContext = mock(AppContext.class);
+    ApplicationAttemptId appAttemptId =
+        ApplicationAttemptId.newInstance(ApplicationId.newInstance(10000l, 1), 1);
+    doReturn(appAttemptId).when(appContext).getApplicationAttemptId();
     MockLocalTaskSchedulerSerivce taskSchedulerService = new MockLocalTaskSchedulerSerivce
-        (mock(TaskSchedulerAppCallback.class), mock(ContainerSignatureMatcher.class), "", 0, "", mock(AppContext.class));
+        (mock(TaskSchedulerAppCallback.class), mock(ContainerSignatureMatcher.class), "", 0, "", appContext);
     taskSchedulerService.init(new Configuration());
     taskSchedulerService.start();
 
@@ -105,8 +111,12 @@ public class TestLocalTaskSchedulerService {
    */
   @Test(timeout = 5000)
   public void testDeallocationAfterAllocation() {
+    AppContext appContext = mock(AppContext.class);
+    ApplicationAttemptId appAttemptId =
+        ApplicationAttemptId.newInstance(ApplicationId.newInstance(10000l, 1), 1);
+    doReturn(appAttemptId).when(appContext).getApplicationAttemptId();
     MockLocalTaskSchedulerSerivce taskSchedulerService = new MockLocalTaskSchedulerSerivce
-        (mock(TaskSchedulerAppCallback.class), mock(ContainerSignatureMatcher.class), "", 0, "", mock(AppContext.class));
+        (mock(TaskSchedulerAppCallback.class), mock(ContainerSignatureMatcher.class), "", 0, "", appContext);
     taskSchedulerService.init(new Configuration());
     taskSchedulerService.start();
 
@@ -132,13 +142,13 @@ public class TestLocalTaskSchedulerService {
         String appHostName, int appHostPort, String appTrackingUrl,
         AppContext appContext) {
       super(appClient, containerSignatureMatcher, appHostName, appHostPort,
-          appTrackingUrl, appContext);
+          appTrackingUrl, 10000l, appContext);
     }
 
     @Override
     public AsyncDelegateRequestHandler createRequestHandler(Configuration conf) {
       requestHandler = new MockAsyncDelegateRequestHandler(taskRequestQueue,
-          new LocalContainerFactory(appContext),
+          new LocalContainerFactory(appContext, customContainerAppId),
           taskAllocations,
           appClientDelegate,
           conf);

http://git-wip-us.apache.org/repos/asf/tez/blob/c47951ab/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestTaskSchedulerEventHandler.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestTaskSchedulerEventHandler.java b/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestTaskSchedulerEventHandler.java
index 291e786..4ee05cc 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestTaskSchedulerEventHandler.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestTaskSchedulerEventHandler.java
@@ -89,7 +89,7 @@ public class TestTaskSchedulerEventHandler {
     public MockTaskSchedulerEventHandler(AppContext appContext,
         DAGClientServer clientService, EventHandler eventHandler,
         ContainerSignatureMatcher containerSignatureMatcher, WebUIService webUI) {
-      super(appContext, clientService, eventHandler, containerSignatureMatcher, webUI, new String[] {});
+      super(appContext, clientService, eventHandler, containerSignatureMatcher, webUI, new String[] {}, false);
     }
 
     @Override
@@ -162,7 +162,7 @@ public class TestTaskSchedulerEventHandler {
 
     AMSchedulerEventTALaunchRequest lr =
         new AMSchedulerEventTALaunchRequest(mockAttemptId, resource, null, mockTaskAttempt, locHint,
-            priority, containerContext);
+            priority, containerContext, 0, 0, 0);
     schedulerHandler.taskAllocated(mockTaskAttempt, lr, container);
     assertEquals(2, mockEventHandler.events.size());
     assertTrue(mockEventHandler.events.get(1) instanceof AMContainerEventAssignTA);
@@ -249,9 +249,14 @@ public class TestTaskSchedulerEventHandler {
     Configuration conf = new Configuration(false);
     schedulerHandler.init(conf);
     schedulerHandler.start();
-    
+
+    AMContainer mockAmContainer = mock(AMContainer.class);
+    when(mockAmContainer.getTaskSchedulerIdentifier()).thenReturn(0);
+    when(mockAmContainer.getContainerLauncherIdentifier()).thenReturn(0);
+    when(mockAmContainer.getTaskCommunicatorIdentifier()).thenReturn(0);
     ContainerId mockCId = mock(ContainerId.class);
     verify(mockTaskScheduler, times(0)).deallocateContainer((ContainerId)any());
+    when(mockAMContainerMap.get(mockCId)).thenReturn(mockAmContainer);
     schedulerHandler.preemptContainer(mockCId);
     verify(mockTaskScheduler, times(1)).deallocateContainer(mockCId);
     assertEquals(1, mockEventHandler.events.size());

http://git-wip-us.apache.org/repos/asf/tez/blob/c47951ab/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestTaskSchedulerHelpers.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestTaskSchedulerHelpers.java b/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestTaskSchedulerHelpers.java
index d775300..ffab769 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestTaskSchedulerHelpers.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestTaskSchedulerHelpers.java
@@ -127,7 +127,7 @@ class TestTaskSchedulerHelpers {
         EventHandler eventHandler,
         TezAMRMClientAsync<CookieContainerRequest> amrmClientAsync,
         ContainerSignatureMatcher containerSignatureMatcher) {
-      super(appContext, null, eventHandler, containerSignatureMatcher, null, new String[]{});
+      super(appContext, null, eventHandler, containerSignatureMatcher, null, new String[]{}, false);
       this.amrmClientAsync = amrmClientAsync;
       this.containerSignatureMatcher = containerSignatureMatcher;
     }

http://git-wip-us.apache.org/repos/asf/tez/blob/c47951ab/tez-dag/src/test/java/org/apache/tez/dag/app/rm/container/TestAMContainer.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/rm/container/TestAMContainer.java b/tez-dag/src/test/java/org/apache/tez/dag/app/rm/container/TestAMContainer.java
index fafbba6..bdd0f61 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/rm/container/TestAMContainer.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/rm/container/TestAMContainer.java
@@ -63,6 +63,7 @@ import org.apache.hadoop.yarn.event.EventHandler;
 import org.apache.hadoop.yarn.util.SystemClock;
 import org.apache.tez.common.security.JobTokenIdentifier;
 import org.apache.tez.common.security.TokenCache;
+import org.apache.tez.dag.api.TaskCommunicator;
 import org.apache.tez.dag.app.AppContext;
 import org.apache.tez.dag.app.ContainerHeartbeatHandler;
 import org.apache.tez.dag.app.ContainerContext;
@@ -104,7 +105,7 @@ public class TestAMContainer {
     wc.verifyState(AMContainerState.LAUNCHING);
     // 1 Launch request.
     wc.verifyCountAndGetOutgoingEvents(1);
-    verify(wc.tal).registerRunningContainer(wc.containerID);
+    verify(wc.tal).registerRunningContainer(wc.containerID, 0);
     assertNull(wc.amContainer.getCurrentTaskAttempt());
 
     // Assign task.
@@ -121,7 +122,7 @@ public class TestAMContainer {
     // Once for the previous NO_TASKS, one for the actual task.
     verify(wc.chh).register(wc.containerID);
     ArgumentCaptor<AMContainerTask> argumentCaptor = ArgumentCaptor.forClass(AMContainerTask.class);
-    verify(wc.tal, times(1)).registerTaskAttempt(argumentCaptor.capture(), eq(wc.containerID));
+    verify(wc.tal, times(1)).registerTaskAttempt(argumentCaptor.capture(), eq(wc.containerID), eq(0));
     assertEquals(1, argumentCaptor.getAllValues().size());
     assertEquals(wc.taskAttemptID, argumentCaptor.getAllValues().get(0).getTask().getTaskAttemptID());
     assertEquals(WrappedContainer.taskPriority, argumentCaptor.getAllValues().get(0).getPriority());
@@ -131,14 +132,14 @@ public class TestAMContainer {
     wc.verifyState(AMContainerState.IDLE);
     wc.verifyNoOutgoingEvents();
     assertNull(wc.amContainer.getCurrentTaskAttempt());
-    verify(wc.tal).unregisterTaskAttempt(wc.taskAttemptID);
+    verify(wc.tal).unregisterTaskAttempt(wc.taskAttemptID, 0);
 
     // Container completed
     wc.containerCompleted();
     wc.verifyHistoryStopEvent();
     wc.verifyState(AMContainerState.COMPLETED);
     wc.verifyNoOutgoingEvents();
-    verify(wc.tal).unregisterRunningContainer(wc.containerID);
+    verify(wc.tal).unregisterRunningContainer(wc.containerID, 0);
     verify(wc.chh).unregister(wc.containerID);
 
     assertEquals(1, wc.amContainer.getAllTaskAttempts().size());
@@ -157,7 +158,7 @@ public class TestAMContainer {
     wc.verifyState(AMContainerState.LAUNCHING);
     // 1 Launch request.
     wc.verifyCountAndGetOutgoingEvents(1);
-    verify(wc.tal).registerRunningContainer(wc.containerID);
+    verify(wc.tal).registerRunningContainer(wc.containerID, 0);
 
     // Container Launched
     wc.containerLaunched();
@@ -172,7 +173,7 @@ public class TestAMContainer {
     wc.verifyNoOutgoingEvents();
     assertEquals(wc.taskAttemptID, wc.amContainer.getCurrentTaskAttempt());
     ArgumentCaptor<AMContainerTask> argumentCaptor = ArgumentCaptor.forClass(AMContainerTask.class);
-    verify(wc.tal, times(1)).registerTaskAttempt(argumentCaptor.capture(), eq(wc.containerID));
+    verify(wc.tal, times(1)).registerTaskAttempt(argumentCaptor.capture(), eq(wc.containerID), eq(0));
     assertEquals(1, argumentCaptor.getAllValues().size());
     assertEquals(wc.taskAttemptID, argumentCaptor.getAllValues().get(0).getTask().getTaskAttemptID());
 
@@ -180,13 +181,13 @@ public class TestAMContainer {
     wc.verifyState(AMContainerState.IDLE);
     wc.verifyNoOutgoingEvents();
     assertNull(wc.amContainer.getCurrentTaskAttempt());
-    verify(wc.tal).unregisterTaskAttempt(wc.taskAttemptID);
+    verify(wc.tal).unregisterTaskAttempt(wc.taskAttemptID, 0);
 
     wc.containerCompleted();
     wc.verifyHistoryStopEvent();
     wc.verifyState(AMContainerState.COMPLETED);
     wc.verifyNoOutgoingEvents();
-    verify(wc.tal).unregisterRunningContainer(wc.containerID);
+    verify(wc.tal).unregisterRunningContainer(wc.containerID, 0);
     verify(wc.chh).unregister(wc.containerID);
 
     assertEquals(1, wc.amContainer.getAllTaskAttempts().size());
@@ -205,7 +206,7 @@ public class TestAMContainer {
     wc.verifyState(AMContainerState.LAUNCHING);
     // 1 Launch request.
     wc.verifyCountAndGetOutgoingEvents(1);
-    verify(wc.tal).registerRunningContainer(wc.containerID);
+    verify(wc.tal).registerRunningContainer(wc.containerID, 0);
     assertNull(wc.amContainer.getCurrentTaskAttempt());
 
     // Assign task.
@@ -222,7 +223,7 @@ public class TestAMContainer {
     // Once for the previous NO_TASKS, one for the actual task.
     verify(wc.chh).register(wc.containerID);
     ArgumentCaptor<AMContainerTask> argumentCaptor = ArgumentCaptor.forClass(AMContainerTask.class);
-    verify(wc.tal, times(1)).registerTaskAttempt(argumentCaptor.capture(), eq(wc.containerID));
+    verify(wc.tal, times(1)).registerTaskAttempt(argumentCaptor.capture(), eq(wc.containerID), eq(0));
     assertEquals(1, argumentCaptor.getAllValues().size());
     assertEquals(wc.taskAttemptID, argumentCaptor.getAllValues().get(0).getTask().getTaskAttemptID());
 
@@ -231,13 +232,13 @@ public class TestAMContainer {
     wc.verifyState(AMContainerState.IDLE);
     wc.verifyNoOutgoingEvents();
     assertNull(wc.amContainer.getCurrentTaskAttempt());
-    verify(wc.tal).unregisterTaskAttempt(wc.taskAttemptID);
+    verify(wc.tal).unregisterTaskAttempt(wc.taskAttemptID, 0);
 
     TezTaskAttemptID taId2 = TezTaskAttemptID.getInstance(wc.taskID, 2);
     wc.assignTaskAttempt(taId2);
     wc.verifyState(AMContainerState.RUNNING);
     argumentCaptor = ArgumentCaptor.forClass(AMContainerTask.class);
-    verify(wc.tal, times(2)).registerTaskAttempt(argumentCaptor.capture(), eq(wc.containerID));
+    verify(wc.tal, times(2)).registerTaskAttempt(argumentCaptor.capture(), eq(wc.containerID), eq(0));
     assertEquals(2, argumentCaptor.getAllValues().size());
     assertEquals(taId2, argumentCaptor.getAllValues().get(1).getTask().getTaskAttemptID());
 
@@ -246,14 +247,14 @@ public class TestAMContainer {
     wc.verifyState(AMContainerState.IDLE);
     wc.verifyNoOutgoingEvents();
     assertNull(wc.amContainer.getCurrentTaskAttempt());
-    verify(wc.tal).unregisterTaskAttempt(taId2);
+    verify(wc.tal).unregisterTaskAttempt(taId2, 0);
 
     // Container completed
     wc.containerCompleted();
     wc.verifyHistoryStopEvent();
     wc.verifyState(AMContainerState.COMPLETED);
     wc.verifyNoOutgoingEvents();
-    verify(wc.tal).unregisterRunningContainer(wc.containerID);
+    verify(wc.tal).unregisterRunningContainer(wc.containerID, 0);
     verify(wc.chh).unregister(wc.containerID);
 
     assertEquals(2, wc.amContainer.getAllTaskAttempts().size());
@@ -286,7 +287,7 @@ public class TestAMContainer {
     wc.verifyHistoryStopEvent();
     wc.verifyState(AMContainerState.COMPLETED);
     wc.verifyNoOutgoingEvents();
-    verify(wc.tal).unregisterRunningContainer(wc.containerID);
+    verify(wc.tal).unregisterRunningContainer(wc.containerID, 0);
     verify(wc.chh).unregister(wc.containerID);
 
     assertNull(wc.amContainer.getCurrentTaskAttempt());
@@ -323,7 +324,7 @@ public class TestAMContainer {
     wc.verifyHistoryStopEvent();
     wc.verifyState(AMContainerState.COMPLETED);
     wc.verifyNoOutgoingEvents();
-    verify(wc.tal).unregisterRunningContainer(wc.containerID);
+    verify(wc.tal).unregisterRunningContainer(wc.containerID, 0);
     verify(wc.chh).unregister(wc.containerID);
 
     assertNull(wc.amContainer.getCurrentTaskAttempt());
@@ -346,7 +347,7 @@ public class TestAMContainer {
     wc.assignTaskAttempt(taID2);
 
     wc.verifyState(AMContainerState.STOP_REQUESTED);
-    verify(wc.tal).unregisterRunningContainer(wc.containerID);
+    verify(wc.tal).unregisterRunningContainer(wc.containerID, 0);
     verify(wc.chh).unregister(wc.containerID);
     // 1 for NM stop request. 2 TERMINATING to TaskAttempt.
     outgoingEvents = wc.verifyCountAndGetOutgoingEvents(3);
@@ -378,13 +379,13 @@ public class TestAMContainer {
     wc.launchContainer();
     wc.assignTaskAttempt(wc.taskAttemptID);
     wc.verifyState(AMContainerState.LAUNCHING);
-    verify(wc.tal).registerRunningContainer(wc.containerID);
+    verify(wc.tal).registerRunningContainer(wc.containerID, 0);
 
     TezTaskAttemptID taID2 = TezTaskAttemptID.getInstance(wc.taskID, 2);
     wc.assignTaskAttempt(taID2);
 
     wc.verifyState(AMContainerState.STOP_REQUESTED);
-    verify(wc.tal).unregisterRunningContainer(wc.containerID);
+    verify(wc.tal).unregisterRunningContainer(wc.containerID, 0);
     verify(wc.chh).unregister(wc.containerID);
     // 1 for NM stop request. 2 TERMINATING to TaskAttempt.
     outgoingEvents = wc.verifyCountAndGetOutgoingEvents(3);
@@ -420,7 +421,7 @@ public class TestAMContainer {
 
     wc.containerTimedOut();
     wc.verifyState(AMContainerState.STOP_REQUESTED);
-    verify(wc.tal).unregisterRunningContainer(wc.containerID);
+    verify(wc.tal).unregisterRunningContainer(wc.containerID, 0);
     verify(wc.chh).unregister(wc.containerID);
     // 1 to TA, 1 for RM de-allocate.
     outgoingEvents = wc.verifyCountAndGetOutgoingEvents(2);
@@ -454,7 +455,7 @@ public class TestAMContainer {
 
     wc.stopRequest();
     wc.verifyState(AMContainerState.STOP_REQUESTED);
-    verify(wc.tal).unregisterRunningContainer(wc.containerID);
+    verify(wc.tal).unregisterRunningContainer(wc.containerID, 0);
     verify(wc.chh).unregister(wc.containerID);
     // 1 to TA, 1 for RM de-allocate.
     outgoingEvents = wc.verifyCountAndGetOutgoingEvents(2);
@@ -484,11 +485,11 @@ public class TestAMContainer {
     wc.launchContainer();
     wc.assignTaskAttempt(wc.taskAttemptID);
     wc.verifyState(AMContainerState.LAUNCHING);
-    verify(wc.tal).registerRunningContainer(wc.containerID);
+    verify(wc.tal).registerRunningContainer(wc.containerID, 0);
     wc.launchFailed();
     wc.verifyState(AMContainerState.STOPPING);
-    verify(wc.tal).registerRunningContainer(wc.containerID);
-    verify(wc.tal).unregisterRunningContainer(wc.containerID);
+    verify(wc.tal).registerRunningContainer(wc.containerID, 0);
+    verify(wc.tal).unregisterRunningContainer(wc.containerID, 0);
 
     outgoingEvents = wc.verifyCountAndGetOutgoingEvents(2);
     verifyUnOrderedOutgoingEventTypes(outgoingEvents,
@@ -537,8 +538,8 @@ public class TestAMContainer {
 
     wc.containerCompleted();
     wc.verifyState(AMContainerState.COMPLETED);
-    verify(wc.tal).registerRunningContainer(wc.containerID);
-    verify(wc.tal).unregisterRunningContainer(wc.containerID);
+    verify(wc.tal).registerRunningContainer(wc.containerID, 0);
+    verify(wc.tal).unregisterRunningContainer(wc.containerID, 0);
 
     outgoingEvents = wc.verifyCountAndGetOutgoingEvents(1);
     verifyUnOrderedOutgoingEventTypes(outgoingEvents,
@@ -567,8 +568,8 @@ public class TestAMContainer {
 
     wc.containerCompleted(ContainerExitStatus.DISKS_FAILED, TaskAttemptTerminationCause.NODE_DISK_ERROR);
     wc.verifyState(AMContainerState.COMPLETED);
-    verify(wc.tal).registerRunningContainer(wc.containerID);
-    verify(wc.tal).unregisterRunningContainer(wc.containerID);
+    verify(wc.tal).registerRunningContainer(wc.containerID, 0);
+    verify(wc.tal).unregisterRunningContainer(wc.containerID, 0);
 
     outgoingEvents = wc.verifyCountAndGetOutgoingEvents(1);
     verifyUnOrderedOutgoingEventTypes(outgoingEvents,
@@ -598,8 +599,8 @@ public class TestAMContainer {
 
     wc.containerCompleted(ContainerExitStatus.ABORTED, TaskAttemptTerminationCause.NODE_FAILED);
     wc.verifyState(AMContainerState.COMPLETED);
-    verify(wc.tal).registerRunningContainer(wc.containerID);
-    verify(wc.tal).unregisterRunningContainer(wc.containerID);
+    verify(wc.tal).registerRunningContainer(wc.containerID, 0);
+    verify(wc.tal).unregisterRunningContainer(wc.containerID, 0);
 
     outgoingEvents = wc.verifyCountAndGetOutgoingEvents(1);
     verifyUnOrderedOutgoingEventTypes(outgoingEvents,
@@ -629,8 +630,8 @@ public class TestAMContainer {
 
     wc.containerCompleted();
     wc.verifyState(AMContainerState.COMPLETED);
-    verify(wc.tal).registerRunningContainer(wc.containerID);
-    verify(wc.tal).unregisterRunningContainer(wc.containerID);
+    verify(wc.tal).registerRunningContainer(wc.containerID, 0);
+    verify(wc.tal).unregisterRunningContainer(wc.containerID, 0);
     verify(wc.chh).register(wc.containerID);
     verify(wc.chh).unregister(wc.containerID);
 
@@ -658,8 +659,8 @@ public class TestAMContainer {
 
     wc.containerCompleted();
     wc.verifyState(AMContainerState.COMPLETED);
-    verify(wc.tal).registerRunningContainer(wc.containerID);
-    verify(wc.tal).unregisterRunningContainer(wc.containerID);
+    verify(wc.tal).registerRunningContainer(wc.containerID, 0);
+    verify(wc.tal).unregisterRunningContainer(wc.containerID, 0);
     verify(wc.chh).register(wc.containerID);
     verify(wc.chh).unregister(wc.containerID);
 
@@ -693,8 +694,8 @@ public class TestAMContainer {
 
     wc.containerCompleted(ContainerExitStatus.PREEMPTED, TaskAttemptTerminationCause.EXTERNAL_PREEMPTION);
     wc.verifyState(AMContainerState.COMPLETED);
-    verify(wc.tal).registerRunningContainer(wc.containerID);
-    verify(wc.tal).unregisterRunningContainer(wc.containerID);
+    verify(wc.tal).registerRunningContainer(wc.containerID, 0);
+    verify(wc.tal).unregisterRunningContainer(wc.containerID, 0);
     verify(wc.chh).register(wc.containerID);
     verify(wc.chh).unregister(wc.containerID);
 
@@ -730,8 +731,8 @@ public class TestAMContainer {
 
     wc.containerCompleted(ContainerExitStatus.INVALID, TaskAttemptTerminationCause.INTERNAL_PREEMPTION);
     wc.verifyState(AMContainerState.COMPLETED);
-    verify(wc.tal).registerRunningContainer(wc.containerID);
-    verify(wc.tal).unregisterRunningContainer(wc.containerID);
+    verify(wc.tal).registerRunningContainer(wc.containerID, 0);
+    verify(wc.tal).unregisterRunningContainer(wc.containerID, 0);
     verify(wc.chh).register(wc.containerID);
     verify(wc.chh).unregister(wc.containerID);
 
@@ -767,8 +768,8 @@ public class TestAMContainer {
 
     wc.containerCompleted(ContainerExitStatus.DISKS_FAILED, TaskAttemptTerminationCause.NODE_DISK_ERROR);
     wc.verifyState(AMContainerState.COMPLETED);
-    verify(wc.tal).registerRunningContainer(wc.containerID);
-    verify(wc.tal).unregisterRunningContainer(wc.containerID);
+    verify(wc.tal).registerRunningContainer(wc.containerID, 0);
+    verify(wc.tal).unregisterRunningContainer(wc.containerID, 0);
     verify(wc.chh).register(wc.containerID);
     verify(wc.chh).unregister(wc.containerID);
 
@@ -1011,7 +1012,7 @@ public class TestAMContainer {
     wc.containerLaunched();
     wc.assignTaskAttempt(wc.taskAttemptID);
     ArgumentCaptor<AMContainerTask> argumentCaptor = ArgumentCaptor.forClass(AMContainerTask.class);
-    verify(wc.tal, times(1)).registerTaskAttempt(argumentCaptor.capture(), eq(wc.containerID));
+    verify(wc.tal, times(1)).registerTaskAttempt(argumentCaptor.capture(), eq(wc.containerID), eq(0));
     AMContainerTask task1 = argumentCaptor.getAllValues().get(0);
     assertEquals(0, task1.getAdditionalResources().size());
     wc.taskAttemptSucceeded(wc.taskAttemptID);
@@ -1024,7 +1025,7 @@ public class TestAMContainer {
     TezTaskAttemptID taID2 = TezTaskAttemptID.getInstance(wc.taskID, 2);
     wc.assignTaskAttempt(taID2, additionalResources, new Credentials());
     argumentCaptor = ArgumentCaptor.forClass(AMContainerTask.class);
-    verify(wc.tal, times(2)).registerTaskAttempt(argumentCaptor.capture(), eq(wc.containerID));
+    verify(wc.tal, times(2)).registerTaskAttempt(argumentCaptor.capture(), eq(wc.containerID), eq(0));
     AMContainerTask task2 = argumentCaptor.getAllValues().get(1);
     Map<String, LocalResource> pullTaskAdditionalResources = task2.getAdditionalResources();
     assertEquals(2, pullTaskAdditionalResources.size());
@@ -1047,7 +1048,7 @@ public class TestAMContainer {
     TezTaskAttemptID taID3 = TezTaskAttemptID.getInstance(wc.taskID, 3);
     wc.assignTaskAttempt(taID3, new HashMap<String, LocalResource>(), new Credentials());
     argumentCaptor = ArgumentCaptor.forClass(AMContainerTask.class);
-    verify(wc.tal, times(3)).registerTaskAttempt(argumentCaptor.capture(), eq(wc.containerID));
+    verify(wc.tal, times(3)).registerTaskAttempt(argumentCaptor.capture(), eq(wc.containerID), eq(0));
     AMContainerTask task3 = argumentCaptor.getAllValues().get(2);
     assertEquals(0, task3.getAdditionalResources().size());
     wc.taskAttemptSucceeded(taID3);
@@ -1100,7 +1101,7 @@ public class TestAMContainer {
     wc.containerLaunched();
     wc.assignTaskAttempt(attempt11, LRs, dag1Credentials);
     argumentCaptor = ArgumentCaptor.forClass(AMContainerTask.class);
-    verify(wc.tal, times(1)).registerTaskAttempt(argumentCaptor.capture(), eq(wc.containerID));
+    verify(wc.tal, times(1)).registerTaskAttempt(argumentCaptor.capture(), eq(wc.containerID), eq(0));
     fetchedTask = argumentCaptor.getAllValues().get(0);
     assertTrue(fetchedTask.haveCredentialsChanged());
     assertNotNull(fetchedTask.getCredentials());
@@ -1109,7 +1110,7 @@ public class TestAMContainer {
 
     wc.assignTaskAttempt(attempt12, LRs, dag1Credentials);
     argumentCaptor = ArgumentCaptor.forClass(AMContainerTask.class);
-    verify(wc.tal, times(2)).registerTaskAttempt(argumentCaptor.capture(), eq(wc.containerID));
+    verify(wc.tal, times(2)).registerTaskAttempt(argumentCaptor.capture(), eq(wc.containerID), eq(0));
     fetchedTask = argumentCaptor.getAllValues().get(1);
     assertFalse(fetchedTask.haveCredentialsChanged());
     assertNull(fetchedTask.getCredentials());
@@ -1119,7 +1120,7 @@ public class TestAMContainer {
     wc.setNewDAGID(dagID2);
     wc.assignTaskAttempt(attempt21, LRs, null);
     argumentCaptor = ArgumentCaptor.forClass(AMContainerTask.class);
-    verify(wc.tal, times(3)).registerTaskAttempt(argumentCaptor.capture(), eq(wc.containerID));
+    verify(wc.tal, times(3)).registerTaskAttempt(argumentCaptor.capture(), eq(wc.containerID), eq(0));
     fetchedTask = argumentCaptor.getAllValues().get(2);
     assertTrue(fetchedTask.haveCredentialsChanged());
     assertNull(fetchedTask.getCredentials());
@@ -1127,7 +1128,7 @@ public class TestAMContainer {
 
     wc.assignTaskAttempt(attempt22, LRs, null);
     argumentCaptor = ArgumentCaptor.forClass(AMContainerTask.class);
-    verify(wc.tal, times(4)).registerTaskAttempt(argumentCaptor.capture(), eq(wc.containerID));
+    verify(wc.tal, times(4)).registerTaskAttempt(argumentCaptor.capture(), eq(wc.containerID), eq(0));
     fetchedTask = argumentCaptor.getAllValues().get(3);
     assertFalse(fetchedTask.haveCredentialsChanged());
     assertNull(fetchedTask.getCredentials());
@@ -1137,7 +1138,7 @@ public class TestAMContainer {
     wc.setNewDAGID(dagID3);
     wc.assignTaskAttempt(attempt31, LRs , dag3Credentials);
     argumentCaptor = ArgumentCaptor.forClass(AMContainerTask.class);
-    verify(wc.tal, times(5)).registerTaskAttempt(argumentCaptor.capture(), eq(wc.containerID));
+    verify(wc.tal, times(5)).registerTaskAttempt(argumentCaptor.capture(), eq(wc.containerID), eq(0));
     fetchedTask = argumentCaptor.getAllValues().get(4);
     assertTrue(fetchedTask.haveCredentialsChanged());
     assertNotNull(fetchedTask.getCredentials());
@@ -1147,7 +1148,7 @@ public class TestAMContainer {
 
     wc.assignTaskAttempt(attempt32, LRs, dag1Credentials);
     argumentCaptor = ArgumentCaptor.forClass(AMContainerTask.class);
-    verify(wc.tal, times(6)).registerTaskAttempt(argumentCaptor.capture(), eq(wc.containerID));
+    verify(wc.tal, times(6)).registerTaskAttempt(argumentCaptor.capture(), eq(wc.containerID), eq(0));
     fetchedTask = argumentCaptor.getAllValues().get(5);
     assertFalse(fetchedTask.haveCredentialsChanged());
     assertNull(fetchedTask.getCredentials());
@@ -1200,9 +1201,10 @@ public class TestAMContainer {
 
       chh = mock(ContainerHeartbeatHandler.class);
 
-      InetSocketAddress addr = new InetSocketAddress("localhost", 0);
       tal = mock(TaskAttemptListener.class);
-      doReturn(addr).when(tal).getAddress();
+      TaskCommunicator taskComm = mock(TaskCommunicator.class);
+      doReturn(new InetSocketAddress("localhost", 0)).when(taskComm).getAddress();
+      doReturn(taskComm).when(tal).getTaskCommunicator(0);
 
       dagID = TezDAGID.getInstance(applicationID, 1);
       vertexID = TezVertexID.getInstance(dagID, 1);
@@ -1228,7 +1230,7 @@ public class TestAMContainer {
       doReturn(taskAttemptID).when(taskSpec).getTaskAttemptID();
 
       amContainer = new AMContainerImpl(container, chh, tal,
-          new ContainerContextMatcher(), appContext);
+          new ContainerContextMatcher(), appContext, 0, 0, 0);
     }
 
     public WrappedContainer() {
@@ -1278,7 +1280,7 @@ public class TestAMContainer {
       Token<JobTokenIdentifier> jobToken = mock(Token.class);
       TokenCache.setSessionToken(jobToken, credentials);
       amContainer.handle(new AMContainerEventLaunchRequest(containerID, vertexID,
-          new ContainerContext(localResources, credentials, new HashMap<String, String>(), "")));
+          new ContainerContext(localResources, credentials, new HashMap<String, String>(), ""), 0, 0));
     }
 
     public void assignTaskAttempt(TezTaskAttemptID taID) {

http://git-wip-us.apache.org/repos/asf/tez/blob/c47951ab/tez-dag/src/test/java/org/apache/tez/dag/app/rm/container/TestAMContainerMap.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/rm/container/TestAMContainerMap.java b/tez-dag/src/test/java/org/apache/tez/dag/app/rm/container/TestAMContainerMap.java
index 61371e8..dee4541 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/rm/container/TestAMContainerMap.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/rm/container/TestAMContainerMap.java
@@ -31,6 +31,7 @@ import org.apache.hadoop.yarn.api.records.NodeId;
 import org.apache.hadoop.yarn.api.records.Priority;
 import org.apache.hadoop.yarn.api.records.Resource;
 import org.apache.hadoop.yarn.api.records.Token;
+import org.apache.tez.dag.api.TaskCommunicator;
 import org.apache.tez.dag.app.AppContext;
 import org.apache.tez.dag.app.ContainerHeartbeatHandler;
 import org.apache.tez.dag.app.TaskAttemptListener;
@@ -43,8 +44,9 @@ public class TestAMContainerMap {
 
   private TaskAttemptListener mockTaskAttemptListener() {
     TaskAttemptListener tal = mock(TaskAttemptListener.class);
-    InetSocketAddress socketAddr = new InetSocketAddress("localhost", 21000);
-    doReturn(socketAddr).when(tal).getAddress();
+    TaskCommunicator taskComm = mock(TaskCommunicator.class);
+    doReturn(new InetSocketAddress("localhost", 21000)).when(taskComm).getAddress();
+    doReturn(taskComm).when(tal).getTaskCommunicator(0);
     return tal;
   }
 

http://git-wip-us.apache.org/repos/asf/tez/blob/c47951ab/tez-examples/src/main/java/org/apache/tez/examples/JoinValidate.java
----------------------------------------------------------------------
diff --git a/tez-examples/src/main/java/org/apache/tez/examples/JoinValidate.java b/tez-examples/src/main/java/org/apache/tez/examples/JoinValidate.java
index d7fc5ac..52643c5 100644
--- a/tez-examples/src/main/java/org/apache/tez/examples/JoinValidate.java
+++ b/tez-examples/src/main/java/org/apache/tez/examples/JoinValidate.java
@@ -19,6 +19,7 @@
 package org.apache.tez.examples;
 
 import java.io.IOException;
+import java.util.Map;
 
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -130,7 +131,7 @@ public class JoinValidate extends TezExampleBase {
 
   private DAG createDag(TezConfiguration tezConf, Path lhs, Path rhs, int numPartitions)
       throws IOException {
-    DAG dag = DAG.create("JoinValidate");
+    DAG dag = DAG.create(getDagName());
 
     // Configuration for intermediate output - shared by Vertex1 and Vertex2
     // This should only be setting selective keys from the underlying conf. Fix after there's a
@@ -147,15 +148,18 @@ public class JoinValidate extends TezExampleBase {
         MRInput
             .createConfigBuilder(new Configuration(tezConf), TextInputFormat.class,
                 lhs.toUri().toString()).groupSplits(!isDisableSplitGrouping()).build());
+    setVertexProperties(lhsVertex, getLhsVertexProperties());
 
     Vertex rhsVertex = Vertex.create(RHS_INPUT_NAME, ProcessorDescriptor.create(
         ForwardingProcessor.class.getName())).addDataSource("rhs",
         MRInput
             .createConfigBuilder(new Configuration(tezConf), TextInputFormat.class,
                 rhs.toUri().toString()).groupSplits(!isDisableSplitGrouping()).build());
+    setVertexProperties(rhsVertex, getRhsVertexProperties());
 
     Vertex joinValidateVertex = Vertex.create("joinvalidate", ProcessorDescriptor.create(
         JoinValidateProcessor.class.getName()), numPartitions);
+    setVertexProperties(joinValidateVertex, getValidateVertexProperties());
 
     Edge e1 = Edge.create(lhsVertex, joinValidateVertex, edgeConf.createDefaultEdgeProperty());
     Edge e2 = Edge.create(rhsVertex, joinValidateVertex, edgeConf.createDefaultEdgeProperty());
@@ -165,6 +169,30 @@ public class JoinValidate extends TezExampleBase {
     return dag;
   }
 
+  private void setVertexProperties(Vertex vertex, Map<String, String> properties) {
+    if (properties != null) {
+      for (Map.Entry<String, String> entry : properties.entrySet()) {
+        vertex.setConf(entry.getKey(), entry.getValue());
+      }
+    }
+  }
+
+  protected Map<String, String> getLhsVertexProperties() {
+    return null;
+  }
+
+  protected Map<String, String> getRhsVertexProperties() {
+    return null;
+  }
+
+  protected Map<String, String> getValidateVertexProperties() {
+    return null;
+  }
+
+  protected String getDagName() {
+    return "JoinValidate";
+  }
+
   public static class JoinValidateProcessor extends SimpleProcessor {
 
     private static final Logger LOG = LoggerFactory.getLogger(JoinValidateProcessor.class);

http://git-wip-us.apache.org/repos/asf/tez/blob/c47951ab/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/launcher/TezTestServiceContainerLauncher.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/launcher/TezTestServiceContainerLauncher.java b/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/launcher/TezTestServiceContainerLauncher.java
index e83165b..27356bc 100644
--- a/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/launcher/TezTestServiceContainerLauncher.java
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/launcher/TezTestServiceContainerLauncher.java
@@ -14,6 +14,8 @@
 
 package org.apache.tez.dag.app.launcher;
 
+import java.net.InetSocketAddress;
+
 import com.google.common.base.Preconditions;
 import com.google.protobuf.ByteString;
 import org.apache.commons.logging.Log;
@@ -124,7 +126,8 @@ public class TezTestServiceContainerLauncher extends AbstractService implements
 
   private RunContainerRequestProto constructRunContainerRequest(NMCommunicatorLaunchRequestEvent event) {
     RunContainerRequestProto.Builder builder = RunContainerRequestProto.newBuilder();
-    builder.setAmHost(tal.getAddress().getHostName()).setAmPort(tal.getAddress().getPort());
+    InetSocketAddress address = tal.getTaskCommunicator(event.getTaskCommId()).getAddress();
+    builder.setAmHost(address.getHostName()).setAmPort(address.getPort());
     builder.setAppAttemptNumber(event.getContainer().getId().getApplicationAttemptId().getAttemptId());
     builder.setApplicationIdString(
         event.getContainer().getId().getApplicationAttemptId().getApplicationId().toString());

http://git-wip-us.apache.org/repos/asf/tez/blob/c47951ab/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/rm/TezTestServiceTaskSchedulerService.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/rm/TezTestServiceTaskSchedulerService.java b/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/rm/TezTestServiceTaskSchedulerService.java
index e3c18bf..5657f86 100644
--- a/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/rm/TezTestServiceTaskSchedulerService.java
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/rm/TezTestServiceTaskSchedulerService.java
@@ -14,7 +14,6 @@
 
 package org.apache.tez.dag.app.rm;
 
-import java.io.IOException;
 import java.util.Arrays;
 import java.util.LinkedList;
 import java.util.List;
@@ -32,25 +31,17 @@ import com.google.common.util.concurrent.ThreadFactoryBuilder;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterResponse;
 import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
+import org.apache.hadoop.yarn.api.records.ApplicationId;
 import org.apache.hadoop.yarn.api.records.Container;
 import org.apache.hadoop.yarn.api.records.ContainerId;
-import org.apache.hadoop.yarn.api.records.ContainerStatus;
 import org.apache.hadoop.yarn.api.records.NodeId;
-import org.apache.hadoop.yarn.api.records.NodeReport;
 import org.apache.hadoop.yarn.api.records.Priority;
 import org.apache.hadoop.yarn.api.records.Resource;
-import org.apache.hadoop.yarn.client.api.AMRMClient;
-import org.apache.hadoop.yarn.client.api.async.AMRMClientAsync;
-import org.apache.hadoop.yarn.exceptions.YarnException;
-import org.apache.tez.dag.api.TezUncheckedException;
 import org.apache.tez.dag.app.AppContext;
 import org.apache.tez.service.TezTestServiceConfConstants;
 
 
-// TODO Registration with RM - so that the AM is considered dead and restarted in the expiry interval - 10 minutes.
-
 public class TezTestServiceTaskSchedulerService extends TaskSchedulerService {
 
   private static final Log LOG = LogFactory.getLog(TezTestServiceTaskSchedulerService.class);
@@ -71,7 +62,7 @@ public class TezTestServiceTaskSchedulerService extends TaskSchedulerService {
   private final ConcurrentMap<Object, ContainerId> runningTasks =
       new ConcurrentHashMap<Object, ContainerId>();
 
-  private final AMRMClientAsync<AMRMClient.ContainerRequest> amRmClient;
+  // AppIdIdentifier to avoid conflicts with other containres in the system.
 
   // Per instance
   private final int memoryPerInstance;
@@ -82,10 +73,13 @@ public class TezTestServiceTaskSchedulerService extends TaskSchedulerService {
   private final Resource resourcePerContainer;
 
 
+  // Not registering with the RM. Assuming the main TezScheduler will always run (except local mode),
+  // and take care of YARN registration.
   public TezTestServiceTaskSchedulerService(TaskSchedulerAppCallback appClient,
                                             AppContext appContext,
                                             String clientHostname, int clientPort,
                                             String trackingUrl,
+                                            long customAppIdIdentifier,
                                             Configuration conf) {
     // Accepting configuration here to allow setting up fields as final
     super(TezTestServiceTaskSchedulerService.class.getName());
@@ -93,7 +87,7 @@ public class TezTestServiceTaskSchedulerService extends TaskSchedulerService {
     this.appClientDelegate = createAppCallbackDelegate(appClient);
     this.appContext = appContext;
     this.serviceHosts = new LinkedList<String>();
-    this.containerFactory = new ContainerFactory(appContext);
+    this.containerFactory = new ContainerFactory(appContext, customAppIdIdentifier);
 
     this.memoryPerInstance = conf
         .getInt(TezTestServiceConfConstants.TEZ_TEST_SERVICE_MEMORY_PER_INSTANCE_MB, -1);
@@ -123,7 +117,6 @@ public class TezTestServiceTaskSchedulerService extends TaskSchedulerService {
     int memoryPerContainer = (int) (memoryPerInstance / (float) executorsPerInstance);
     int coresPerContainer = (int) (coresPerInstance / (float) executorsPerInstance);
     this.resourcePerContainer = Resource.newInstance(memoryPerContainer, coresPerContainer);
-    this.amRmClient = TezAMRMClientAsync.createAMRMClientAsync(5000, new FakeAmRmCallbackHandler());
 
     String[] hosts = conf.getTrimmedStrings(TezTestServiceConfConstants.TEZ_TEST_SERVICE_HOSTS);
     if (hosts == null || hosts.length == 0) {
@@ -143,36 +136,8 @@ public class TezTestServiceTaskSchedulerService extends TaskSchedulerService {
   }
 
   @Override
-  public void serviceInit(Configuration conf) {
-    amRmClient.init(conf);
-  }
-
-  @Override
-  public void serviceStart() {
-    amRmClient.start();
-    RegisterApplicationMasterResponse response;
-    try {
-      amRmClient.registerApplicationMaster(clientHostname, clientPort, trackingUrl);
-    } catch (YarnException e) {
-      throw new TezUncheckedException(e);
-    } catch (IOException e) {
-      throw new TezUncheckedException(e);
-    }
-  }
-
-  @Override
   public void serviceStop() {
     if (!this.isStopped.getAndSet(true)) {
-
-      try {
-        TaskSchedulerAppCallback.AppFinalStatus status = appClientDelegate.getFinalAppStatus();
-        amRmClient.unregisterApplicationMaster(status.exitStatus, status.exitMessage,
-            status.postCompletionTrackingUrl);
-      } catch (YarnException e) {
-        throw new TezUncheckedException(e);
-      } catch (IOException e) {
-        throw new TezUncheckedException(e);
-      }
       appCallbackExecutor.shutdownNow();
     }
   }
@@ -264,7 +229,7 @@ public class TezTestServiceTaskSchedulerService extends TaskSchedulerService {
 
   private ExecutorService createAppCallbackExecutorService() {
     return Executors.newSingleThreadExecutor(new ThreadFactoryBuilder()
-        .setNameFormat("TaskSchedulerAppCaller #%d").setDaemon(true).build());
+        .setNameFormat("TezTestTaskSchedulerAppCaller").setDaemon(true).build());
   }
 
   private TaskSchedulerAppCallback createAppCallbackDelegate(
@@ -274,7 +239,7 @@ public class TezTestServiceTaskSchedulerService extends TaskSchedulerService {
   }
 
   private String selectHost(String[] requestedHosts) {
-    String host = null;
+    String host;
     if (requestedHosts != null && requestedHosts.length > 0) {
       Arrays.sort(requestedHosts);
       host = requestedHosts[0];
@@ -287,17 +252,19 @@ public class TezTestServiceTaskSchedulerService extends TaskSchedulerService {
   }
 
   static class ContainerFactory {
-    final AppContext appContext;
     AtomicInteger nextId;
-
-    public ContainerFactory(AppContext appContext) {
-      this.appContext = appContext;
-      this.nextId = new AtomicInteger(2);
+    final ApplicationAttemptId customAppAttemptId;
+
+    public ContainerFactory(AppContext appContext, long appIdLong) {
+      this.nextId = new AtomicInteger(1);
+      ApplicationId appId = ApplicationId
+          .newInstance(appIdLong, appContext.getApplicationAttemptId().getApplicationId().getId());
+      this.customAppAttemptId = ApplicationAttemptId
+          .newInstance(appId, appContext.getApplicationAttemptId().getAttemptId());
     }
 
     public Container createContainer(Resource capability, Priority priority, String hostname, int port) {
-      ApplicationAttemptId appAttemptId = appContext.getApplicationAttemptId();
-      ContainerId containerId = ContainerId.newInstance(appAttemptId, nextId.getAndIncrement());
+      ContainerId containerId = ContainerId.newInstance(customAppAttemptId, nextId.getAndIncrement());
       NodeId nodeId = NodeId.newInstance(hostname, port);
       String nodeHttpAddress = "hostname:0";
 
@@ -311,37 +278,4 @@ public class TezTestServiceTaskSchedulerService extends TaskSchedulerService {
       return container;
     }
   }
-
-  private static class FakeAmRmCallbackHandler implements AMRMClientAsync.CallbackHandler {
-
-    @Override
-    public void onContainersCompleted(List<ContainerStatus> statuses) {
-
-    }
-
-    @Override
-    public void onContainersAllocated(List<Container> containers) {
-
-    }
-
-    @Override
-    public void onShutdownRequest() {
-
-    }
-
-    @Override
-    public void onNodesUpdated(List<NodeReport> updatedNodes) {
-
-    }
-
-    @Override
-    public float getProgress() {
-      return 0;
-    }
-
-    @Override
-    public void onError(Throwable e) {
-
-    }
-  }
 }

http://git-wip-us.apache.org/repos/asf/tez/blob/c47951ab/tez-ext-service-tests/src/test/java/org/apache/tez/examples/JoinValidateConfigured.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/examples/JoinValidateConfigured.java b/tez-ext-service-tests/src/test/java/org/apache/tez/examples/JoinValidateConfigured.java
new file mode 100644
index 0000000..e5d2e3b
--- /dev/null
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/examples/JoinValidateConfigured.java
@@ -0,0 +1,53 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.examples;
+
+import java.util.Map;
+
+public class JoinValidateConfigured extends JoinValidate {
+
+  private final Map<String, String> lhsProps;
+  private final Map<String, String> rhsProps;
+  private final Map<String, String> validateProps;
+  private final String dagNameSuffix;
+
+  public JoinValidateConfigured(Map<String, String> lhsProps, Map<String, String> rhsProps,
+                                Map<String, String> validateProps, String dagNameSuffix) {
+    this.lhsProps = lhsProps;
+    this.rhsProps = rhsProps;
+    this.validateProps = validateProps;
+    this.dagNameSuffix = dagNameSuffix;
+  }
+
+  @Override
+  protected Map<String, String> getLhsVertexProperties() {
+    return this.lhsProps;
+  }
+
+  @Override
+  protected Map<String, String> getRhsVertexProperties() {
+    return this.rhsProps;
+  }
+
+  @Override
+  protected Map<String, String> getValidateVertexProperties() {
+    return this.validateProps;
+  }
+
+  @Override
+  protected String getDagName() {
+    return "JoinValidate_" + dagNameSuffix;
+  }
+}

http://git-wip-us.apache.org/repos/asf/tez/blob/c47951ab/tez-ext-service-tests/src/test/java/org/apache/tez/tests/TestExternalTezServices.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/tests/TestExternalTezServices.java b/tez-ext-service-tests/src/test/java/org/apache/tez/tests/TestExternalTezServices.java
index ae7e7f8..9c149c6 100644
--- a/tez-ext-service-tests/src/test/java/org/apache/tez/tests/TestExternalTezServices.java
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/tests/TestExternalTezServices.java
@@ -15,11 +15,11 @@
 package org.apache.tez.tests;
 
 import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
 
 import java.io.IOException;
 import java.util.Map;
 
+import com.google.common.collect.Maps;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
@@ -28,13 +28,14 @@ import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.apache.tez.client.TezClient;
 import org.apache.tez.dag.api.TezConfiguration;
+import org.apache.tez.dag.api.TezConstants;
 import org.apache.tez.dag.api.TezException;
 import org.apache.tez.dag.app.launcher.TezTestServiceNoOpContainerLauncher;
 import org.apache.tez.dag.app.rm.TezTestServiceTaskSchedulerService;
 import org.apache.tez.dag.app.taskcomm.TezTestServiceTaskCommunicatorImpl;
 import org.apache.tez.examples.HashJoinExample;
 import org.apache.tez.examples.JoinDataGen;
-import org.apache.tez.examples.JoinValidate;
+import org.apache.tez.examples.JoinValidateConfigured;
 import org.apache.tez.service.MiniTezTestServiceCluster;
 import org.apache.tez.test.MiniTezCluster;
 import org.junit.AfterClass;
@@ -47,23 +48,31 @@ public class TestExternalTezServices {
 
   private static final String EXT_PUSH_ENTITY_NAME = "ExtServiceTestPush";
 
-  private static MiniTezCluster tezCluster;
-  private static MiniDFSCluster dfsCluster;
-  private static MiniTezTestServiceCluster tezTestServiceCluster;
+  private static volatile MiniTezCluster tezCluster;
+  private static volatile MiniDFSCluster dfsCluster;
+  private static volatile MiniTezTestServiceCluster tezTestServiceCluster;
 
-  private static Configuration clusterConf = new Configuration();
-  private static Configuration confForJobs;
+  private static volatile Configuration clusterConf = new Configuration();
+  private static volatile Configuration confForJobs;
 
-  private static FileSystem remoteFs;
-  private static FileSystem localFs;
+  private static volatile FileSystem remoteFs;
+  private static volatile FileSystem localFs;
 
-  private static TezClient sharedTezClient;
+  private static volatile TezClient sharedTezClient;
+
+  private static final Path SRC_DATA_DIR = new Path("/tmp/" + TestExternalTezServices.class.getSimpleName());
+  private static final Path HASH_JOIN_EXPECTED_RESULT_PATH = new Path(SRC_DATA_DIR, "expectedOutputPath");
+  private static final Path HASH_JOIN_OUTPUT_PATH = new Path(SRC_DATA_DIR, "outPath");
+
+  private static final Map<String, String> PROPS_EXT_SERVICE_PUSH = Maps.newHashMap();
+  private static final Map<String, String> PROPS_REGULAR_CONTAINERS = Maps.newHashMap();
+  private static final Map<String, String> PROPS_IN_AM = Maps.newHashMap();
 
   private static String TEST_ROOT_DIR = "target" + Path.SEPARATOR + TestExternalTezServices.class.getName()
       + "-tmpDir";
 
   @BeforeClass
-  public static void setup() throws IOException, TezException, InterruptedException {
+  public static void setup() throws Exception {
 
     localFs = FileSystem.getLocal(clusterConf);
 
@@ -108,27 +117,79 @@ public class TestExternalTezServices {
     remoteFs.mkdirs(stagingDirPath);
     // This is currently configured to push tasks into the Service, and then use the standard RPC
     confForJobs.set(TezConfiguration.TEZ_AM_STAGING_DIR, stagingDirPath.toString());
-    confForJobs.set(TezConfiguration.TEZ_AM_TASK_SCHEDULERS,
+
+    confForJobs.setStrings(TezConfiguration.TEZ_AM_TASK_SCHEDULERS,
+        TezConstants.TEZ_AM_SERVICE_PLUGINS_NAME_DEFAULT,
+//        TezConstants.TEZ_AM_SERVICE_PLUGINS_LOCAL_MODE_NAME_DEFAULT,
         EXT_PUSH_ENTITY_NAME + ":" + TezTestServiceTaskSchedulerService.class.getName());
-    confForJobs.set(TezConfiguration.TEZ_AM_CONTAINER_LAUNCHERS,
-        EXT_PUSH_ENTITY_NAME + ":" + TezTestServiceNoOpContainerLauncher.class.getName());
-    confForJobs.set(TezConfiguration.TEZ_AM_TASK_COMMUNICATORS,
-        EXT_PUSH_ENTITY_NAME + ":" + TezTestServiceTaskCommunicatorImpl.class.getName());
 
-    confForJobs.set(TezConfiguration.TEZ_AM_VERTEX_TASK_SCHEDULER_NAME, EXT_PUSH_ENTITY_NAME);
-    confForJobs.set(TezConfiguration.TEZ_AM_VERTEX_CONTAINER_LAUNCHER_NAME, EXT_PUSH_ENTITY_NAME);
-    confForJobs.set(TezConfiguration.TEZ_AM_VERTEX_TASK_COMMUNICATOR_NAME, EXT_PUSH_ENTITY_NAME);
+    confForJobs.setStrings(TezConfiguration.TEZ_AM_CONTAINER_LAUNCHERS,
+        TezConstants.TEZ_AM_SERVICE_PLUGINS_NAME_DEFAULT,
+//        TezConstants.TEZ_AM_SERVICE_PLUGINS_LOCAL_MODE_NAME_DEFAULT,
+        EXT_PUSH_ENTITY_NAME + ":" + TezTestServiceNoOpContainerLauncher.class.getName());
 
+    confForJobs.setStrings(TezConfiguration.TEZ_AM_TASK_COMMUNICATORS,
+        TezConstants.TEZ_AM_SERVICE_PLUGINS_NAME_DEFAULT,
+//        TezConstants.TEZ_AM_SERVICE_PLUGINS_LOCAL_MODE_NAME_DEFAULT,
+        EXT_PUSH_ENTITY_NAME + ":" + TezTestServiceTaskCommunicatorImpl.class.getName());
 
-    TezConfiguration tezConf = new TezConfiguration(confForJobs);
+    // Default all jobs to run via the service. Individual tests override this on a per vertex/dag level.
+    confForJobs.set(TezConfiguration.TEZ_AM_VERTEX_TASK_SCHEDULER_NAME,
+        TezConstants.TEZ_AM_SERVICE_PLUGINS_NAME_DEFAULT);
+    confForJobs.set(TezConfiguration.TEZ_AM_VERTEX_CONTAINER_LAUNCHER_NAME,
+        TezConstants.TEZ_AM_SERVICE_PLUGINS_NAME_DEFAULT);
+    confForJobs.set(TezConfiguration.TEZ_AM_VERTEX_TASK_COMMUNICATOR_NAME,
+        TezConstants.TEZ_AM_SERVICE_PLUGINS_NAME_DEFAULT);
+
+    // Setup various executor sets
+    PROPS_REGULAR_CONTAINERS.put(TezConfiguration.TEZ_AM_VERTEX_TASK_SCHEDULER_NAME,
+        TezConstants.TEZ_AM_SERVICE_PLUGINS_NAME_DEFAULT);
+    PROPS_REGULAR_CONTAINERS.put(TezConfiguration.TEZ_AM_VERTEX_CONTAINER_LAUNCHER_NAME,
+        TezConstants.TEZ_AM_SERVICE_PLUGINS_NAME_DEFAULT);
+    PROPS_REGULAR_CONTAINERS.put(TezConfiguration.TEZ_AM_VERTEX_TASK_COMMUNICATOR_NAME,
+        TezConstants.TEZ_AM_SERVICE_PLUGINS_NAME_DEFAULT);
+
+    PROPS_EXT_SERVICE_PUSH.put(TezConfiguration.TEZ_AM_VERTEX_TASK_SCHEDULER_NAME, EXT_PUSH_ENTITY_NAME);
+    PROPS_EXT_SERVICE_PUSH.put(TezConfiguration.TEZ_AM_VERTEX_CONTAINER_LAUNCHER_NAME, EXT_PUSH_ENTITY_NAME);
+    PROPS_EXT_SERVICE_PUSH.put(TezConfiguration.TEZ_AM_VERTEX_TASK_COMMUNICATOR_NAME, EXT_PUSH_ENTITY_NAME);
+
+    PROPS_IN_AM.put(TezConfiguration.TEZ_AM_VERTEX_TASK_SCHEDULER_NAME,
+        TezConstants.TEZ_AM_SERVICE_PLUGINS_LOCAL_MODE_NAME_DEFAULT);
+    PROPS_IN_AM.put(TezConfiguration.TEZ_AM_VERTEX_CONTAINER_LAUNCHER_NAME,
+        TezConstants.TEZ_AM_SERVICE_PLUGINS_LOCAL_MODE_NAME_DEFAULT);
+    PROPS_IN_AM.put(TezConfiguration.TEZ_AM_VERTEX_TASK_COMMUNICATOR_NAME,
+        TezConstants.TEZ_AM_SERVICE_PLUGINS_LOCAL_MODE_NAME_DEFAULT);
+
+
+    // Create a session to use for all tests.
+    TezConfiguration tezClientConf = new TezConfiguration(confForJobs);
 
     sharedTezClient = TezClient.create(TestExternalTezServices.class.getSimpleName() + "_session",
-        tezConf, true);
+        tezClientConf, true);
     sharedTezClient.start();
     LOG.info("Shared TezSession started");
     sharedTezClient.waitTillReady();
     LOG.info("Shared TezSession ready for submission");
 
+    // Generate the join data set used for each run.
+    // Can a timeout be enforced here ?
+    remoteFs.mkdirs(SRC_DATA_DIR);
+    Path dataPath1 = new Path(SRC_DATA_DIR, "inPath1");
+    Path dataPath2 = new Path(SRC_DATA_DIR, "inPath2");
+    TezConfiguration tezConf = new TezConfiguration(confForJobs);
+    //   Generate join data - with 2 tasks.
+    JoinDataGen dataGen = new JoinDataGen();
+    String[] dataGenArgs = new String[]{
+        dataPath1.toString(), "1048576", dataPath2.toString(), "524288",
+        HASH_JOIN_EXPECTED_RESULT_PATH.toString(), "2"};
+    assertEquals(0, dataGen.run(tezConf, dataGenArgs, sharedTezClient));
+    //    Run the actual join - with 2 reducers
+    HashJoinExample joinExample = new HashJoinExample();
+    String[] args = new String[]{
+        dataPath1.toString(), dataPath2.toString(), "2", HASH_JOIN_OUTPUT_PATH.toString()};
+    assertEquals(0, joinExample.run(tezConf, args, sharedTezClient));
+
+    LOG.info("Completed generating Data - Expected Hash Result and Actual Join Result");
   }
 
   @AfterClass
@@ -156,35 +217,50 @@ public class TestExternalTezServices {
 
 
   @Test(timeout = 60000)
-  public void test1() throws Exception {
-    Path testDir = new Path("/tmp/testHashJoinExample");
+  public void testAllInService() throws Exception {
+    int expectedExternalSubmissions = 4 + 3; //4 for 4 src files, 3 for num reducers.
+    runJoinValidate("AllInService", expectedExternalSubmissions, PROPS_EXT_SERVICE_PUSH,
+        PROPS_EXT_SERVICE_PUSH, PROPS_EXT_SERVICE_PUSH);
+  }
 
-    remoteFs.mkdirs(testDir);
+  @Test(timeout = 60000)
+  public void testAllInContainers() throws Exception {
+    int expectedExternalSubmissions = 0; // All in containers
+    runJoinValidate("AllInContainers", expectedExternalSubmissions, PROPS_REGULAR_CONTAINERS,
+        PROPS_REGULAR_CONTAINERS, PROPS_REGULAR_CONTAINERS);
+  }
 
-    Path dataPath1 = new Path(testDir, "inPath1");
-    Path dataPath2 = new Path(testDir, "inPath2");
-    Path expectedOutputPath = new Path(testDir, "expectedOutputPath");
-    Path outPath = new Path(testDir, "outPath");
+  @Test(timeout = 60000)
+  public void testMixed1() throws Exception { // M-ExtService, R-containers
+    int expectedExternalSubmissions = 4 + 0; //4 for 4 src files, 3 for num reducers.
+    runJoinValidate("Mixed1", expectedExternalSubmissions, PROPS_EXT_SERVICE_PUSH,
+        PROPS_EXT_SERVICE_PUSH, PROPS_REGULAR_CONTAINERS);
+  }
 
-    TezConfiguration tezConf = new TezConfiguration(confForJobs);
+  @Test(timeout = 60000)
+  public void testMixed2() throws Exception { // M-Containers, R-ExtService
+    int expectedExternalSubmissions = 0 + 3; //4 for 4 src files, 3 for num reducers.
+    runJoinValidate("Mixed2", expectedExternalSubmissions, PROPS_REGULAR_CONTAINERS,
+        PROPS_REGULAR_CONTAINERS, PROPS_EXT_SERVICE_PUSH);
+  }
 
-    JoinDataGen dataGen = new JoinDataGen();
-    String[] dataGenArgs = new String[]{
-        dataPath1.toString(), "1048576", dataPath2.toString(), "524288",
-        expectedOutputPath.toString(), "2"};
-    assertEquals(0, dataGen.run(tezConf, dataGenArgs, sharedTezClient));
 
-    HashJoinExample joinExample = new HashJoinExample();
-    String[] args = new String[]{
-        dataPath1.toString(), dataPath2.toString(), "2", outPath.toString()};
-    assertEquals(0, joinExample.run(tezConf, args, sharedTezClient));
+  private void runJoinValidate(String name, int extExpectedCount, Map<String, String> lhsProps,
+                               Map<String, String> rhsProps,
+                               Map<String, String> validateProps) throws
+      Exception {
+    int externalSubmissionCount = tezTestServiceCluster.getNumSubmissions();
 
-    JoinValidate joinValidate = new JoinValidate();
-    String[] validateArgs = new String[]{
-        expectedOutputPath.toString(), outPath.toString(), "3"};
+    TezConfiguration tezConf = new TezConfiguration(confForJobs);
+    JoinValidateConfigured joinValidate =
+        new JoinValidateConfigured(lhsProps, rhsProps,
+            validateProps, name);
+    String[] validateArgs = new String[]{"-disableSplitGrouping",
+        HASH_JOIN_EXPECTED_RESULT_PATH.toString(), HASH_JOIN_OUTPUT_PATH.toString(), "3"};
     assertEquals(0, joinValidate.run(tezConf, validateArgs, sharedTezClient));
 
     // Ensure this was actually submitted to the external cluster
-    assertTrue(tezTestServiceCluster.getNumSubmissions() > 0);
+    assertEquals(extExpectedCount,
+        (tezTestServiceCluster.getNumSubmissions() - externalSubmissionCount));
   }
 }


[15/43] tez git commit: TEZ-2410. VertexGroupCommitFinishedEvent & VertexCommitStartedEvent is not logged correctly (zjffdu)

Posted by ss...@apache.org.
TEZ-2410. VertexGroupCommitFinishedEvent & VertexCommitStartedEvent is not logged correctly (zjffdu)


Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/4a6808ce
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/4a6808ce
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/4a6808ce

Branch: refs/heads/TEZ-2003
Commit: 4a6808ce4c99458653bbe4328dfcad24649a48fb
Parents: 05f77fe
Author: Jeff Zhang <zj...@apache.org>
Authored: Fri May 8 12:42:46 2015 +0800
Committer: Jeff Zhang <zj...@apache.org>
Committed: Fri May 8 12:42:46 2015 +0800

----------------------------------------------------------------------
 CHANGES.txt                                     |   1 +
 .../apache/tez/dag/app/dag/impl/DAGImpl.java    |  61 ++-
 .../apache/tez/dag/app/dag/impl/VertexImpl.java |   1 -
 .../apache/tez/dag/app/dag/impl/TestCommit.java | 454 ++++++++++++++++++-
 4 files changed, 477 insertions(+), 40 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tez/blob/4a6808ce/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index ba8e9d8..3520768 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -20,6 +20,7 @@ INCOMPATIBLE CHANGES
     Default max limit increased. Should not affect existing users.
 
 ALL CHANGES:
+  TEZ-2410. VertexGroupCommitFinishedEvent & VertexCommitStartedEvent is not logged correctly
   TEZ-776. Reduce AM mem usage caused by storing TezEvents
   TEZ-2423. Tez UI: Remove Attempt Index column from task->attempts page
   TEZ-2416. Tez UI: Make tooltips display faster.

http://git-wip-us.apache.org/repos/asf/tez/blob/4a6808ce/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/DAGImpl.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/DAGImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/DAGImpl.java
index f769565..1726c18 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/DAGImpl.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/DAGImpl.java
@@ -212,7 +212,6 @@ public class DAGImpl implements org.apache.tez.dag.app.dag.DAG,
   @VisibleForTesting
   Map<OutputKey, ListenableFuture<Void>> commitFutures
     = new HashMap<OutputKey, ListenableFuture<Void>>();
-  private Set<OutputKey> succeededCommits = new HashSet<OutputKey>();
 
   private static final DiagnosticsUpdateTransition
       DIAGNOSTIC_UPDATE_TRANSITION = new DiagnosticsUpdateTransition();
@@ -457,7 +456,9 @@ public class DAGImpl implements org.apache.tez.dag.app.dag.DAG,
     Set<String> outputs;
     Map<String, InputDescriptor> edgeMergedInputs;
     int successfulMembers;
-    boolean committed;
+    int successfulCommits;
+    boolean commitStarted;
+
     VertexGroupInfo(PlanVertexGroupInfo groupInfo) {
       groupName = groupInfo.getGroupName();
       groupMembers = Sets.newHashSet(groupInfo.getGroupMembersList());
@@ -468,10 +469,20 @@ public class DAGImpl implements org.apache.tez.dag.app.dag.DAG,
       }
       outputs = Sets.newHashSet(groupInfo.getOutputsList());
       successfulMembers = 0;
-      committed = false;
+      successfulCommits = 0;
+      commitStarted = false;
+    }
+
+    public boolean isInCommitting() {
+      return commitStarted && successfulCommits < outputs.size();
+    }
+
+    public boolean isCommitted() {
+      return commitStarted && successfulCommits == outputs.size();
     }
   }
 
+
   public DAGImpl(TezDAGID dagId,
       Configuration amConf,
       DAGPlan jobPlan,
@@ -962,7 +973,7 @@ public class DAGImpl implements org.apache.tez.dag.app.dag.DAG,
     // commit all shared outputs
     for (final VertexGroupInfo groupInfo : vertexGroups.values()) {
       if (!groupInfo.outputs.isEmpty()) {
-        groupInfo.committed = true;
+        groupInfo.commitStarted = true;
         final Vertex v = getVertex(groupInfo.groupMembers.iterator().next());
         for (final String outputName : groupInfo.outputs) {
           final OutputKey outputKey = new OutputKey(outputName, groupInfo.groupName, true);
@@ -1920,7 +1931,7 @@ public class DAGImpl implements org.apache.tez.dag.app.dag.DAG,
                 + " data, groupName=" + groupInfo.groupName);
             continue;
           }
-          groupInfo.committed = true;
+          groupInfo.commitStarted = true;
           final Vertex v = getVertex(groupInfo.groupMembers.iterator().next());
           try {
             appContext.getHistoryHandler().handleCriticalEvent(new DAGHistoryEvent(getID(),
@@ -1966,11 +1977,19 @@ public class DAGImpl implements org.apache.tez.dag.app.dag.DAG,
       + ", vertexId=" + vertex.getVertexId());
 
     if (!commitAllOutputsOnSuccess) {
-      // partial output may already have been committed. fail if so
+      // partial output may already have been in committing or committed. fail if so
       List<VertexGroupInfo> groupList = vertexGroupInfo.get(vertex.getName());
       if (groupList != null) {
         for (VertexGroupInfo groupInfo : groupList) {
-          if (groupInfo.committed) {
+          if (groupInfo.isInCommitting()) {
+            String msg = "Aborting job as committing vertex: "
+                + vertex.getLogIdentifier() + " is re-running";
+            LOG.info(msg);
+            addDiagnostic(msg);
+            enactKill(DAGTerminationCause.VERTEX_RERUN_IN_COMMITTING,
+                VertexTerminationCause.VERTEX_RERUN_IN_COMMITTING);
+            return true;
+          } else if (groupInfo.isCommitted()) {
             String msg = "Aborting job as committed vertex: "
                 + vertex.getLogIdentifier() + " is re-running";
             LOG.info(msg);
@@ -2091,17 +2110,23 @@ public class DAGImpl implements org.apache.tez.dag.app.dag.DAG,
     boolean recoveryFailed = false;
     if (commitCompletedEvent.isSucceeded()) {
       LOG.info("Commit succeeded for output:" + commitCompletedEvent.getOutputKey());
-      succeededCommits.add(commitCompletedEvent.getOutputKey());
-      if (!commitAllOutputsOnSuccess) {
-        try {
-          appContext.getHistoryHandler().handleCriticalEvent(new DAGHistoryEvent(getID(),
-              new VertexGroupCommitFinishedEvent(getID(), commitCompletedEvent.getOutputKey().getEntityName(),
-                  clock.getTime())));
-        } catch (IOException e) {
-          String diag = "Failed to send commit recovery event to handler, " + ExceptionUtils.getStackTrace(e);
-          addDiagnostic(diag);
-          LOG.error(diag);
-          recoveryFailed = true;
+      OutputKey outputKey = commitCompletedEvent.getOutputKey();
+      if (outputKey.isVertexGroupOutput){
+        VertexGroupInfo vertexGroup = vertexGroups.get(outputKey.getEntityName());
+        vertexGroup.successfulCommits++;
+        if (vertexGroup.isCommitted()) {
+          if (!commitAllOutputsOnSuccess) {
+            try {
+              appContext.getHistoryHandler().handleCriticalEvent(new DAGHistoryEvent(getID(),
+                  new VertexGroupCommitFinishedEvent(getID(), commitCompletedEvent.getOutputKey().getEntityName(),
+                      clock.getTime())));
+            } catch (IOException e) {
+              String diag = "Failed to send commit recovery event to handler, " + ExceptionUtils.getStackTrace(e);
+              addDiagnostic(diag);
+              LOG.error(diag);
+              recoveryFailed = true;
+            }
+          }
         }
       }
     } else {

http://git-wip-us.apache.org/repos/asf/tez/blob/4a6808ce/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexImpl.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexImpl.java
index a16ee0a..3a9558d 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexImpl.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexImpl.java
@@ -1901,7 +1901,6 @@ public class VertexImpl implements org.apache.tez.dag.app.dag.Vertex, EventHandl
             vertex.trySetTerminationCause(VertexTerminationCause.RECOVERY_ERROR);
             return vertex.finished(VertexState.FAILED);
           }
-        } else {
           firstCommit = false;
         }
         VertexCommitCallback commitCallback = new VertexCommitCallback(vertex, outputName);

http://git-wip-us.apache.org/repos/asf/tez/blob/4a6808ce/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestCommit.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestCommit.java b/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestCommit.java
index 0df8a4f..8fc29c2 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestCommit.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestCommit.java
@@ -26,9 +26,9 @@ import java.io.DataOutput;
 import java.io.DataOutputStream;
 import java.io.IOException;
 import java.nio.ByteBuffer;
+import java.util.ArrayList;
 import java.util.Collections;
 import java.util.List;
-import java.util.Map;
 import java.util.concurrent.BlockingQueue;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
@@ -63,7 +63,6 @@ import org.apache.tez.dag.api.OutputDescriptor;
 import org.apache.tez.dag.api.ProcessorDescriptor;
 import org.apache.tez.dag.api.TezConfiguration;
 import org.apache.tez.dag.api.UserPayload;
-import org.apache.tez.dag.api.VertexManagerPlugin;
 import org.apache.tez.dag.api.VertexManagerPluginContext;
 import org.apache.tez.dag.api.VertexManagerPluginDescriptor;
 import org.apache.tez.dag.api.client.VertexStatus;
@@ -96,11 +95,13 @@ import org.apache.tez.dag.app.dag.event.VertexEventTaskReschedule;
 import org.apache.tez.dag.app.dag.event.VertexEventType;
 import org.apache.tez.dag.app.dag.impl.DAGImpl.OutputKey;
 import org.apache.tez.dag.history.DAGHistoryEvent;
+import org.apache.tez.dag.history.HistoryEvent;
 import org.apache.tez.dag.history.HistoryEventHandler;
 import org.apache.tez.dag.history.HistoryEventType;
+import org.apache.tez.dag.history.events.*;
 import org.apache.tez.dag.records.TezDAGID;
 import org.apache.tez.dag.records.TezTaskID;
-import org.apache.tez.runtime.api.Event;
+import org.apache.tez.dag.records.TezVertexID;
 import org.apache.tez.runtime.api.OutputCommitter;
 import org.apache.tez.runtime.api.OutputCommitterContext;
 import org.apache.tez.runtime.api.events.VertexManagerEvent;
@@ -144,7 +145,7 @@ public class TestCommit {
   private TaskHeartbeatHandler thh;
   private Clock clock = new SystemClock();
   private DAGFinishEventHandler dagFinishEventHandler;
-  private HistoryEventHandler historyEventHandler;
+  private MockHistoryEventHandler historyEventHandler;
   private TaskAttemptEventDispatcher taskAttemptEventDispatcher;
 
   private ExecutorService rawExecutor;
@@ -305,7 +306,7 @@ public class TestCommit {
     execService = MoreExecutors.listeningDecorator(rawExecutor);
 
     doReturn(execService).when(appContext).getExecService();
-    historyEventHandler = mock(HistoryEventHandler.class);
+    historyEventHandler = new MockHistoryEventHandler(appContext);
     aclManager = new ACLManager("amUser");
     doReturn(conf).when(appContext).getAMConf();
     doReturn(appAttemptId).when(appContext).getApplicationAttemptId();
@@ -441,6 +442,63 @@ public class TestCommit {
     return dag.createDag(conf, null, null, null, true);
   }
 
+  // v1->v3
+  // v2->v3
+  // vertex_group (v1, v2) has 2 shared outputs
+  private DAGPlan createDAGPlanWith2VertexGroupOutputs(boolean vertexGroupCommitSucceeded1,
+    boolean vertexGroupCommitSucceeded2, boolean v3CommitSucceeded) throws Exception {
+    LOG.info("Setting up group dag plan");
+    int dummyTaskCount = 1;
+    Resource dummyTaskResource = Resource.newInstance(1, 1);
+    org.apache.tez.dag.api.Vertex v1 = org.apache.tez.dag.api.Vertex.create(
+            "vertex1", ProcessorDescriptor.create("Processor"), dummyTaskCount,
+            dummyTaskResource);
+    org.apache.tez.dag.api.Vertex v2 = org.apache.tez.dag.api.Vertex.create(
+        "vertex2", ProcessorDescriptor.create("Processor"), dummyTaskCount,
+        dummyTaskResource);
+    org.apache.tez.dag.api.Vertex v3 = org.apache.tez.dag.api.Vertex.create(
+        "vertex3", ProcessorDescriptor.create("Processor"), dummyTaskCount,
+        dummyTaskResource);
+
+    DAG dag = DAG.create("testDag");
+    String groupName1 = "uv12";
+    OutputCommitterDescriptor ocd1 = OutputCommitterDescriptor.create(
+        CountingOutputCommitter.class.getName()).setUserPayload(
+        UserPayload.create(ByteBuffer
+            .wrap(new CountingOutputCommitter.CountingOutputCommitterConfig(
+                !vertexGroupCommitSucceeded1, true).toUserPayload())));
+    OutputCommitterDescriptor ocd2 = OutputCommitterDescriptor.create(
+    CountingOutputCommitter.class.getName()).setUserPayload(
+    UserPayload.create(ByteBuffer
+        .wrap(new CountingOutputCommitter.CountingOutputCommitterConfig(
+                !vertexGroupCommitSucceeded2, true).toUserPayload())));
+    OutputCommitterDescriptor ocd3 = OutputCommitterDescriptor.create(
+        CountingOutputCommitter.class.getName()).setUserPayload(
+        UserPayload.create(ByteBuffer
+            .wrap(new CountingOutputCommitter.CountingOutputCommitterConfig(
+                !v3CommitSucceeded, true).toUserPayload())));
+
+    org.apache.tez.dag.api.VertexGroup uv12 = dag.createVertexGroup(groupName1,
+        v1, v2);
+    OutputDescriptor outDesc = OutputDescriptor.create("output.class");
+    uv12.addDataSink("v12Out1", DataSinkDescriptor.create(outDesc, ocd1, null));
+    uv12.addDataSink("v12Out2", DataSinkDescriptor.create(outDesc, ocd2, null));
+    v3.addDataSink("v3Out", DataSinkDescriptor.create(outDesc, ocd3, null));
+
+    GroupInputEdge e1 = GroupInputEdge.create(uv12, v3, EdgeProperty.create(
+        DataMovementType.SCATTER_GATHER, DataSourceType.PERSISTED,
+        SchedulingType.SEQUENTIAL,
+        OutputDescriptor.create("dummy output class"),
+        InputDescriptor.create("dummy input class")), InputDescriptor
+        .create("merge.class"));
+
+    dag.addVertex(v1);
+    dag.addVertex(v2);
+    dag.addVertex(v3);
+    dag.addEdge(e1);
+    return dag.createDag(conf, null, null, null, true);
+  }
+
   private DAGPlan createDAGPlan_SingleVertexWith2Committer(
       boolean commit1Succeed, boolean commit2Succeed) throws IOException {
     return createDAGPlan_SingleVertexWith2Committer(commit1Succeed, commit2Succeed, false);
@@ -493,7 +551,7 @@ public class TestCommit {
   }
 
   @Test(timeout = 5000)
-  public void testVertexSucceedWithoutCommit() throws Exception {
+  public void testVertexCommit_OnDAGSuccess() throws Exception {
     conf.setBoolean(TezConfiguration.TEZ_AM_COMMIT_ALL_OUTPUTS_ON_DAG_SUCCESS,
         true);
     setupDAG(createDAGPlan_SingleVertexWith2Committer(true, true));
@@ -509,6 +567,9 @@ public class TestCommit {
         .getOutputCommitter("v1Out_1");
     CountingOutputCommitter v1OutputCommitter_2 = (CountingOutputCommitter) v1
         .getOutputCommitter("v1Out_2");
+    historyEventHandler.verifyVertexCommitStartedEvent(v1.getVertexId(), 0);
+    historyEventHandler.verifyVertexFinishedEvent(v1.getVertexId(), 1);
+
     Assert.assertEquals(1, v1OutputCommitter_1.initCounter);
     Assert.assertEquals(1, v1OutputCommitter_1.setupCounter);
     Assert.assertEquals(0, v1OutputCommitter_1.commitCounter);
@@ -544,6 +605,8 @@ public class TestCommit {
     waitUntil(v1, VertexState.SUCCEEDED);
     Assert.assertNull(v1.getTerminationCause());
     Assert.assertTrue(v1.commitFutures.isEmpty());
+    historyEventHandler.verifyVertexCommitStartedEvent(v1.getVertexId(), 1);
+    historyEventHandler.verifyVertexFinishedEvent(v1.getVertexId(), 1);
 
     Assert.assertEquals(1, v1OutputCommitter_1.initCounter);
     Assert.assertEquals(1, v1OutputCommitter_1.setupCounter);
@@ -576,8 +639,11 @@ public class TestCommit {
     Assert.assertEquals(VertexTerminationCause.COMMIT_FAILURE,
         v1.getTerminationCause());
     Assert.assertTrue(v1.commitFutures.isEmpty());
+    historyEventHandler.verifyVertexCommitStartedEvent(v1.getVertexId(), 1);
+    historyEventHandler.verifyVertexFinishedEvent(v1.getVertexId(), 1);
     CountingOutputCommitter v1OutputCommitter_2 = (CountingOutputCommitter) v1
         .getOutputCommitter("v1Out_2");
+
     Assert.assertEquals(1, v1OutputCommitter_1.initCounter);
     Assert.assertEquals(1, v1OutputCommitter_1.setupCounter);
     Assert.assertEquals(1, v1OutputCommitter_1.commitCounter);
@@ -606,11 +672,13 @@ public class TestCommit {
     v1OutputCommitter_1.unblockCommit();
     waitForCommitCompleted(v1, "v1Out_1");
     Assert.assertEquals(VertexState.COMMITTING, v1.getState());
-    
+
     CountingOutputCommitter v1OutputCommitter_2 = (CountingOutputCommitter) v1
         .getOutputCommitter("v1Out_2");
     v1OutputCommitter_2.unblockCommit();
     waitUntil(v1, VertexState.FAILED);
+    historyEventHandler.verifyVertexCommitStartedEvent(v1.getVertexId(), 1);
+    historyEventHandler.verifyVertexFinishedEvent(v1.getVertexId(), 1);
 
     Assert.assertEquals(VertexTerminationCause.COMMIT_FAILURE,
         v1.getTerminationCause());
@@ -647,6 +715,8 @@ public class TestCommit {
     Assert.assertEquals(DAGState.KILLED, dag.getState());
     Assert
         .assertEquals(DAGTerminationCause.DAG_KILL, dag.getTerminationCause());
+    historyEventHandler.verifyVertexCommitStartedEvent(v1.getVertexId(), 1);
+    historyEventHandler.verifyVertexFinishedEvent(v1.getVertexId(), 1);
 
     CountingOutputCommitter v1OutputCommitter_1 = (CountingOutputCommitter) v1
         .getOutputCommitter("v1Out_1");
@@ -685,6 +755,8 @@ public class TestCommit {
     Assert.assertEquals(DAGState.FAILED, dag.getState());
     Assert.assertEquals(DAGTerminationCause.VERTEX_FAILURE,
         dag.getTerminationCause());
+    historyEventHandler.verifyVertexCommitStartedEvent(v1.getVertexId(), 1);
+    historyEventHandler.verifyVertexFinishedEvent(v1.getVertexId(), 1);
 
     CountingOutputCommitter v1OutputCommitter_1 = (CountingOutputCommitter) v1
         .getOutputCommitter("v1Out_1");
@@ -727,6 +799,8 @@ public class TestCommit {
     Assert.assertEquals(DAGState.FAILED, dag.getState());
     Assert.assertEquals(DAGTerminationCause.VERTEX_FAILURE,
         dag.getTerminationCause());
+    historyEventHandler.verifyVertexCommitStartedEvent(v1.getVertexId(), 1);
+    historyEventHandler.verifyVertexFinishedEvent(v1.getVertexId(), 1);
 
     CountingOutputCommitter v1OutputCommitter_1 = (CountingOutputCommitter) v1
         .getOutputCommitter("v1Out_1");
@@ -764,6 +838,8 @@ public class TestCommit {
     Assert.assertEquals(DAGState.ERROR, dag.getState());
     Assert.assertEquals(DAGTerminationCause.INTERNAL_ERROR,
         dag.getTerminationCause());
+    historyEventHandler.verifyVertexCommitStartedEvent(v1.getVertexId(), 1);
+    historyEventHandler.verifyVertexFinishedEvent(v1.getVertexId(), 1);
 
     CountingOutputCommitter v1OutputCommitter_1 = (CountingOutputCommitter) v1
         .getOutputCommitter("v1Out_1");
@@ -811,6 +887,20 @@ public class TestCommit {
     waitUntil(dag, DAGState.SUCCEEDED);
     Assert.assertTrue(dag.commitFutures.isEmpty());
     Assert.assertNull(dag.getTerminationCause());
+    historyEventHandler.verifyVertexGroupCommitStartedEvent("uv12", 0);
+    historyEventHandler.verifyVertexGroupCommitFinishedEvent("uv12", 0);
+    historyEventHandler.verifyVertexGroupCommitStartedEvent("v1", 0);
+    historyEventHandler.verifyVertexGroupCommitFinishedEvent("v1", 0);
+    historyEventHandler.verifyVertexGroupCommitStartedEvent("v3", 0);
+    historyEventHandler.verifyVertexGroupCommitFinishedEvent("v3", 0);
+    historyEventHandler.verifyVertexCommitStartedEvent(v1.getVertexId(), 0);
+    historyEventHandler.verifyVertexFinishedEvent(v1.getVertexId(), 1);
+    historyEventHandler.verifyVertexCommitStartedEvent(v2.getVertexId(), 0);
+    historyEventHandler.verifyVertexFinishedEvent(v2.getVertexId(), 1);
+    historyEventHandler.verifyVertexCommitStartedEvent(v3.getVertexId(), 0);
+    historyEventHandler.verifyVertexFinishedEvent(v3.getVertexId(), 1);
+    historyEventHandler.verifyDAGCommitStartedEvent(dag.getID(), 1);
+    historyEventHandler.verifyDAGFinishedEvent(dag.getID(), 1);
 
     Assert.assertEquals(1, v12OutputCommitter.initCounter);
     Assert.assertEquals(1, v12OutputCommitter.setupCounter);
@@ -857,10 +947,21 @@ public class TestCommit {
     Assert.assertEquals(VertexState.SUCCEEDED, v1.getState());
     Assert.assertEquals(VertexState.SUCCEEDED, v2.getState());
     Assert.assertEquals(VertexState.SUCCEEDED, v3.getState());
-
     Assert.assertEquals(DAGTerminationCause.COMMIT_FAILURE,
         dag.getTerminationCause());
     Assert.assertTrue(dag.commitFutures.isEmpty());
+    historyEventHandler.verifyVertexGroupCommitStartedEvent("uv12", 0);
+    historyEventHandler.verifyVertexGroupCommitFinishedEvent("uv12", 0);
+    historyEventHandler.verifyVertexGroupCommitStartedEvent("v1", 0);
+    historyEventHandler.verifyVertexGroupCommitFinishedEvent("v1", 0);
+    historyEventHandler.verifyVertexCommitStartedEvent(v1.getVertexId(), 0);
+    historyEventHandler.verifyVertexFinishedEvent(v1.getVertexId(), 1);
+    historyEventHandler.verifyVertexCommitStartedEvent(v2.getVertexId(), 0);
+    historyEventHandler.verifyVertexFinishedEvent(v2.getVertexId(), 1);
+    historyEventHandler.verifyVertexCommitStartedEvent(v3.getVertexId(), 0);
+    historyEventHandler.verifyVertexFinishedEvent(v3.getVertexId(), 1);
+    historyEventHandler.verifyDAGCommitStartedEvent(dag.getID(), 1);
+    historyEventHandler.verifyDAGFinishedEvent(dag.getID(), 1);
 
     Assert.assertEquals(1, v12OutputCommitter.initCounter);
     Assert.assertEquals(1, v12OutputCommitter.setupCounter);
@@ -899,6 +1000,8 @@ public class TestCommit {
         .getOutputCommitter("v3Out");
     v12OutputCommitter.unblockCommit();
     waitUntil(dag, DAGState.FAILED);
+    historyEventHandler.verifyVertexGroupCommitStartedEvent("uv12", 0);
+    historyEventHandler.verifyVertexGroupCommitFinishedEvent("uv12", 0);
 
     Assert.assertEquals(VertexState.SUCCEEDED, v1.getState());
     Assert.assertEquals(VertexState.SUCCEEDED, v2.getState());
@@ -907,6 +1010,18 @@ public class TestCommit {
     Assert.assertEquals(DAGTerminationCause.COMMIT_FAILURE,
         dag.getTerminationCause());
     Assert.assertTrue(dag.commitFutures.isEmpty());
+    historyEventHandler.verifyVertexGroupCommitStartedEvent("uv12", 0);
+    historyEventHandler.verifyVertexGroupCommitFinishedEvent("uv12", 0);
+    historyEventHandler.verifyVertexGroupCommitStartedEvent("v1", 0);
+    historyEventHandler.verifyVertexGroupCommitFinishedEvent("v1", 0);
+    historyEventHandler.verifyVertexCommitStartedEvent(v1.getVertexId(), 0);
+    historyEventHandler.verifyVertexFinishedEvent(v1.getVertexId(), 1);
+    historyEventHandler.verifyVertexCommitStartedEvent(v2.getVertexId(), 0);
+    historyEventHandler.verifyVertexFinishedEvent(v2.getVertexId(), 1);
+    historyEventHandler.verifyVertexCommitStartedEvent(v3.getVertexId(), 0);
+    historyEventHandler.verifyVertexFinishedEvent(v3.getVertexId(), 1);
+    historyEventHandler.verifyDAGCommitStartedEvent(dag.getID(), 1);
+    historyEventHandler.verifyDAGFinishedEvent(dag.getID(), 1);
 
     Assert.assertEquals(1, v12OutputCommitter.initCounter);
     Assert.assertEquals(1, v12OutputCommitter.setupCounter);
@@ -953,6 +1068,18 @@ public class TestCommit {
     v12OutputCommitter.unblockCommit();
     waitUntil(dag, DAGState.SUCCEEDED);
     Assert.assertTrue(dag.commitFutures.isEmpty());
+    historyEventHandler.verifyVertexGroupCommitStartedEvent("uv12", 1);
+    historyEventHandler.verifyVertexGroupCommitFinishedEvent("uv12", 1);
+    historyEventHandler.verifyVertexGroupCommitStartedEvent("v1", 0);
+    historyEventHandler.verifyVertexGroupCommitFinishedEvent("v1", 0);
+    historyEventHandler.verifyVertexCommitStartedEvent(v1.getVertexId(), 0);
+    historyEventHandler.verifyVertexFinishedEvent(v1.getVertexId(), 1);
+    historyEventHandler.verifyVertexCommitStartedEvent(v2.getVertexId(), 0);
+    historyEventHandler.verifyVertexFinishedEvent(v2.getVertexId(), 1);
+    historyEventHandler.verifyVertexCommitStartedEvent(v3.getVertexId(), 1);
+    historyEventHandler.verifyVertexFinishedEvent(v3.getVertexId(), 1);
+    historyEventHandler.verifyDAGCommitStartedEvent(dag.getID(), 0);
+    historyEventHandler.verifyDAGFinishedEvent(dag.getID(), 1);
 
     Assert.assertEquals(1, v12OutputCommitter.initCounter);
     Assert.assertEquals(1, v12OutputCommitter.setupCounter);
@@ -1001,6 +1128,18 @@ public class TestCommit {
     v3OutputCommitter.unblockCommit();
     waitUntil(dag, DAGState.SUCCEEDED);
     Assert.assertTrue(dag.commitFutures.isEmpty());
+    historyEventHandler.verifyVertexGroupCommitStartedEvent("uv12", 1);
+    historyEventHandler.verifyVertexGroupCommitFinishedEvent("uv12", 1);
+    historyEventHandler.verifyVertexGroupCommitStartedEvent("v1", 0);
+    historyEventHandler.verifyVertexGroupCommitFinishedEvent("v1", 0);
+    historyEventHandler.verifyVertexCommitStartedEvent(v1.getVertexId(), 0);
+    historyEventHandler.verifyVertexFinishedEvent(v1.getVertexId(), 1);
+    historyEventHandler.verifyVertexCommitStartedEvent(v2.getVertexId(), 0);
+    historyEventHandler.verifyVertexFinishedEvent(v2.getVertexId(), 1);
+    historyEventHandler.verifyVertexCommitStartedEvent(v3.getVertexId(), 1);
+    historyEventHandler.verifyVertexFinishedEvent(v3.getVertexId(), 1);
+    historyEventHandler.verifyDAGCommitStartedEvent(dag.getID(), 0);
+    historyEventHandler.verifyDAGFinishedEvent(dag.getID(), 1);
 
     Assert.assertEquals(1, v12OutputCommitter.initCounter);
     Assert.assertEquals(1, v12OutputCommitter.setupCounter);
@@ -1013,6 +1152,71 @@ public class TestCommit {
     Assert.assertEquals(0, v3OutputCommitter.abortCounter);
   }
 
+  // test DAGCommitSucceeded when vertex group has multiple shared outputs
+  @Test(timeout = 5000)
+  public void testDAGCommitSucceeded3_OnVertexSuccess() throws Exception {
+    conf.setBoolean(TezConfiguration.TEZ_AM_COMMIT_ALL_OUTPUTS_ON_DAG_SUCCESS,
+        false);
+    setupDAG(createDAGPlanWith2VertexGroupOutputs(true, true, true));
+    initDAG(dag);
+    startDAG(dag);
+    VertexImpl v1 = (VertexImpl) dag.getVertex("vertex1");
+    VertexImpl v2 = (VertexImpl) dag.getVertex("vertex2");
+    VertexImpl v3 = (VertexImpl) dag.getVertex("vertex3");
+
+    v1.handle(new VertexEventTaskCompleted(v1.getTask(0).getTaskId(),
+        TaskState.SUCCEEDED));
+    v2.handle(new VertexEventTaskCompleted(v2.getTask(0).getTaskId(),
+        TaskState.SUCCEEDED));
+    v3.handle(new VertexEventTaskCompleted(v3.getTask(0).getTaskId(),
+        TaskState.SUCCEEDED));
+    Assert.assertEquals(VertexState.SUCCEEDED, v1.getState());
+    Assert.assertEquals(VertexState.SUCCEEDED, v2.getState());
+    Assert.assertEquals(VertexState.COMMITTING, v3.getState());
+    Assert.assertEquals(DAGState.RUNNING, dag.getState());
+
+    CountingOutputCommitter v12OutputCommitter1 = (CountingOutputCommitter) v1
+        .getOutputCommitter("v12Out1");
+    v12OutputCommitter1.unblockCommit();
+    CountingOutputCommitter v12OutputCommitter2 = (CountingOutputCommitter) v1
+        .getOutputCommitter("v12Out2");
+    v12OutputCommitter2.unblockCommit();
+    Assert.assertEquals(DAGState.RUNNING, dag.getState());
+
+    CountingOutputCommitter v3OutputCommitter = (CountingOutputCommitter) v3
+        .getOutputCommitter("v3Out");
+    v3OutputCommitter.unblockCommit();
+    waitUntil(dag, DAGState.SUCCEEDED);
+    Assert.assertTrue(dag.commitFutures.isEmpty());
+    historyEventHandler.verifyVertexGroupCommitStartedEvent("uv12", 1);
+    historyEventHandler.verifyVertexGroupCommitFinishedEvent("uv12", 1);
+    historyEventHandler.verifyVertexGroupCommitStartedEvent("v1", 0);
+    historyEventHandler.verifyVertexGroupCommitFinishedEvent("v1", 0);
+    historyEventHandler.verifyVertexCommitStartedEvent(v1.getVertexId(), 0);
+    historyEventHandler.verifyVertexFinishedEvent(v1.getVertexId(), 1);
+    historyEventHandler.verifyVertexCommitStartedEvent(v2.getVertexId(), 0);
+    historyEventHandler.verifyVertexFinishedEvent(v2.getVertexId(), 1);
+    historyEventHandler.verifyVertexCommitStartedEvent(v3.getVertexId(), 1);
+    historyEventHandler.verifyVertexFinishedEvent(v3.getVertexId(), 1);
+    historyEventHandler.verifyDAGCommitStartedEvent(dag.getID(), 0);
+    historyEventHandler.verifyDAGFinishedEvent(dag.getID(), 1);
+
+    Assert.assertEquals(1, v12OutputCommitter1.initCounter);
+    Assert.assertEquals(1, v12OutputCommitter1.setupCounter);
+    Assert.assertEquals(1, v12OutputCommitter1.commitCounter);
+    Assert.assertEquals(0, v12OutputCommitter1.abortCounter);
+
+    Assert.assertEquals(1, v12OutputCommitter2.initCounter);
+    Assert.assertEquals(1, v12OutputCommitter2.setupCounter);
+    Assert.assertEquals(1, v12OutputCommitter2.commitCounter);
+    Assert.assertEquals(0, v12OutputCommitter2.abortCounter);
+
+    Assert.assertEquals(1, v3OutputCommitter.initCounter);
+    Assert.assertEquals(1, v3OutputCommitter.setupCounter);
+    Assert.assertEquals(1, v3OutputCommitter.commitCounter);
+    Assert.assertEquals(0, v3OutputCommitter.abortCounter);
+  }
+
   // commit of vertex group(v1,v2) fail and commit of v3 is not completed
   @Test(timeout = 5000)
   public void testDAGCommitFail1_OnVertexSuccess() throws Exception {
@@ -1048,6 +1252,16 @@ public class TestCommit {
     Assert.assertEquals(DAGTerminationCause.COMMIT_FAILURE,
         dag.getTerminationCause());
     Assert.assertTrue(dag.commitFutures.isEmpty());
+    historyEventHandler.verifyVertexGroupCommitStartedEvent("uv12", 1);
+    historyEventHandler.verifyVertexGroupCommitFinishedEvent("uv12", 0);
+    historyEventHandler.verifyVertexCommitStartedEvent(v1.getVertexId(), 0);
+    historyEventHandler.verifyVertexFinishedEvent(v1.getVertexId(), 1);
+    historyEventHandler.verifyVertexCommitStartedEvent(v2.getVertexId(), 0);
+    historyEventHandler.verifyVertexFinishedEvent(v2.getVertexId(), 1);
+    historyEventHandler.verifyVertexCommitStartedEvent(v3.getVertexId(), 1);
+    historyEventHandler.verifyVertexFinishedEvent(v3.getVertexId(), 1);
+    historyEventHandler.verifyDAGCommitStartedEvent(dag.getID(), 0);
+    historyEventHandler.verifyDAGFinishedEvent(dag.getID(), 1);
 
     Assert.assertEquals(1, v12OutputCommitter.initCounter);
     Assert.assertEquals(1, v12OutputCommitter.setupCounter);
@@ -1097,6 +1311,16 @@ public class TestCommit {
     Assert.assertEquals(DAGTerminationCause.VERTEX_FAILURE,
         dag.getTerminationCause());
     Assert.assertTrue(dag.commitFutures.isEmpty());
+    historyEventHandler.verifyVertexGroupCommitStartedEvent("uv12", 1);
+    historyEventHandler.verifyVertexGroupCommitFinishedEvent("uv12", 0);
+    historyEventHandler.verifyVertexCommitStartedEvent(v1.getVertexId(), 0);
+    historyEventHandler.verifyVertexFinishedEvent(v1.getVertexId(), 1);
+    historyEventHandler.verifyVertexCommitStartedEvent(v2.getVertexId(), 0);
+    historyEventHandler.verifyVertexFinishedEvent(v2.getVertexId(), 1);
+    historyEventHandler.verifyVertexCommitStartedEvent(v3.getVertexId(), 1);
+    historyEventHandler.verifyVertexFinishedEvent(v3.getVertexId(), 1);
+    historyEventHandler.verifyDAGCommitStartedEvent(dag.getID(), 0);
+    historyEventHandler.verifyDAGFinishedEvent(dag.getID(), 1);
 
     CountingOutputCommitter v12OutputCommitter = (CountingOutputCommitter) v1
         .getOutputCommitter("v12Out");
@@ -1152,6 +1376,16 @@ public class TestCommit {
     Assert.assertEquals(DAGTerminationCause.VERTEX_FAILURE,
         dag.getTerminationCause());
     Assert.assertTrue(dag.commitFutures.isEmpty());
+    historyEventHandler.verifyVertexGroupCommitStartedEvent("uv12", 1);
+    historyEventHandler.verifyVertexGroupCommitFinishedEvent("uv12", 1);
+    historyEventHandler.verifyVertexCommitStartedEvent(v1.getVertexId(), 0);
+    historyEventHandler.verifyVertexFinishedEvent(v1.getVertexId(), 1);
+    historyEventHandler.verifyVertexCommitStartedEvent(v2.getVertexId(), 0);
+    historyEventHandler.verifyVertexFinishedEvent(v2.getVertexId(), 1);
+    historyEventHandler.verifyVertexCommitStartedEvent(v3.getVertexId(), 1);
+    historyEventHandler.verifyVertexFinishedEvent(v3.getVertexId(), 1);
+    historyEventHandler.verifyDAGCommitStartedEvent(dag.getID(), 0);
+    historyEventHandler.verifyDAGFinishedEvent(dag.getID(), 1);
 
     Assert.assertEquals(1, v12OutputCommitter.initCounter);
     Assert.assertEquals(1, v12OutputCommitter.setupCounter);
@@ -1202,6 +1436,16 @@ public class TestCommit {
     Assert.assertEquals(DAGTerminationCause.COMMIT_FAILURE,
         dag.getTerminationCause());
     Assert.assertTrue(dag.commitFutures.isEmpty());
+    historyEventHandler.verifyVertexGroupCommitStartedEvent("uv12", 1);
+    historyEventHandler.verifyVertexGroupCommitFinishedEvent("uv12", 0);
+    historyEventHandler.verifyVertexCommitStartedEvent(v1.getVertexId(), 0);
+    historyEventHandler.verifyVertexFinishedEvent(v1.getVertexId(), 1);
+    historyEventHandler.verifyVertexCommitStartedEvent(v2.getVertexId(), 0);
+    historyEventHandler.verifyVertexFinishedEvent(v2.getVertexId(), 1);
+    historyEventHandler.verifyVertexCommitStartedEvent(v3.getVertexId(), 1);
+    historyEventHandler.verifyVertexFinishedEvent(v3.getVertexId(), 1);
+    historyEventHandler.verifyDAGCommitStartedEvent(dag.getID(), 0);
+    historyEventHandler.verifyDAGFinishedEvent(dag.getID(), 1);
 
     Assert.assertEquals(1, v12OutputCommitter.initCounter);
     Assert.assertEquals(1, v12OutputCommitter.setupCounter);
@@ -1215,7 +1459,7 @@ public class TestCommit {
   }
 
   @Test (timeout = 5000)
-  public void testDAGInternalErrorWhileCommiting() throws Exception {
+  public void testDAGInternalErrorWhileCommiting_OnDAGSuccess() throws Exception {
     conf.setBoolean(TezConfiguration.TEZ_AM_COMMIT_ALL_OUTPUTS_ON_DAG_SUCCESS,
         true);
     setupDAG(createDAGPlan(true, true));
@@ -1236,6 +1480,17 @@ public class TestCommit {
     waitUntil(dag, DAGState.ERROR);
 
     Assert.assertEquals(DAGTerminationCause.INTERNAL_ERROR, dag.getTerminationCause());
+    historyEventHandler.verifyVertexGroupCommitStartedEvent("uv12", 0);
+    historyEventHandler.verifyVertexGroupCommitFinishedEvent("uv12", 0);
+    historyEventHandler.verifyVertexCommitStartedEvent(v1.getVertexId(), 0);
+    historyEventHandler.verifyVertexFinishedEvent(v1.getVertexId(), 1);
+    historyEventHandler.verifyVertexCommitStartedEvent(v2.getVertexId(), 0);
+    historyEventHandler.verifyVertexFinishedEvent(v2.getVertexId(), 1);
+    historyEventHandler.verifyVertexCommitStartedEvent(v3.getVertexId(), 0);
+    historyEventHandler.verifyVertexFinishedEvent(v3.getVertexId(), 1);
+    historyEventHandler.verifyDAGCommitStartedEvent(dag.getID(), 1);
+    historyEventHandler.verifyDAGFinishedEvent(dag.getID(), 1);
+
     CountingOutputCommitter v12OutputCommitter = (CountingOutputCommitter) v1
         .getOutputCommitter("v12Out");
     CountingOutputCommitter v3OutputCommitter = (CountingOutputCommitter) v3
@@ -1282,6 +1537,16 @@ public class TestCommit {
     Assert
         .assertEquals(DAGTerminationCause.DAG_KILL, dag.getTerminationCause());
     Assert.assertTrue(dag.commitFutures.isEmpty());
+    historyEventHandler.verifyVertexGroupCommitStartedEvent("uv12", 0);
+    historyEventHandler.verifyVertexGroupCommitFinishedEvent("uv12", 0);
+    historyEventHandler.verifyVertexCommitStartedEvent(v1.getVertexId(), 0);
+    historyEventHandler.verifyVertexFinishedEvent(v1.getVertexId(), 1);
+    historyEventHandler.verifyVertexCommitStartedEvent(v2.getVertexId(), 0);
+    historyEventHandler.verifyVertexFinishedEvent(v2.getVertexId(), 1);
+    historyEventHandler.verifyVertexCommitStartedEvent(v3.getVertexId(), 0);
+    historyEventHandler.verifyVertexFinishedEvent(v3.getVertexId(), 1);
+    historyEventHandler.verifyDAGCommitStartedEvent(dag.getID(), 1);
+    historyEventHandler.verifyDAGFinishedEvent(dag.getID(), 1);
 
     CountingOutputCommitter v12OutputCommitter = (CountingOutputCommitter) v1
         .getOutputCommitter("v12Out");
@@ -1335,6 +1600,16 @@ public class TestCommit {
     Assert
         .assertEquals(DAGTerminationCause.DAG_KILL, dag.getTerminationCause());
     Assert.assertTrue(dag.commitFutures.isEmpty());
+    historyEventHandler.verifyVertexGroupCommitStartedEvent("uv12", 1);
+    historyEventHandler.verifyVertexGroupCommitFinishedEvent("uv12", 0);
+    historyEventHandler.verifyVertexCommitStartedEvent(v1.getVertexId(), 0);
+    historyEventHandler.verifyVertexFinishedEvent(v1.getVertexId(), 1);
+    historyEventHandler.verifyVertexCommitStartedEvent(v2.getVertexId(), 0);
+    historyEventHandler.verifyVertexFinishedEvent(v2.getVertexId(), 1);
+    historyEventHandler.verifyVertexCommitStartedEvent(v3.getVertexId(), 1);
+    historyEventHandler.verifyVertexFinishedEvent(v3.getVertexId(), 1);
+    historyEventHandler.verifyDAGCommitStartedEvent(dag.getID(), 0);
+    historyEventHandler.verifyDAGFinishedEvent(dag.getID(), 1);
 
     CountingOutputCommitter v12OutputCommitter = (CountingOutputCommitter) v1
         .getOutputCommitter("v12Out");
@@ -1383,7 +1658,16 @@ public class TestCommit {
     Assert
         .assertEquals(DAGTerminationCause.DAG_KILL, dag.getTerminationCause());
     Assert.assertTrue(dag.commitFutures.isEmpty());
-
+    historyEventHandler.verifyVertexGroupCommitStartedEvent("uv12", 1);
+    historyEventHandler.verifyVertexGroupCommitFinishedEvent("uv12", 0);
+    historyEventHandler.verifyVertexCommitStartedEvent(v1.getVertexId(), 0);
+    historyEventHandler.verifyVertexFinishedEvent(v1.getVertexId(), 1);
+    historyEventHandler.verifyVertexCommitStartedEvent(v2.getVertexId(), 0);
+    historyEventHandler.verifyVertexFinishedEvent(v2.getVertexId(), 1);
+    historyEventHandler.verifyVertexCommitStartedEvent(v3.getVertexId(), 1);
+    historyEventHandler.verifyVertexFinishedEvent(v3.getVertexId(), 1);
+    historyEventHandler.verifyDAGCommitStartedEvent(dag.getID(), 0);
+    historyEventHandler.verifyDAGFinishedEvent(dag.getID(), 1);
 
     CountingOutputCommitter v12OutputCommitter = (CountingOutputCommitter) v1
         .getOutputCommitter("v12Out");
@@ -1424,6 +1708,17 @@ public class TestCommit {
 
     Assert.assertEquals(DAGTerminationCause.VERTEX_RERUN_IN_COMMITTING, dag.getTerminationCause());
     Assert.assertTrue(dag.commitFutures.isEmpty());
+    historyEventHandler.verifyVertexGroupCommitStartedEvent("uv12", 0);
+    historyEventHandler.verifyVertexGroupCommitFinishedEvent("uv12", 0);
+    historyEventHandler.verifyVertexCommitStartedEvent(v1.getVertexId(), 0);
+    historyEventHandler.verifyVertexFinishedEvent(v1.getVertexId(), 1);
+    historyEventHandler.verifyVertexCommitStartedEvent(v2.getVertexId(), 0);
+    historyEventHandler.verifyVertexFinishedEvent(v2.getVertexId(), 1);
+    historyEventHandler.verifyVertexCommitStartedEvent(v3.getVertexId(), 0);
+    historyEventHandler.verifyVertexFinishedEvent(v3.getVertexId(), 1);
+    historyEventHandler.verifyDAGCommitStartedEvent(dag.getID(), 1);
+    historyEventHandler.verifyDAGFinishedEvent(dag.getID(), 1);
+
     CountingOutputCommitter v12OutputCommitter = (CountingOutputCommitter) v1
         .getOutputCommitter("v12Out");
     CountingOutputCommitter v3OutputCommitter = (CountingOutputCommitter) v3
@@ -1461,6 +1756,17 @@ public class TestCommit {
     waitUntil(dag, DAGState.ERROR);
 
     Assert.assertEquals(DAGTerminationCause.INTERNAL_ERROR, dag.getTerminationCause());
+    historyEventHandler.verifyVertexGroupCommitStartedEvent("uv12", 0);
+    historyEventHandler.verifyVertexGroupCommitFinishedEvent("uv12", 0);
+    historyEventHandler.verifyVertexCommitStartedEvent(v1.getVertexId(), 0);
+    historyEventHandler.verifyVertexFinishedEvent(v1.getVertexId(), 1);
+    historyEventHandler.verifyVertexCommitStartedEvent(v2.getVertexId(), 0);
+    historyEventHandler.verifyVertexFinishedEvent(v2.getVertexId(), 1);
+    historyEventHandler.verifyVertexCommitStartedEvent(v3.getVertexId(), 0);
+    historyEventHandler.verifyVertexFinishedEvent(v3.getVertexId(), 1);
+    historyEventHandler.verifyDAGCommitStartedEvent(dag.getID(), 1);
+    historyEventHandler.verifyDAGFinishedEvent(dag.getID(), 1);
+
     CountingOutputCommitter v12OutputCommitter = (CountingOutputCommitter) v1
         .getOutputCommitter("v12Out");
     CountingOutputCommitter v3OutputCommitter = (CountingOutputCommitter) v3
@@ -1476,14 +1782,12 @@ public class TestCommit {
     Assert.assertEquals(1, v3OutputCommitter.abortCounter);
   }
 
-  @Test(timeout = 5000)
-  public void testVertexGroupCommitFinishedEventFail() throws Exception {
+  @Test (timeout = 5000)
+  public void testVertexGroupCommitFinishedEventFail_OnVertexSuccess() throws Exception {
     conf.setBoolean(TezConfiguration.TEZ_AM_COMMIT_ALL_OUTPUTS_ON_DAG_SUCCESS,
         false);
     setupDAG(createDAGPlan(true, true));
-    MockHistoryEventHandler mockHistoryEventHandler = new MockHistoryEventHandler(appContext);
-    doReturn(mockHistoryEventHandler).when(appContext).getHistoryHandler();
-    mockHistoryEventHandler.failVertexGroupCommitFinishedEvent = true;
+    historyEventHandler.failVertexGroupCommitFinishedEvent = true;
     
     initDAG(dag);
     startDAG(dag);
@@ -1503,6 +1807,16 @@ public class TestCommit {
         .getOutputCommitter("v3Out");
     v12OutputCommitter.unblockCommit();
     waitUntil(dag, DAGState.FAILED);
+    historyEventHandler.verifyVertexGroupCommitStartedEvent("uv12", 1);
+    historyEventHandler.verifyVertexGroupCommitFinishedEvent("uv12", 0);
+    historyEventHandler.verifyVertexCommitStartedEvent(v1.getVertexId(), 0);
+    historyEventHandler.verifyVertexFinishedEvent(v1.getVertexId(), 1);
+    historyEventHandler.verifyVertexCommitStartedEvent(v2.getVertexId(), 0);
+    historyEventHandler.verifyVertexFinishedEvent(v2.getVertexId(), 1);
+    historyEventHandler.verifyVertexCommitStartedEvent(v3.getVertexId(), 1);
+    historyEventHandler.verifyVertexFinishedEvent(v3.getVertexId(), 1);
+    historyEventHandler.verifyDAGCommitStartedEvent(dag.getID(), 0);
+    historyEventHandler.verifyDAGFinishedEvent(dag.getID(), 1);
 
     Assert.assertEquals(DAGState.FAILED, dag.getState());
     Assert.assertEquals(DAGTerminationCause.RECOVERY_FAILURE,
@@ -1524,13 +1838,11 @@ public class TestCommit {
   }
 
   @Test(timeout = 5000)
-  public void testDAGCommitStartedEventFail() throws Exception {
+  public void testDAGCommitStartedEventFail_OnDAGSuccess() throws Exception {
     conf.setBoolean(TezConfiguration.TEZ_AM_COMMIT_ALL_OUTPUTS_ON_DAG_SUCCESS,
         true);
     setupDAG(createDAGPlan(true, true));
-    MockHistoryEventHandler mockHistoryEventHandler = new MockHistoryEventHandler(appContext);
-    doReturn(mockHistoryEventHandler).when(appContext).getHistoryHandler();
-    mockHistoryEventHandler.failDAGCommitStartedEvent = true;
+    historyEventHandler.failDAGCommitStartedEvent = true;
     
     initDAG(dag);
     startDAG(dag);
@@ -1547,6 +1859,17 @@ public class TestCommit {
     waitUntil(dag, DAGState.FAILED);
     Assert.assertEquals(DAGTerminationCause.RECOVERY_FAILURE, dag.getTerminationCause());
     Assert.assertTrue(dag.commitFutures.isEmpty());
+    historyEventHandler.verifyVertexGroupCommitStartedEvent("uv12", 0);
+    historyEventHandler.verifyVertexGroupCommitFinishedEvent("uv12", 0);
+    historyEventHandler.verifyVertexCommitStartedEvent(v1.getVertexId(), 0);
+    historyEventHandler.verifyVertexFinishedEvent(v1.getVertexId(), 1);
+    historyEventHandler.verifyVertexCommitStartedEvent(v2.getVertexId(), 0);
+    historyEventHandler.verifyVertexFinishedEvent(v2.getVertexId(), 1);
+    historyEventHandler.verifyVertexCommitStartedEvent(v3.getVertexId(), 0);
+    historyEventHandler.verifyVertexFinishedEvent(v3.getVertexId(), 1);
+    historyEventHandler.verifyDAGCommitStartedEvent(dag.getID(), 0);
+    historyEventHandler.verifyDAGFinishedEvent(dag.getID(), 1);
+
     CountingOutputCommitter v12OutputCommitter = (CountingOutputCommitter) v1
         .getOutputCommitter("v12Out");
     CountingOutputCommitter v3OutputCommitter = (CountingOutputCommitter) v3
@@ -1567,7 +1890,7 @@ public class TestCommit {
   // test commit will be canceled no matter it is started or still in the threadpool
   // ControlledThreadPoolExecutor is used for to not schedule the commits
   @Test(timeout = 5000)
-  public void testCommitCanceled() throws Exception {
+  public void testCommitCanceled_OnDAGSuccess() throws Exception {
     conf.setBoolean(TezConfiguration.TEZ_AM_COMMIT_ALL_OUTPUTS_ON_DAG_SUCCESS,
         true);
     setupDAG(createDAGPlan(true, true));
@@ -1598,6 +1921,16 @@ public class TestCommit {
     Assert.assertEquals(DAGTerminationCause.DAG_KILL, dag.getTerminationCause());
     // mean the commits have been canceled
     Assert.assertTrue(dag.commitFutures.isEmpty());
+    historyEventHandler.verifyVertexGroupCommitStartedEvent("uv12", 0);
+    historyEventHandler.verifyVertexGroupCommitFinishedEvent("uv12", 0);
+    historyEventHandler.verifyVertexCommitStartedEvent(v1.getVertexId(), 0);
+    historyEventHandler.verifyVertexFinishedEvent(v1.getVertexId(), 1);
+    historyEventHandler.verifyVertexCommitStartedEvent(v2.getVertexId(), 0);
+    historyEventHandler.verifyVertexFinishedEvent(v2.getVertexId(), 1);
+    historyEventHandler.verifyVertexCommitStartedEvent(v3.getVertexId(), 0);
+    historyEventHandler.verifyVertexFinishedEvent(v3.getVertexId(), 1);
+    historyEventHandler.verifyDAGCommitStartedEvent(dag.getID(), 1);
+    historyEventHandler.verifyDAGFinishedEvent(dag.getID(), 1);
 
     CountingOutputCommitter v12OutputCommitter = (CountingOutputCommitter) v1
         .getOutputCommitter("v12Out");
@@ -1635,7 +1968,7 @@ public class TestCommit {
 
     public boolean failVertexGroupCommitFinishedEvent = false;
     public boolean failDAGCommitStartedEvent = false;
-
+    public List<HistoryEvent> historyEvents = new ArrayList<HistoryEvent>();
     public MockHistoryEventHandler(AppContext context) {
       super(context);
     }
@@ -1650,6 +1983,85 @@ public class TestCommit {
           && failDAGCommitStartedEvent) {
         throw new IOException("fail DAGCommitStartedEvent");
       }
+      historyEvents.add(event.getHistoryEvent());
+    }
+
+    public void verifyVertexGroupCommitStartedEvent(String groupName, int expectedTimes) {
+      int actualTimes = 0;
+      for (HistoryEvent event : historyEvents) {
+        if (event.getEventType() == HistoryEventType.VERTEX_GROUP_COMMIT_STARTED) {
+          VertexGroupCommitStartedEvent startedEvent = (VertexGroupCommitStartedEvent)event;
+          if (startedEvent.getVertexGroupName().equals(groupName)) {
+            actualTimes ++;
+          }
+        }
+      }
+      Assert.assertEquals(expectedTimes, actualTimes);
+    }
+
+    public void verifyVertexGroupCommitFinishedEvent(String groupName, int expectedTimes) {
+      int actualTimes = 0;
+      for (HistoryEvent event : historyEvents) {
+        if (event.getEventType() == HistoryEventType.VERTEX_GROUP_COMMIT_FINISHED) {
+          VertexGroupCommitFinishedEvent finishedEvent = (VertexGroupCommitFinishedEvent)event;
+          if (finishedEvent.getVertexGroupName().equals(groupName)) {
+            actualTimes ++;
+          }
+        }
+      }
+      Assert.assertEquals(expectedTimes, actualTimes);
+    }
+
+    public void verifyVertexCommitStartedEvent(TezVertexID vertexId, int expectedTimes) {
+      int actualTimes = 0;
+      for (HistoryEvent event : historyEvents) {
+        if (event.getEventType() == HistoryEventType.VERTEX_COMMIT_STARTED) {
+          VertexCommitStartedEvent startedEvent = (VertexCommitStartedEvent)event;
+          if (startedEvent.getVertexID().equals(vertexId)) {
+            actualTimes ++;
+          }
+        }
+      }
+      Assert.assertEquals(expectedTimes, actualTimes);
+    }
+
+    public void verifyVertexFinishedEvent(TezVertexID vertexId, int expectedTimes) {
+      int actualTimes = 0;
+      for (HistoryEvent event : historyEvents) {
+        if (event.getEventType() == HistoryEventType.VERTEX_FINISHED) {
+          VertexFinishedEvent finishedEvent = (VertexFinishedEvent)event;
+          if (finishedEvent.getVertexID().equals(vertexId)) {
+            actualTimes ++;
+          }
+        }
+      }
+      Assert.assertEquals(expectedTimes, actualTimes);
+    }
+
+    public void verifyDAGCommitStartedEvent(TezDAGID dagId, int expectedTimes) {
+      int actualTimes = 0;
+      for (HistoryEvent event : historyEvents) {
+        if (event.getEventType() == HistoryEventType.DAG_COMMIT_STARTED) {
+          DAGCommitStartedEvent startedEvent = (DAGCommitStartedEvent)event;
+          if (startedEvent.getDagID().equals(dagId)) {
+            actualTimes ++;
+          }
+        }
+      }
+      Assert.assertEquals(expectedTimes, actualTimes);
+    }
+
+    public void verifyDAGFinishedEvent(TezDAGID dagId, int expectedTimes) {
+      int actualTimes = 0;
+      for (HistoryEvent event : historyEvents) {
+        if (event.getEventType() == HistoryEventType.DAG_FINISHED) {
+          DAGFinishedEvent startedEvent = (DAGFinishedEvent)event;
+          if (startedEvent.getDagID().equals(dagId)) {
+            actualTimes ++;
+          }
+        }
+      }
+      Assert.assertEquals(expectedTimes, actualTimes);
     }
   }
   


[42/43] tez git commit: TEZ-2414. LogicalIOProcessorRuntimeTask, RuntimeTask, TezTaskRunner should handle interrupts & carry out necessary cleanups. Contribtued by Rajesh Balamohan.

Posted by ss...@apache.org.
TEZ-2414. LogicalIOProcessorRuntimeTask, RuntimeTask, TezTaskRunner should handle interrupts & carry out necessary cleanups. Contribtued by Rajesh Balamohan.


Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/ea972acd
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/ea972acd
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/ea972acd

Branch: refs/heads/TEZ-2003
Commit: ea972acdc2a48c763596d34acb53577279eb00f0
Parents: fdb9177
Author: Siddharth Seth <ss...@apache.org>
Authored: Wed May 6 00:39:46 2015 -0700
Committer: Siddharth Seth <ss...@apache.org>
Committed: Fri May 8 14:46:10 2015 -0700

----------------------------------------------------------------------
 TEZ-2003-CHANGES.txt                            |   1 +
 .../runtime/LogicalIOProcessorRuntimeTask.java  | 141 +++++++++++++++++++
 .../org/apache/tez/runtime/RuntimeTask.java     |   5 +
 .../apache/tez/runtime/task/TezTaskRunner.java  |  71 +++++++++-
 4 files changed, 216 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tez/blob/ea972acd/TEZ-2003-CHANGES.txt
----------------------------------------------------------------------
diff --git a/TEZ-2003-CHANGES.txt b/TEZ-2003-CHANGES.txt
index 9fc9ed3..f8a71e8 100644
--- a/TEZ-2003-CHANGES.txt
+++ b/TEZ-2003-CHANGES.txt
@@ -20,5 +20,6 @@ ALL CHANGES:
   TEZ-2361. Propagate dag completion to TaskCommunicator.
   TEZ-2381. Fixes after rebase 04/28.
   TEZ-2388. Send dag identifier as part of the fetcher request string.
+  TEZ-2414. LogicalIOProcessorRuntimeTask, RuntimeTask, TezTaskRunner should handle interrupts & carry out necessary cleanups.
 
 INCOMPATIBLE CHANGES:

http://git-wip-us.apache.org/repos/asf/tez/blob/ea972acd/tez-runtime-internals/src/main/java/org/apache/tez/runtime/LogicalIOProcessorRuntimeTask.java
----------------------------------------------------------------------
diff --git a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/LogicalIOProcessorRuntimeTask.java b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/LogicalIOProcessorRuntimeTask.java
index 1cfe538..48c972c 100644
--- a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/LogicalIOProcessorRuntimeTask.java
+++ b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/LogicalIOProcessorRuntimeTask.java
@@ -20,10 +20,14 @@ package org.apache.tez.runtime;
 
 import java.io.Closeable;
 import java.io.IOException;
+import java.lang.management.ManagementFactory;
+import java.lang.management.ThreadInfo;
+import java.lang.management.ThreadMXBean;
 import java.nio.ByteBuffer;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Collections;
+import java.util.Iterator;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
@@ -38,6 +42,7 @@ import java.util.concurrent.Executors;
 import java.util.concurrent.Future;
 import java.util.concurrent.LinkedBlockingQueue;
 
+import com.google.common.base.Throwables;
 import org.apache.commons.lang.exception.ExceptionUtils;
 import org.apache.tez.runtime.api.TaskContext;
 import org.apache.tez.runtime.api.impl.TezProcessorContextImpl;
@@ -111,6 +116,10 @@ public class LogicalIOProcessorRuntimeTask extends RuntimeTask {
   final List<GroupInputSpec> groupInputSpecs;
   ConcurrentHashMap<String, MergedLogicalInput> groupInputsMap;
 
+  final ConcurrentHashMap<String, LogicalInput> initializedInputs;
+  final ConcurrentHashMap<String, LogicalOutput> initializedOutputs;
+
+  private boolean processorClosed;
   final ProcessorDescriptor processorDescriptor;
   AbstractLogicalIOProcessor processor;
   ProcessorContext processorContext;
@@ -163,6 +172,9 @@ public class LogicalIOProcessorRuntimeTask extends RuntimeTask {
     this.runInputMap = new LinkedHashMap<String, LogicalInput>();
     this.runOutputMap = new LinkedHashMap<String, LogicalOutput>();
 
+    this.initializedInputs = new ConcurrentHashMap<String, LogicalInput>();
+    this.initializedOutputs = new ConcurrentHashMap<String, LogicalOutput>();
+
     this.processorDescriptor = taskSpec.getProcessorDescriptor();
     this.serviceConsumerMetadata = serviceConsumerMetadata;
     this.envMap = envMap;
@@ -341,11 +353,13 @@ public class LogicalIOProcessorRuntimeTask extends RuntimeTask {
       this.state.set(State.CLOSED);
 
       // Close the Processor.
+      processorClosed = true;
       processor.close();
 
       // Close the Inputs.
       for (InputSpec inputSpec : inputSpecs) {
         String srcVertexName = inputSpec.getSourceVertexName();
+        initializedInputs.remove(srcVertexName);
         List<Event> closeInputEvents = ((InputFrameworkInterface)inputsMap.get(srcVertexName)).close();
         sendTaskGeneratedEvents(closeInputEvents,
             EventProducerConsumerType.INPUT, taskSpec.getVertexName(),
@@ -355,6 +369,7 @@ public class LogicalIOProcessorRuntimeTask extends RuntimeTask {
       // Close the Outputs.
       for (OutputSpec outputSpec : outputSpecs) {
         String destVertexName = outputSpec.getDestinationVertexName();
+        initializedOutputs.remove(destVertexName);
         List<Event> closeOutputEvents = ((LogicalOutputFrameworkInterface)outputsMap.get(destVertexName)).close();
         sendTaskGeneratedEvents(closeOutputEvents,
             EventProducerConsumerType.OUTPUT, taskSpec.getVertexName(),
@@ -402,6 +417,7 @@ public class LogicalIOProcessorRuntimeTask extends RuntimeTask {
           inputContext.getTaskVertexName(), inputContext.getSourceVertexName(),
           taskSpec.getTaskAttemptID());
       LOG.info("Initialized Input with src edge: " + edgeName);
+      initializedInputs.put(edgeName, input);
       return null;
     }
   }
@@ -450,6 +466,7 @@ public class LogicalIOProcessorRuntimeTask extends RuntimeTask {
           outputContext.getTaskVertexName(),
           outputContext.getDestinationVertexName(), taskSpec.getTaskAttemptID());
       LOG.info("Initialized Output with dest edge: " + edgeName);
+      initializedOutputs.put(edgeName, output);
       return null;
     }
   }
@@ -675,6 +692,13 @@ public class LogicalIOProcessorRuntimeTask extends RuntimeTask {
     eventsToBeProcessed.addAll(events);
   }
 
+  @Override
+  public synchronized void abortTask() throws Exception {
+    if (processor != null) {
+      processor.abort();
+    }
+  }
+
   private void startRouterThread() {
     eventRouterThread = new Thread(new RunnableWithNdc() {
       public void runInternal() {
@@ -694,6 +718,7 @@ public class LogicalIOProcessorRuntimeTask extends RuntimeTask {
             if (!isTaskDone()) {
               LOG.warn("Event Router thread interrupted. Returning.");
             }
+            Thread.currentThread().interrupt();
             return;
           }
         }
@@ -705,6 +730,12 @@ public class LogicalIOProcessorRuntimeTask extends RuntimeTask {
     eventRouterThread.start();
   }
 
+  private void maybeResetInterruptStatus() {
+    if (!Thread.currentThread().isInterrupted()) {
+      Thread.currentThread().interrupt();
+    }
+  }
+
   private void closeContexts() throws IOException {
     closeContext(inputContextMap);
     closeContext(outputContextMap);
@@ -742,6 +773,102 @@ public class LogicalIOProcessorRuntimeTask extends RuntimeTask {
       }
       eventRouterThread = null;
     }
+
+    /**
+     * Cleanup IPO that are not closed.  In case, regular close() has happened in IPO, they
+     * would not be available in the IPOs to be cleaned. So this is safe.
+     *
+     * e.g whenever input gets closed() in normal way, it automatically removes it from
+     * initializedInputs map.
+     *
+     * In case any exception happens in processor close or IO close, it wouldn't be removed from
+     * the initialized IO data structures and here is the chance to close them and release
+     * resources.
+     *
+     */
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("Processor closed={}", processorClosed);
+      LOG.debug("Num of inputs to be closed={}", initializedInputs.size());
+      LOG.debug("Num of outputs to be closed={}", initializedOutputs.size());
+    }
+    if (!processorClosed) {
+      try {
+        processorClosed = true;
+        processor.close();
+
+        LOG.info("Closed processor for vertex={}, index={}, interruptedStatus={}",
+            processor
+                .getContext().getTaskVertexName(),
+            processor.getContext().getTaskVertexIndex(),
+            Thread.currentThread().isInterrupted());
+
+        maybeResetInterruptStatus();
+      } catch (InterruptedException ie) {
+        //reset the status
+        LOG.info("Resetting interrupt for processor");
+        Thread.currentThread().interrupt();
+      } catch (Throwable e) {
+        LOG.warn("Exception when closing processor", e);
+      }
+    }
+    // Close the remaining inited Inputs.
+    Iterator<String> srcVertexItr = initializedInputs.keySet().iterator();
+    while (srcVertexItr.hasNext()) {
+      String srcVertexName = srcVertexItr.next();
+      try {
+        srcVertexItr.remove();
+
+        ((InputFrameworkInterface) initializedInputs.get(srcVertexName)).close();
+        initializedInputs.remove(srcVertexName);
+
+        maybeResetInterruptStatus();
+      } catch (InterruptedException ie) {
+        //reset the status
+        LOG.info("Resetting interrupt status for input with srcVertexName={}",
+            srcVertexName);
+        Thread.currentThread().interrupt();
+      } catch (Throwable e) {
+        LOG.warn("Exception when closing input in cleanup(interrupted)", e);
+      } finally {
+        LOG.info("Close input for vertex={}, sourceVertex={}, interruptedStatus={}", processor
+            .getContext().getTaskVertexName(), srcVertexName, Thread.currentThread()
+            .isInterrupted());
+      }
+    }
+
+    // Close the remaining inited Outputs.
+    try {
+      Iterator<String> outVertexItr = initializedOutputs.keySet().iterator();
+      while (outVertexItr.hasNext()) {
+        String destVertexName = outVertexItr.next();
+        try {
+          outVertexItr.remove();
+
+          ((OutputFrameworkInterface) initializedOutputs.get(destVertexName)).close();
+          initializedOutputs.remove(destVertexName);
+
+          maybeResetInterruptStatus();
+        } catch (InterruptedException ie) {
+          //reset the status
+          LOG.info("Resetting interrupt status for output with destVertexName={}",
+              destVertexName);
+          Thread.currentThread().interrupt();
+        } catch (Throwable e) {
+          LOG.warn("Exception when closing output in cleanup(interrupted)", e);
+        } finally {
+          LOG.info("Close input for vertex={}, sourceVertex={}, interruptedStatus={}", processor
+              .getContext().getTaskVertexName(), destVertexName, Thread.currentThread()
+              .isInterrupted());
+        }
+      }
+    } catch (Throwable e) {
+      LOG.warn(Throwables.getStackTraceAsString(e));
+    }
+
+    if (LOG.isDebugEnabled()) {
+      printThreads();
+    }
+
     try {
       closeContexts();
       // Cleanup references which may be held by misbehaved tasks.
@@ -785,6 +912,20 @@ public class LogicalIOProcessorRuntimeTask extends RuntimeTask {
     inputReadyTracker = null;
     objectRegistry = null;
   }
+
+
+  /**
+   * Print all threads in JVM (only for debugging)
+   */
+  void printThreads() {
+    //Print the status of all threads in JVM
+    ThreadMXBean threadMXBean = ManagementFactory.getThreadMXBean();
+    long[] threadIds = threadMXBean.getAllThreadIds();
+    for (Long id : threadIds) {
+      ThreadInfo threadInfo = threadMXBean.getThreadInfo(id);
+      LOG.info("ThreadId : " + id + ", name=" + threadInfo.getThreadName());
+    }
+  }
   
   @Private
   @VisibleForTesting

http://git-wip-us.apache.org/repos/asf/tez/blob/ea972acd/tez-runtime-internals/src/main/java/org/apache/tez/runtime/RuntimeTask.java
----------------------------------------------------------------------
diff --git a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/RuntimeTask.java b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/RuntimeTask.java
index 921095c..7b09455 100644
--- a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/RuntimeTask.java
+++ b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/RuntimeTask.java
@@ -74,6 +74,10 @@ public abstract class RuntimeTask {
 
   protected final AtomicReference<State> state = new AtomicReference<State>();
 
+  public boolean isRunning() {
+    return (state.get() == State.RUNNING);
+  }
+
   public TezCounters addAndGetTezCounter(String name) {
     TezCounters counter = new TezCounters();
     counterMap.put(name, counter);
@@ -153,4 +157,5 @@ public abstract class RuntimeTask {
     taskDone.set(true);
   }
 
+  public abstract void abortTask() throws Exception;
 }

http://git-wip-us.apache.org/repos/asf/tez/blob/ea972acd/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TezTaskRunner.java
----------------------------------------------------------------------
diff --git a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TezTaskRunner.java b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TezTaskRunner.java
index 33a7f4a..7238d5e 100644
--- a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TezTaskRunner.java
+++ b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TezTaskRunner.java
@@ -25,8 +25,13 @@ import java.security.PrivilegedExceptionAction;
 import java.util.Collection;
 import java.util.Map;
 import java.util.concurrent.ExecutionException;
+import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.concurrent.locks.Condition;
+import java.util.concurrent.locks.Lock;
+import java.util.concurrent.locks.ReentrantLock;
 
+import com.google.common.base.Throwables;
 import org.apache.commons.lang.exception.ExceptionUtils;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FSError;
@@ -35,6 +40,7 @@ import org.apache.tez.common.CallableWithNdc;
 import org.apache.tez.dag.api.TezException;
 import org.apache.tez.dag.records.TezTaskAttemptID;
 import org.apache.tez.runtime.LogicalIOProcessorRuntimeTask;
+import org.apache.tez.runtime.RuntimeTask;
 import org.apache.tez.runtime.api.ExecutionContext;
 import org.apache.tez.runtime.api.ObjectRegistry;
 import org.apache.tez.runtime.api.impl.EventMetaData;
@@ -61,6 +67,7 @@ public class TezTaskRunner implements TezUmbilical, ErrorReporter {
   private final ListeningExecutorService executor;
   private volatile ListenableFuture<Void> taskFuture;
   private volatile Thread waitingThread;
+  private volatile Thread taskRunner;
   private volatile Throwable firstException;
 
   // Effectively a duplicate check, since hadFatalError does the same thing.
@@ -96,7 +103,10 @@ public class TezTaskRunner implements TezUmbilical, ErrorReporter {
     taskReporter.registerTask(task, this);
     TaskRunnerCallable callable = new TaskRunnerCallable();
     Throwable failureCause = null;
-    taskFuture = executor.submit(callable);
+    if (!Thread.currentThread().isInterrupted()) {
+      taskFuture = executor.submit(callable);
+      return isShutdownRequested();
+    }
     try {
       taskFuture.get();
 
@@ -158,6 +168,10 @@ public class TezTaskRunner implements TezUmbilical, ErrorReporter {
         }
       }
     }
+    return isShutdownRequested();
+  }
+
+  private boolean isShutdownRequested() {
     if (shutdownRequested.get()) {
       LOG.info("Shutdown requested... returning");
       return false;
@@ -173,11 +187,14 @@ public class TezTaskRunner implements TezUmbilical, ErrorReporter {
           @Override
           public Void run() throws Exception {
             try {
+              taskRunner = Thread.currentThread();
               LOG.info("Initializing task" + ", taskAttemptId=" + task.getTaskAttemptID());
               task.initialize();
               if (!Thread.currentThread().isInterrupted() && firstException == null) {
                 LOG.info("Running task, taskAttemptId=" + task.getTaskAttemptID());
                 task.run();
+                maybeInterruptWaitingThread();
+
                 LOG.info("Closing task, taskAttemptId=" + task.getTaskAttemptID());
                 task.close();
                 task.setFrameworkCounters();
@@ -199,6 +216,12 @@ public class TezTaskRunner implements TezUmbilical, ErrorReporter {
               }
               return null;
             } catch (Throwable cause) {
+              if (Thread.currentThread().isInterrupted()) {
+                LOG.info("TaskRunnerCallable interrupted=" + Thread.currentThread().isInterrupted()
+                    + ", shutdownRequest=" + shutdownRequested.get());
+                Thread.currentThread().interrupt();
+                return null;
+              }
               if (cause instanceof FSError) {
                 // Not immediately fatal, this is an error reported by Hadoop FileSystem
                 maybeRegisterFirstException(cause);
@@ -255,6 +278,17 @@ public class TezTaskRunner implements TezUmbilical, ErrorReporter {
         taskRunning.set(false);
       }
     }
+
+    private void maybeInterruptWaitingThread() {
+      /**
+       * Possible that the processor is swallowing InterruptException of taskRunner.interrupt().
+       * In such case, interrupt the waitingThread based on the shutdownRequested flag, so that
+       * entire task gets cancelled.
+       */
+      if (shutdownRequested.get()) {
+        waitingThread.interrupt();
+      }
+    }
   }
 
   // should wait until all messages are sent to AM before TezChild shutdown
@@ -353,10 +387,43 @@ public class TezTaskRunner implements TezUmbilical, ErrorReporter {
     }
   }
 
+  private void abortRunningTask() {
+    if (!taskRunning.get()) {
+      LOG.info("Task is not running");
+      waitingThread.interrupt();
+      return;
+    }
+
+    if (taskRunning.get()) {
+      try {
+        task.abortTask();
+      } catch (Exception e) {
+        LOG.warn("Error when aborting the task", e);
+        try {
+          sendFailure(e, "Error when aborting the task");
+        } catch (Exception ignored) {
+          // Ignored.
+        }
+      }
+    }
+    //Interrupt the relevant threads.  TaskRunner should be interrupted preferably.
+    if (isTaskRunning()) {
+      LOG.info("Interrupting taskRunner=" + taskRunner.getName());
+      taskRunner.interrupt();
+    } else {
+      LOG.info("Interrupting waitingThread=" + waitingThread.getName());
+      waitingThread.interrupt();
+    }
+  }
+
+  private boolean isTaskRunning() {
+    return (taskRunning.get() && task.isRunning());
+  }
+
   @Override
   public void shutdownRequested() {
     shutdownRequested.set(true);
-    waitingThread.interrupt();
+    abortRunningTask();
   }
 
   private String getTaskDiagnosticsString(Throwable t, String message) {


[23/43] tez git commit: TEZ-2090. Add tests for jobs running in external services. (sseth)

Posted by ss...@apache.org.
http://git-wip-us.apache.org/repos/asf/tez/blob/aadd0492/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/ContainerRunnerImpl.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/ContainerRunnerImpl.java b/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/ContainerRunnerImpl.java
new file mode 100644
index 0000000..4a6ce33
--- /dev/null
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/ContainerRunnerImpl.java
@@ -0,0 +1,512 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.service.impl;
+
+import java.io.File;
+import java.io.IOException;
+import java.net.InetSocketAddress;
+import java.nio.ByteBuffer;
+import java.security.PrivilegedExceptionAction;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.concurrent.Callable;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.atomic.AtomicLong;
+import java.util.concurrent.atomic.AtomicReference;
+
+import com.google.common.base.Preconditions;
+import com.google.common.base.Stopwatch;
+import com.google.common.collect.HashMultimap;
+import com.google.common.collect.Multimap;
+import com.google.common.util.concurrent.FutureCallback;
+import com.google.common.util.concurrent.Futures;
+import com.google.common.util.concurrent.ListenableFuture;
+import com.google.common.util.concurrent.ListeningExecutorService;
+import com.google.common.util.concurrent.MoreExecutors;
+import com.google.common.util.concurrent.ThreadFactoryBuilder;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.DataInputBuffer;
+import org.apache.hadoop.ipc.RPC;
+import org.apache.hadoop.net.NetUtils;
+import org.apache.hadoop.security.Credentials;
+import org.apache.hadoop.security.SecurityUtil;
+import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.hadoop.security.token.Token;
+import org.apache.hadoop.service.AbstractService;
+import org.apache.hadoop.yarn.api.ApplicationConstants;
+import org.apache.hadoop.yarn.util.AuxiliaryServiceHelper;
+import org.apache.log4j.Logger;
+import org.apache.tez.common.TezCommonUtils;
+import org.apache.tez.common.TezTaskUmbilicalProtocol;
+import org.apache.tez.common.security.JobTokenIdentifier;
+import org.apache.tez.common.security.TokenCache;
+import org.apache.tez.dag.api.TezConfiguration;
+import org.apache.tez.dag.api.TezException;
+import org.apache.tez.runtime.task.TaskReporter;
+import org.apache.tez.runtime.task.TezTaskRunner;
+import org.apache.tez.service.ContainerRunner;
+import org.apache.tez.dag.api.TezConstants;
+import org.apache.tez.runtime.api.ExecutionContext;
+import org.apache.tez.runtime.api.impl.ExecutionContextImpl;
+import org.apache.tez.runtime.common.objectregistry.ObjectRegistryImpl;
+import org.apache.tez.runtime.task.TezChild;
+import org.apache.tez.runtime.task.TezChild.ContainerExecutionResult;
+import org.apache.tez.shufflehandler.ShuffleHandler;
+import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos.RunContainerRequestProto;
+import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos.SubmitWorkRequestProto;
+import org.apache.tez.util.ProtoConverters;
+
+public class ContainerRunnerImpl extends AbstractService implements ContainerRunner {
+
+  private static final Logger LOG = Logger.getLogger(ContainerRunnerImpl.class);
+
+  private final ListeningExecutorService executorService;
+  private final AtomicReference<InetSocketAddress> localAddress;
+  private final String[] localDirsBase;
+  private final Map<String, String> localEnv = new HashMap<String, String>();
+  private volatile FileSystem localFs;
+  private final long memoryPerExecutor;
+  // TODO Support for removing queued containers, interrupting / killing specific containers - when preemption is supported
+
+
+
+
+  public ContainerRunnerImpl(int numExecutors, String[] localDirsBase,
+                             AtomicReference<InetSocketAddress> localAddress,
+                             long totalMemoryAvailableBytes) {
+    super("ContainerRunnerImpl");
+    Preconditions.checkState(numExecutors > 0,
+        "Invalid number of executors: " + numExecutors + ". Must be > 0");
+    this.localDirsBase = localDirsBase;
+    this.localAddress = localAddress;
+
+    ExecutorService raw = Executors.newFixedThreadPool(numExecutors,
+        new ThreadFactoryBuilder().setNameFormat("ContainerExecutor %d").build());
+    this.executorService = MoreExecutors.listeningDecorator(raw);
+
+
+    // 80% of memory considered for accounted buffers. Rest for objects.
+    // TODO Tune this based on the available size.
+    this.memoryPerExecutor = (long)(totalMemoryAvailableBytes * 0.8 / (float) numExecutors);
+
+    LOG.info("ContainerRunnerImpl config: " +
+        "memoryPerExecutorDerived=" + memoryPerExecutor +
+        ", numExecutors=" + numExecutors
+    );
+  }
+
+  @Override
+  public void serviceInit(Configuration conf) {
+    try {
+      localFs = FileSystem.getLocal(conf);
+    } catch (IOException e) {
+      throw new RuntimeException("Failed to setup local filesystem instance", e);
+    }
+  }
+
+  @Override
+  public void serviceStart() {
+  }
+
+  public void setShufflePort(int shufflePort) {
+    AuxiliaryServiceHelper.setServiceDataIntoEnv(
+        TezConstants.TEZ_SHUFFLE_HANDLER_SERVICE_ID,
+        ByteBuffer.allocate(4).putInt(shufflePort), localEnv);
+  }
+
+  @Override
+  protected void serviceStop() throws Exception {
+    super.serviceStop();
+  }
+
+  // TODO Move this into a utilities class
+  private static String createAppSpecificLocalDir(String baseDir, String applicationIdString,
+                                                  String user) {
+    return baseDir + File.separator + "usercache" + File.separator + user + File.separator +
+        "appcache" + File.separator + applicationIdString;
+  }
+
+  /**
+   * Submit a container which is ready for running.
+   * The regular pull mechanism will be used to fetch work from the AM
+   * @param request
+   * @throws IOException
+   */
+  @Override
+  public void queueContainer(RunContainerRequestProto request) throws IOException {
+    LOG.info("Queuing container for execution: " + request);
+
+    Map<String, String> env = new HashMap<String, String>();
+    env.putAll(localEnv);
+    env.put(ApplicationConstants.Environment.USER.name(), request.getUser());
+
+    String[] localDirs = new String[localDirsBase.length];
+
+    // Setup up local dirs to be application specific, and create them.
+    for (int i = 0; i < localDirsBase.length; i++) {
+      localDirs[i] = createAppSpecificLocalDir(localDirsBase[i], request.getApplicationIdString(),
+          request.getUser());
+      localFs.mkdirs(new Path(localDirs[i]));
+    }
+    LOG.info("DEBUG: Dirs are: " + Arrays.toString(localDirs));
+
+
+    // Setup workingDir. This is otherwise setup as Environment.PWD
+    // Used for re-localization, to add the user specified configuration (conf_pb_binary_stream)
+    String workingDir = localDirs[0];
+
+    Credentials credentials = new Credentials();
+    DataInputBuffer dib = new DataInputBuffer();
+    byte[] tokenBytes = request.getCredentialsBinary().toByteArray();
+    dib.reset(tokenBytes, tokenBytes.length);
+    credentials.readTokenStorageStream(dib);
+
+    Token<JobTokenIdentifier> jobToken = TokenCache.getSessionToken(credentials);
+
+    // TODO Unregistering does not happen at the moment, since there's no signals on when an app completes.
+    LOG.info("DEBUG: Registering request with the ShuffleHandler");
+    ShuffleHandler.get().registerApplication(request.getApplicationIdString(), jobToken, request.getUser());
+
+
+    ContainerRunnerCallable callable = new ContainerRunnerCallable(request, new Configuration(getConfig()),
+        new ExecutionContextImpl(localAddress.get().getHostName()), env, localDirs,
+        workingDir, credentials, memoryPerExecutor);
+    ListenableFuture<ContainerExecutionResult> future = executorService
+        .submit(callable);
+    Futures.addCallback(future, new ContainerRunnerCallback(request, callable));
+  }
+
+  /**
+   * Submit an entire work unit - containerId + TaskSpec.
+   * This is intended for a task push from the AM
+   *
+   * @param request
+   * @throws IOException
+   */
+  @Override
+  public void submitWork(SubmitWorkRequestProto request) throws
+      IOException {
+    LOG.info("Queuing work for execution: " + request);
+
+    Map<String, String> env = new HashMap<String, String>();
+    env.putAll(localEnv);
+    env.put(ApplicationConstants.Environment.USER.name(), request.getUser());
+
+    String[] localDirs = new String[localDirsBase.length];
+
+    // Setup up local dirs to be application specific, and create them.
+    for (int i = 0; i < localDirsBase.length; i++) {
+      localDirs[i] = createAppSpecificLocalDir(localDirsBase[i], request.getApplicationIdString(),
+          request.getUser());
+      localFs.mkdirs(new Path(localDirs[i]));
+    }
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("Dirs are: " + Arrays.toString(localDirs));
+    }
+
+    // Setup workingDir. This is otherwise setup as Environment.PWD
+    // Used for re-localization, to add the user specified configuration (conf_pb_binary_stream)
+    String workingDir = localDirs[0];
+
+    Credentials credentials = new Credentials();
+    DataInputBuffer dib = new DataInputBuffer();
+    byte[] tokenBytes = request.getCredentialsBinary().toByteArray();
+    dib.reset(tokenBytes, tokenBytes.length);
+    credentials.readTokenStorageStream(dib);
+
+    Token<JobTokenIdentifier> jobToken = TokenCache.getSessionToken(credentials);
+
+    // TODO Unregistering does not happen at the moment, since there's no signals on when an app completes.
+    LOG.info("DEBUG: Registering request with the ShuffleHandler");
+    ShuffleHandler.get().registerApplication(request.getApplicationIdString(), jobToken, request.getUser());
+    TaskRunnerCallable callable = new TaskRunnerCallable(request, new Configuration(getConfig()),
+        new ExecutionContextImpl(localAddress.get().getHostName()), env, localDirs,
+        workingDir, credentials, memoryPerExecutor);
+    ListenableFuture<ContainerExecutionResult> future = executorService.submit(callable);
+    Futures.addCallback(future, new TaskRunnerCallback(request, callable));
+  }
+
+
+  static class ContainerRunnerCallable implements Callable<ContainerExecutionResult> {
+
+    private final RunContainerRequestProto request;
+    private final Configuration conf;
+    private final String workingDir;
+    private final String[] localDirs;
+    private final Map<String, String> envMap;
+    private final String pid = null;
+    private final ObjectRegistryImpl objectRegistry;
+    private final ExecutionContext executionContext;
+    private final Credentials credentials;
+    private final long memoryAvailable;
+    private volatile TezChild tezChild;
+
+
+    ContainerRunnerCallable(RunContainerRequestProto request, Configuration conf,
+                            ExecutionContext executionContext, Map<String, String> envMap,
+                            String[] localDirs, String workingDir, Credentials credentials,
+                            long memoryAvailable) {
+      this.request = request;
+      this.conf = conf;
+      this.executionContext = executionContext;
+      this.envMap = envMap;
+      this.workingDir = workingDir;
+      this.localDirs = localDirs;
+      this.objectRegistry = new ObjectRegistryImpl();
+      this.credentials = credentials;
+      this.memoryAvailable = memoryAvailable;
+
+    }
+
+    @Override
+    public ContainerExecutionResult call() throws Exception {
+      Stopwatch sw = new Stopwatch().start();
+      tezChild =
+          new TezChild(conf, request.getAmHost(), request.getAmPort(),
+              request.getContainerIdString(),
+              request.getTokenIdentifier(), request.getAppAttemptNumber(), workingDir, localDirs,
+              envMap, objectRegistry, pid,
+              executionContext, credentials, memoryAvailable, request.getUser());
+      ContainerExecutionResult result = tezChild.run();
+      LOG.info("ExecutionTime for Container: " + request.getContainerIdString() + "=" +
+          sw.stop().elapsedMillis());
+      return result;
+    }
+
+    public TezChild getTezChild() {
+      return this.tezChild;
+    }
+  }
+
+
+  final class ContainerRunnerCallback implements FutureCallback<ContainerExecutionResult> {
+
+    private final RunContainerRequestProto request;
+    private final ContainerRunnerCallable containerRunnerCallable;
+
+    ContainerRunnerCallback(RunContainerRequestProto request,
+                            ContainerRunnerCallable containerRunnerCallable) {
+      this.request = request;
+      this.containerRunnerCallable = containerRunnerCallable;
+    }
+
+    // TODO Proper error handling
+    @Override
+    public void onSuccess(ContainerExecutionResult result) {
+      switch (result.getExitStatus()) {
+        case SUCCESS:
+          LOG.info("Successfully finished: " + request.getApplicationIdString() + ", containerId=" +
+              request.getContainerIdString());
+          break;
+        case EXECUTION_FAILURE:
+          LOG.info("Failed to run: " + request.getApplicationIdString() + ", containerId=" +
+              request.getContainerIdString(), result.getThrowable());
+          break;
+        case INTERRUPTED:
+          LOG.info(
+              "Interrupted while running: " + request.getApplicationIdString() + ", containerId=" +
+                  request.getContainerIdString(), result.getThrowable());
+          break;
+        case ASKED_TO_DIE:
+          LOG.info(
+              "Asked to die while running: " + request.getApplicationIdString() + ", containerId=" +
+                  request.getContainerIdString());
+          break;
+      }
+    }
+
+    @Override
+    public void onFailure(Throwable t) {
+      LOG.error(
+          "TezChild execution failed for : " + request.getApplicationIdString() + ", containerId=" +
+              request.getContainerIdString(), t);
+      TezChild tezChild = containerRunnerCallable.getTezChild();
+      if (tezChild != null) {
+        tezChild.shutdown();
+      }
+    }
+  }
+
+  static class TaskRunnerCallable implements Callable<ContainerExecutionResult> {
+
+    private final SubmitWorkRequestProto request;
+    private final Configuration conf;
+    private final String workingDir;
+    private final String[] localDirs;
+    private final Map<String, String> envMap;
+    private final String pid = null;
+    private final ObjectRegistryImpl objectRegistry;
+    private final ExecutionContext executionContext;
+    private final Credentials credentials;
+    private final long memoryAvailable;
+    private final ListeningExecutorService executor;
+    private volatile TezTaskRunner taskRunner;
+    private volatile TaskReporter taskReporter;
+    private TezTaskUmbilicalProtocol umbilical;
+
+
+    TaskRunnerCallable(SubmitWorkRequestProto request, Configuration conf,
+                             ExecutionContext executionContext, Map<String, String> envMap,
+                             String[] localDirs, String workingDir, Credentials credentials,
+                             long memoryAvailable) {
+      this.request = request;
+      this.conf = conf;
+      this.executionContext = executionContext;
+      this.envMap = envMap;
+      this.workingDir = workingDir;
+      this.localDirs = localDirs;
+      this.objectRegistry = new ObjectRegistryImpl();
+      this.credentials = credentials;
+      this.memoryAvailable = memoryAvailable;
+      // TODO This executor seems unnecessary. Here and TezChild
+      ExecutorService executorReal = Executors.newFixedThreadPool(1, new ThreadFactoryBuilder()
+          .setDaemon(true)
+          .setNameFormat("TezTaskRunner_" + request.getTaskSpec().getTaskAttemptIdString()).build());
+      executor = MoreExecutors.listeningDecorator(executorReal);
+    }
+
+    @Override
+    public ContainerExecutionResult call() throws Exception {
+
+      // TODO Consolidate this code with TezChild.
+      Stopwatch sw = new Stopwatch().start();
+      UserGroupInformation taskUgi = UserGroupInformation.createRemoteUser(request.getUser());
+      taskUgi.addCredentials(credentials);
+
+      Token<JobTokenIdentifier> jobToken = TokenCache.getSessionToken(credentials);
+      Map<String, ByteBuffer> serviceConsumerMetadata = new HashMap<String, ByteBuffer>();
+      serviceConsumerMetadata.put(TezConstants.TEZ_SHUFFLE_HANDLER_SERVICE_ID,
+          TezCommonUtils.convertJobTokenToBytes(jobToken));
+      Multimap<String, String> startedInputsMap = HashMultimap.create();
+
+      UserGroupInformation taskOwner =
+          UserGroupInformation.createRemoteUser(request.getTokenIdentifier());
+      final InetSocketAddress address =
+          NetUtils.createSocketAddrForHost(request.getAmHost(), request.getAmPort());
+      SecurityUtil.setTokenService(jobToken, address);
+      taskOwner.addToken(jobToken);
+      umbilical = taskOwner.doAs(new PrivilegedExceptionAction<TezTaskUmbilicalProtocol>() {
+        @Override
+        public TezTaskUmbilicalProtocol run() throws Exception {
+          return RPC.getProxy(TezTaskUmbilicalProtocol.class,
+              TezTaskUmbilicalProtocol.versionID, address, conf);
+        }
+      });
+      // TODO Stop reading this on each request.
+      taskReporter = new TaskReporter(
+          umbilical,
+          conf.getInt(TezConfiguration.TEZ_TASK_AM_HEARTBEAT_INTERVAL_MS,
+              TezConfiguration.TEZ_TASK_AM_HEARTBEAT_INTERVAL_MS_DEFAULT),
+          conf.getLong(
+              TezConfiguration.TEZ_TASK_AM_HEARTBEAT_COUNTER_INTERVAL_MS,
+              TezConfiguration.TEZ_TASK_AM_HEARTBEAT_COUNTER_INTERVAL_MS_DEFAULT),
+          conf.getInt(TezConfiguration.TEZ_TASK_MAX_EVENTS_PER_HEARTBEAT,
+              TezConfiguration.TEZ_TASK_MAX_EVENTS_PER_HEARTBEAT_DEFAULT),
+          new AtomicLong(0),
+          request.getContainerIdString());
+
+      taskRunner = new TezTaskRunner(conf, taskUgi, localDirs,
+          ProtoConverters.getTaskSpecfromProto(request.getTaskSpec()), umbilical,
+          request.getAppAttemptNumber(),
+          serviceConsumerMetadata, envMap, startedInputsMap, taskReporter, executor, objectRegistry,
+          pid,
+          executionContext, memoryAvailable);
+
+      boolean shouldDie;
+      try {
+        shouldDie = !taskRunner.run();
+        if (shouldDie) {
+          LOG.info("Got a shouldDie notification via hearbeats. Shutting down");
+          return new ContainerExecutionResult(ContainerExecutionResult.ExitStatus.SUCCESS, null,
+              "Asked to die by the AM");
+        }
+      } catch (IOException e) {
+        return new ContainerExecutionResult(ContainerExecutionResult.ExitStatus.EXECUTION_FAILURE,
+            e, "TaskExecutionFailure: " + e.getMessage());
+      } catch (TezException e) {
+        return new ContainerExecutionResult(ContainerExecutionResult.ExitStatus.EXECUTION_FAILURE,
+            e, "TaskExecutionFailure: " + e.getMessage());
+      } finally {
+        FileSystem.closeAllForUGI(taskUgi);
+      }
+      LOG.info("ExecutionTime for Container: " + request.getContainerIdString() + "=" +
+          sw.stop().elapsedMillis());
+      return new ContainerExecutionResult(ContainerExecutionResult.ExitStatus.SUCCESS, null,
+          null);
+    }
+
+    public void shutdown() {
+      executor.shutdownNow();
+      if (taskReporter != null) {
+        taskReporter.shutdown();
+      }
+      if (umbilical != null) {
+        RPC.stopProxy(umbilical);
+      }
+    }
+  }
+
+
+  final class TaskRunnerCallback implements FutureCallback<ContainerExecutionResult> {
+
+    private final SubmitWorkRequestProto request;
+    private final TaskRunnerCallable taskRunnerCallable;
+
+    TaskRunnerCallback(SubmitWorkRequestProto request,
+                            TaskRunnerCallable containerRunnerCallable) {
+      this.request = request;
+      this.taskRunnerCallable = containerRunnerCallable;
+    }
+
+    // TODO Proper error handling
+    @Override
+    public void onSuccess(ContainerExecutionResult result) {
+      switch (result.getExitStatus()) {
+        case SUCCESS:
+          LOG.info("Successfully finished: " + request.getApplicationIdString() + ", containerId=" +
+              request.getContainerIdString());
+          break;
+        case EXECUTION_FAILURE:
+          LOG.info("Failed to run: " + request.getApplicationIdString() + ", containerId=" +
+              request.getContainerIdString(), result.getThrowable());
+          break;
+        case INTERRUPTED:
+          LOG.info(
+              "Interrupted while running: " + request.getApplicationIdString() + ", containerId=" +
+                  request.getContainerIdString(), result.getThrowable());
+          break;
+        case ASKED_TO_DIE:
+          LOG.info(
+              "Asked to die while running: " + request.getApplicationIdString() + ", containerId=" +
+                  request.getContainerIdString());
+          break;
+      }
+      taskRunnerCallable.shutdown();
+    }
+
+    @Override
+    public void onFailure(Throwable t) {
+      LOG.error(
+          "TezTaskRunner execution failed for : " + request.getApplicationIdString() + ", containerId=" +
+              request.getContainerIdString(), t);
+      taskRunnerCallable.shutdown();
+    }
+  }
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tez/blob/aadd0492/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/TezTestService.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/TezTestService.java b/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/TezTestService.java
new file mode 100644
index 0000000..012e352
--- /dev/null
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/TezTestService.java
@@ -0,0 +1,126 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.service.impl;
+
+import java.io.IOException;
+import java.net.InetSocketAddress;
+import java.util.Arrays;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.concurrent.atomic.AtomicReference;
+
+import com.google.common.base.Preconditions;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.service.AbstractService;
+import org.apache.hadoop.util.StringUtils;
+import org.apache.log4j.Logger;
+import org.apache.tez.service.ContainerRunner;
+import org.apache.tez.shufflehandler.ShuffleHandler;
+import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos;
+import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos.RunContainerRequestProto;
+
+public class TezTestService extends AbstractService implements ContainerRunner {
+
+  private static final Logger LOG = Logger.getLogger(TezTestService.class);
+
+  private final Configuration shuffleHandlerConf;
+  private final int numExecutors;
+
+  private final TezTestServiceProtocolServerImpl server;
+  private final ContainerRunnerImpl containerRunner;
+  private final String[] localDirs;
+
+  private final AtomicInteger numSubmissions = new AtomicInteger(0);
+
+
+  private final AtomicReference<InetSocketAddress> address = new AtomicReference<InetSocketAddress>();
+
+  public TezTestService(Configuration conf, int numExecutors, long memoryAvailable, String[] localDirs) {
+    super(TezTestService.class.getSimpleName());
+    this.numExecutors = numExecutors;
+    this.localDirs = localDirs;
+
+    long memoryAvailableBytes = memoryAvailable;
+    long jvmMax = Runtime.getRuntime().maxMemory();
+
+    LOG.info(TezTestService.class.getSimpleName() + " created with the following configuration: " +
+        "numExecutors=" + numExecutors +
+        ", workDirs=" + Arrays.toString(localDirs) +
+        ", memoryAvailable=" + memoryAvailable +
+        ", jvmMaxMemory=" + jvmMax);
+
+    Preconditions.checkArgument(this.numExecutors > 0);
+    Preconditions.checkArgument(this.localDirs != null && this.localDirs.length > 0,
+        "Work dirs must be specified");
+    Preconditions.checkState(jvmMax >= memoryAvailableBytes,
+        "Invalid configuration. Xmx value too small. maxAvailable=" + jvmMax + ", configured=" +
+            memoryAvailableBytes);
+
+    this.shuffleHandlerConf = new Configuration(conf);
+    // Start Shuffle on a random port
+    this.shuffleHandlerConf.setInt(ShuffleHandler.SHUFFLE_PORT_CONFIG_KEY, 0);
+    this.shuffleHandlerConf.set(ShuffleHandler.SHUFFLE_HANDLER_LOCAL_DIRS, StringUtils.arrayToString(localDirs));
+
+    this.server = new TezTestServiceProtocolServerImpl(this, address);
+    this.containerRunner = new ContainerRunnerImpl(numExecutors, localDirs, address,
+        memoryAvailableBytes);
+  }
+
+  @Override
+  public void serviceInit(Configuration conf) {
+    server.init(conf);
+    containerRunner.init(conf);
+  }
+
+  @Override
+  public void serviceStart() throws Exception {
+    ShuffleHandler.initializeAndStart(shuffleHandlerConf);
+    containerRunner.setShufflePort(ShuffleHandler.get().getPort());
+    server.start();
+    containerRunner.start();
+  }
+
+  public void serviceStop() throws Exception {
+    containerRunner.stop();
+    server.stop();
+    ShuffleHandler.get().stop();
+  }
+
+  public InetSocketAddress getListenerAddress() {
+    return server.getBindAddress();
+  }
+
+  public int getShufflePort() {
+    return ShuffleHandler.get().getPort();
+  }
+
+
+
+  @Override
+  public void queueContainer(RunContainerRequestProto request) throws IOException {
+    numSubmissions.incrementAndGet();
+    containerRunner.queueContainer(request);
+  }
+
+  @Override
+  public void submitWork(TezTestServiceProtocolProtos.SubmitWorkRequestProto request) throws
+      IOException {
+    numSubmissions.incrementAndGet();
+    containerRunner.submitWork(request);
+  }
+
+  public int getNumSubmissions() {
+    return numSubmissions.get();
+  }
+}

http://git-wip-us.apache.org/repos/asf/tez/blob/aadd0492/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/TezTestServiceProtocolClientImpl.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/TezTestServiceProtocolClientImpl.java b/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/TezTestServiceProtocolClientImpl.java
new file mode 100644
index 0000000..10d2952
--- /dev/null
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/TezTestServiceProtocolClientImpl.java
@@ -0,0 +1,82 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.service.impl;
+
+import java.io.IOException;
+import java.net.InetSocketAddress;
+
+import com.google.protobuf.RpcController;
+import com.google.protobuf.ServiceException;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.ipc.ProtobufRpcEngine;
+import org.apache.hadoop.ipc.RPC;
+import org.apache.hadoop.net.NetUtils;
+import org.apache.tez.service.TezTestServiceProtocolBlockingPB;
+import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos;
+import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos.RunContainerRequestProto;
+import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos.RunContainerResponseProto;
+
+
+public class TezTestServiceProtocolClientImpl implements TezTestServiceProtocolBlockingPB {
+
+  private final Configuration conf;
+  private final InetSocketAddress serverAddr;
+  TezTestServiceProtocolBlockingPB proxy;
+
+
+  public TezTestServiceProtocolClientImpl(Configuration conf, String hostname, int port) {
+    this.conf = conf;
+    this.serverAddr = NetUtils.createSocketAddr(hostname, port);
+  }
+
+  @Override
+  public RunContainerResponseProto runContainer(RpcController controller,
+                                                RunContainerRequestProto request) throws
+      ServiceException {
+    try {
+      return getProxy().runContainer(null, request);
+    } catch (IOException e) {
+      throw new ServiceException(e);
+    }
+  }
+
+  @Override
+  public TezTestServiceProtocolProtos.SubmitWorkResponseProto submitWork(RpcController controller,
+                                                                         TezTestServiceProtocolProtos.SubmitWorkRequestProto request) throws
+      ServiceException {
+    try {
+      return getProxy().submitWork(null, request);
+    } catch (IOException e) {
+      throw new ServiceException(e);
+    }
+  }
+
+
+  public TezTestServiceProtocolBlockingPB getProxy() throws IOException {
+    if (proxy == null) {
+      proxy = createProxy();
+    }
+    return proxy;
+  }
+
+  public TezTestServiceProtocolBlockingPB createProxy() throws IOException {
+    TezTestServiceProtocolBlockingPB p;
+    // TODO Fix security
+    RPC.setProtocolEngine(conf, TezTestServiceProtocolBlockingPB.class, ProtobufRpcEngine.class);
+    p = (TezTestServiceProtocolBlockingPB) RPC
+        .getProxy(TezTestServiceProtocolBlockingPB.class, 0, serverAddr, conf);
+    return p;
+  }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tez/blob/aadd0492/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/TezTestServiceProtocolServerImpl.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/TezTestServiceProtocolServerImpl.java b/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/TezTestServiceProtocolServerImpl.java
new file mode 100644
index 0000000..d7f8444
--- /dev/null
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/TezTestServiceProtocolServerImpl.java
@@ -0,0 +1,133 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.service.impl;
+
+import java.io.IOException;
+import java.net.InetSocketAddress;
+import java.util.concurrent.atomic.AtomicReference;
+
+import com.google.common.annotations.VisibleForTesting;
+import com.google.protobuf.BlockingService;
+import com.google.protobuf.RpcController;
+import com.google.protobuf.ServiceException;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.ipc.ProtobufRpcEngine;
+import org.apache.hadoop.ipc.RPC;
+import org.apache.hadoop.net.NetUtils;
+import org.apache.hadoop.service.AbstractService;
+import org.apache.tez.service.ContainerRunner;
+import org.apache.tez.service.TezTestServiceProtocolBlockingPB;
+import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos;
+import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos.RunContainerRequestProto;
+import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos.RunContainerResponseProto;
+import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos.SubmitWorkResponseProto;
+
+public class TezTestServiceProtocolServerImpl extends AbstractService
+    implements TezTestServiceProtocolBlockingPB {
+
+  private static final Log LOG = LogFactory.getLog(TezTestServiceProtocolServerImpl.class);
+
+  private final ContainerRunner containerRunner;
+  private RPC.Server server;
+  private final AtomicReference<InetSocketAddress> bindAddress;
+
+
+  public TezTestServiceProtocolServerImpl(ContainerRunner containerRunner,
+                                          AtomicReference<InetSocketAddress> address) {
+    super(TezTestServiceProtocolServerImpl.class.getSimpleName());
+    this.containerRunner = containerRunner;
+    this.bindAddress = address;
+  }
+
+  @Override
+  public RunContainerResponseProto runContainer(RpcController controller,
+                                                RunContainerRequestProto request) throws
+      ServiceException {
+    LOG.info("Received request: " + request);
+    try {
+      containerRunner.queueContainer(request);
+    } catch (IOException e) {
+      throw new ServiceException(e);
+    }
+    return RunContainerResponseProto.getDefaultInstance();
+  }
+
+  @Override
+  public SubmitWorkResponseProto submitWork(RpcController controller, TezTestServiceProtocolProtos.SubmitWorkRequestProto request) throws
+      ServiceException {
+    LOG.info("Received submitWork request: " + request);
+    try {
+      containerRunner.submitWork(request);
+    } catch (IOException e) {
+      e.printStackTrace();
+    }
+    return SubmitWorkResponseProto.getDefaultInstance();
+  }
+
+
+  @Override
+  public void serviceStart() {
+    Configuration conf = getConfig();
+
+    int numHandlers = 3;
+    InetSocketAddress addr = new InetSocketAddress(0);
+
+    try {
+      server = createServer(TezTestServiceProtocolBlockingPB.class, addr, conf, numHandlers,
+          TezTestServiceProtocolProtos.TezTestServiceProtocol.newReflectiveBlockingService(this));
+      server.start();
+    } catch (IOException e) {
+      LOG.error("Failed to run RPC Server", e);
+      throw new RuntimeException(e);
+    }
+
+    InetSocketAddress serverBindAddress = NetUtils.getConnectAddress(server);
+    this.bindAddress.set(NetUtils.createSocketAddrForHost(
+        serverBindAddress.getAddress().getCanonicalHostName(),
+        serverBindAddress.getPort()));
+    LOG.info("Instantiated TestTestServiceListener at " + bindAddress);
+  }
+
+  @Override
+  public void serviceStop() {
+    if (server != null) {
+      server.stop();
+    }
+  }
+
+  @InterfaceAudience.Private
+  @VisibleForTesting
+  InetSocketAddress getBindAddress() {
+    return this.bindAddress.get();
+  }
+
+  private RPC.Server createServer(Class<?> pbProtocol, InetSocketAddress addr, Configuration conf,
+                                  int numHandlers, BlockingService blockingService) throws
+      IOException {
+    RPC.setProtocolEngine(conf, pbProtocol, ProtobufRpcEngine.class);
+    RPC.Server server = new RPC.Builder(conf)
+        .setProtocol(pbProtocol)
+        .setInstance(blockingService)
+        .setBindAddress(addr.getHostName())
+        .setPort(0)
+        .setNumHandlers(numHandlers)
+        .build();
+    // TODO Add security.
+    return server;
+  }
+}

http://git-wip-us.apache.org/repos/asf/tez/blob/aadd0492/tez-ext-service-tests/src/test/java/org/apache/tez/shufflehandler/FadvisedChunkedFile.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/shufflehandler/FadvisedChunkedFile.java b/tez-ext-service-tests/src/test/java/org/apache/tez/shufflehandler/FadvisedChunkedFile.java
new file mode 100644
index 0000000..65588fe
--- /dev/null
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/shufflehandler/FadvisedChunkedFile.java
@@ -0,0 +1,78 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.shufflehandler;
+
+import java.io.FileDescriptor;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.io.ReadaheadPool;
+import org.apache.hadoop.io.ReadaheadPool.ReadaheadRequest;
+import org.apache.hadoop.io.nativeio.NativeIO;
+import org.jboss.netty.handler.stream.ChunkedFile;
+
+public class FadvisedChunkedFile extends ChunkedFile {
+
+  private static final Log LOG = LogFactory.getLog(FadvisedChunkedFile.class);
+
+  private final boolean manageOsCache;
+  private final int readaheadLength;
+  private final ReadaheadPool readaheadPool;
+  private final FileDescriptor fd;
+  private final String identifier;
+
+  private ReadaheadRequest readaheadRequest;
+
+  public FadvisedChunkedFile(RandomAccessFile file, long position, long count,
+      int chunkSize, boolean manageOsCache, int readaheadLength,
+      ReadaheadPool readaheadPool, String identifier) throws IOException {
+    super(file, position, count, chunkSize);
+    this.manageOsCache = manageOsCache;
+    this.readaheadLength = readaheadLength;
+    this.readaheadPool = readaheadPool;
+    this.fd = file.getFD();
+    this.identifier = identifier;
+  }
+
+  @Override
+  public Object nextChunk() throws Exception {
+    if (manageOsCache && readaheadPool != null) {
+      readaheadRequest = readaheadPool
+          .readaheadStream(identifier, fd, getCurrentOffset(), readaheadLength,
+              getEndOffset(), readaheadRequest);
+    }
+    return super.nextChunk();
+  }
+
+  @Override
+  public void close() throws Exception {
+    if (readaheadRequest != null) {
+      readaheadRequest.cancel();
+    }
+    if (manageOsCache && getEndOffset() - getStartOffset() > 0) {
+      try {
+        NativeIO.POSIX.getCacheManipulator().posixFadviseIfPossible(identifier,
+            fd,
+            getStartOffset(), getEndOffset() - getStartOffset(),
+            NativeIO.POSIX.POSIX_FADV_DONTNEED);
+      } catch (Throwable t) {
+        LOG.warn("Failed to manage OS cache for " + identifier, t);
+      }
+    }
+    super.close();
+  }
+}

http://git-wip-us.apache.org/repos/asf/tez/blob/aadd0492/tez-ext-service-tests/src/test/java/org/apache/tez/shufflehandler/FadvisedFileRegion.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/shufflehandler/FadvisedFileRegion.java b/tez-ext-service-tests/src/test/java/org/apache/tez/shufflehandler/FadvisedFileRegion.java
new file mode 100644
index 0000000..bdffe52
--- /dev/null
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/shufflehandler/FadvisedFileRegion.java
@@ -0,0 +1,160 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.shufflehandler;
+
+import java.io.FileDescriptor;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.nio.ByteBuffer;
+import java.nio.channels.FileChannel;
+import java.nio.channels.WritableByteChannel;
+
+import com.google.common.annotations.VisibleForTesting;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.io.ReadaheadPool;
+import org.apache.hadoop.io.ReadaheadPool.ReadaheadRequest;
+import org.apache.hadoop.io.nativeio.NativeIO;
+import org.jboss.netty.channel.DefaultFileRegion;
+
+public class FadvisedFileRegion extends DefaultFileRegion {
+
+  private static final Log LOG = LogFactory.getLog(FadvisedFileRegion.class);
+
+  private final boolean manageOsCache;
+  private final int readaheadLength;
+  private final ReadaheadPool readaheadPool;
+  private final FileDescriptor fd;
+  private final String identifier;
+  private final long count;
+  private final long position;
+  private final int shuffleBufferSize;
+  private final boolean shuffleTransferToAllowed;
+  private final FileChannel fileChannel;
+  
+  private ReadaheadRequest readaheadRequest;
+
+  public FadvisedFileRegion(RandomAccessFile file, long position, long count,
+      boolean manageOsCache, int readaheadLength, ReadaheadPool readaheadPool,
+      String identifier, int shuffleBufferSize, 
+      boolean shuffleTransferToAllowed) throws IOException {
+    super(file.getChannel(), position, count);
+    this.manageOsCache = manageOsCache;
+    this.readaheadLength = readaheadLength;
+    this.readaheadPool = readaheadPool;
+    this.fd = file.getFD();
+    this.identifier = identifier;
+    this.fileChannel = file.getChannel();
+    this.count = count;
+    this.position = position;
+    this.shuffleBufferSize = shuffleBufferSize;
+    this.shuffleTransferToAllowed = shuffleTransferToAllowed;
+  }
+
+  @Override
+  public long transferTo(WritableByteChannel target, long position)
+      throws IOException {
+    if (manageOsCache && readaheadPool != null) {
+      readaheadRequest = readaheadPool.readaheadStream(identifier, fd,
+          getPosition() + position, readaheadLength,
+          getPosition() + getCount(), readaheadRequest);
+    }
+    
+    if(this.shuffleTransferToAllowed) {
+      return super.transferTo(target, position);
+    } else {
+      return customShuffleTransfer(target, position);
+    } 
+  }
+
+  /**
+   * This method transfers data using local buffer. It transfers data from 
+   * a disk to a local buffer in memory, and then it transfers data from the 
+   * buffer to the target. This is used only if transferTo is disallowed in
+   * the configuration file. super.TransferTo does not perform well on Windows 
+   * due to a small IO request generated. customShuffleTransfer can control 
+   * the size of the IO requests by changing the size of the intermediate 
+   * buffer.
+   */
+  @VisibleForTesting
+  long customShuffleTransfer(WritableByteChannel target, long position)
+      throws IOException {
+    long actualCount = this.count - position;
+    if (actualCount < 0 || position < 0) {
+      throw new IllegalArgumentException(
+          "position out of range: " + position +
+          " (expected: 0 - " + (this.count - 1) + ')');
+    }
+    if (actualCount == 0) {
+      return 0L;
+    }
+    
+    long trans = actualCount;
+    int readSize;
+    ByteBuffer byteBuffer = ByteBuffer.allocate(this.shuffleBufferSize);
+    
+    while(trans > 0L &&
+        (readSize = fileChannel.read(byteBuffer, this.position+position)) > 0) {
+      //adjust counters and buffer limit
+      if(readSize < trans) {
+        trans -= readSize;
+        position += readSize;
+        byteBuffer.flip();
+      } else {
+        //We can read more than we need if the actualCount is not multiple 
+        //of the byteBuffer size and file is big enough. In that case we cannot
+        //use flip method but we need to set buffer limit manually to trans.
+        byteBuffer.limit((int)trans);
+        byteBuffer.position(0);
+        position += trans; 
+        trans = 0;
+      }
+      
+      //write data to the target
+      while(byteBuffer.hasRemaining()) {
+        target.write(byteBuffer);
+      }
+      
+      byteBuffer.clear();
+    }
+    
+    return actualCount - trans;
+  }
+
+  
+  @Override
+  public void releaseExternalResources() {
+    if (readaheadRequest != null) {
+      readaheadRequest.cancel();
+    }
+    super.releaseExternalResources();
+  }
+  
+  /**
+   * Call when the transfer completes successfully so we can advise the OS that
+   * we don't need the region to be cached anymore.
+   */
+  public void transferSuccessful() {
+    if (manageOsCache && getCount() > 0) {
+      try {
+        NativeIO.POSIX.getCacheManipulator().posixFadviseIfPossible(identifier,
+           fd, getPosition(), getCount(),
+           NativeIO.POSIX.POSIX_FADV_DONTNEED);
+      } catch (Throwable t) {
+        LOG.warn("Failed to manage OS cache for " + identifier, t);
+      }
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/tez/blob/aadd0492/tez-ext-service-tests/src/test/java/org/apache/tez/shufflehandler/IndexCache.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/shufflehandler/IndexCache.java b/tez-ext-service-tests/src/test/java/org/apache/tez/shufflehandler/IndexCache.java
new file mode 100644
index 0000000..9a51ca0
--- /dev/null
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/shufflehandler/IndexCache.java
@@ -0,0 +1,199 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tez.shufflehandler;
+
+import java.io.IOException;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.LinkedBlockingQueue;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.tez.runtime.library.common.Constants;
+import org.apache.tez.runtime.library.common.sort.impl.TezIndexRecord;
+import org.apache.tez.runtime.library.common.sort.impl.TezSpillRecord;
+
+class IndexCache {
+
+  private final Configuration conf;
+  private final int totalMemoryAllowed;
+  private AtomicInteger totalMemoryUsed = new AtomicInteger();
+  private static final Log LOG = LogFactory.getLog(IndexCache.class);
+
+  private final ConcurrentHashMap<String,IndexInformation> cache =
+      new ConcurrentHashMap<String,IndexInformation>();
+
+  private final LinkedBlockingQueue<String> queue =
+      new LinkedBlockingQueue<String>();
+
+  public IndexCache(Configuration conf) {
+    this.conf = conf;
+    totalMemoryAllowed = 10 * 1024 * 1024;
+    LOG.info("IndexCache created with max memory = " + totalMemoryAllowed);
+  }
+
+  /**
+   * This method gets the index information for the given mapId and reduce.
+   * It reads the index file into cache if it is not already present.
+   * @param mapId
+   * @param reduce
+   * @param fileName The file to read the index information from if it is not
+   *                 already present in the cache
+   * @param expectedIndexOwner The expected owner of the index file
+   * @return The Index Information
+   * @throws IOException
+   */
+  public TezIndexRecord getIndexInformation(String mapId, int reduce,
+                                         Path fileName, String expectedIndexOwner)
+      throws IOException {
+
+    IndexInformation info = cache.get(mapId);
+
+    if (info == null) {
+      info = readIndexFileToCache(fileName, mapId, expectedIndexOwner);
+    } else {
+      synchronized(info) {
+        while (isUnderConstruction(info)) {
+          try {
+            info.wait();
+          } catch (InterruptedException e) {
+            throw new IOException("Interrupted waiting for construction", e);
+          }
+        }
+      }
+      LOG.debug("IndexCache HIT: MapId " + mapId + " found");
+    }
+
+    if (info.mapSpillRecord.size() == 0 ||
+        info.mapSpillRecord.size() <= reduce) {
+      throw new IOException("Invalid request " +
+          " Map Id = " + mapId + " Reducer = " + reduce +
+          " Index Info Length = " + info.mapSpillRecord.size());
+    }
+    return info.mapSpillRecord.getIndex(reduce);
+  }
+
+  private boolean isUnderConstruction(IndexInformation info) {
+    synchronized(info) {
+      return (null == info.mapSpillRecord);
+    }
+  }
+
+  private IndexInformation readIndexFileToCache(Path indexFileName,
+                                                String mapId,
+                                                String expectedIndexOwner)
+      throws IOException {
+    IndexInformation info;
+    IndexInformation newInd = new IndexInformation();
+    if ((info = cache.putIfAbsent(mapId, newInd)) != null) {
+      synchronized(info) {
+        while (isUnderConstruction(info)) {
+          try {
+            info.wait();
+          } catch (InterruptedException e) {
+            throw new IOException("Interrupted waiting for construction", e);
+          }
+        }
+      }
+      LOG.debug("IndexCache HIT: MapId " + mapId + " found");
+      return info;
+    }
+    LOG.debug("IndexCache MISS: MapId " + mapId + " not found") ;
+    TezSpillRecord tmp = null;
+    try {
+      tmp = new TezSpillRecord(indexFileName, conf, expectedIndexOwner);
+    } catch (Throwable e) {
+      tmp = new TezSpillRecord(0);
+      cache.remove(mapId);
+      throw new IOException("Error Reading IndexFile", e);
+    } finally {
+      synchronized (newInd) {
+        newInd.mapSpillRecord = tmp;
+        newInd.notifyAll();
+      }
+    }
+    queue.add(mapId);
+
+    if (totalMemoryUsed.addAndGet(newInd.getSize()) > totalMemoryAllowed) {
+      freeIndexInformation();
+    }
+    return newInd;
+  }
+
+  /**
+   * This method removes the map from the cache if index information for this
+   * map is loaded(size>0), index information entry in cache will not be 
+   * removed if it is in the loading phrase(size=0), this prevents corruption  
+   * of totalMemoryUsed. It should be called when a map output on this tracker 
+   * is discarded.
+   * @param mapId The taskID of this map.
+   */
+  public void removeMap(String mapId) {
+    IndexInformation info = cache.get(mapId);
+    if (info == null || ((info != null) && isUnderConstruction(info))) {
+      return;
+    }
+    info = cache.remove(mapId);
+    if (info != null) {
+      totalMemoryUsed.addAndGet(-info.getSize());
+      if (!queue.remove(mapId)) {
+        LOG.warn("Map ID" + mapId + " not found in queue!!");
+      }
+    } else {
+      LOG.info("Map ID " + mapId + " not found in cache");
+    }
+  }
+
+  /**
+   * This method checks if cache and totolMemoryUsed is consistent.
+   * It is only used for unit test.
+   * @return True if cache and totolMemoryUsed is consistent
+   */
+  boolean checkTotalMemoryUsed() {
+    int totalSize = 0;
+    for (IndexInformation info : cache.values()) {
+      totalSize += info.getSize();
+    }
+    return totalSize == totalMemoryUsed.get();
+  }
+
+  /**
+   * Bring memory usage below totalMemoryAllowed.
+   */
+  private synchronized void freeIndexInformation() {
+    while (totalMemoryUsed.get() > totalMemoryAllowed) {
+      String s = queue.remove();
+      IndexInformation info = cache.remove(s);
+      if (info != null) {
+        totalMemoryUsed.addAndGet(-info.getSize());
+      }
+    }
+  }
+
+  private static class IndexInformation {
+    TezSpillRecord mapSpillRecord;
+
+    int getSize() {
+      return mapSpillRecord == null
+          ? 0
+          : mapSpillRecord.size() * Constants.MAP_OUTPUT_INDEX_RECORD_LENGTH;
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/tez/blob/aadd0492/tez-ext-service-tests/src/test/java/org/apache/tez/shufflehandler/ShuffleHandler.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/shufflehandler/ShuffleHandler.java b/tez-ext-service-tests/src/test/java/org/apache/tez/shufflehandler/ShuffleHandler.java
new file mode 100644
index 0000000..cc82d74
--- /dev/null
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/shufflehandler/ShuffleHandler.java
@@ -0,0 +1,840 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.shufflehandler;
+
+import static org.jboss.netty.buffer.ChannelBuffers.wrappedBuffer;
+import static org.jboss.netty.handler.codec.http.HttpHeaders.Names.CONTENT_TYPE;
+import static org.jboss.netty.handler.codec.http.HttpMethod.GET;
+import static org.jboss.netty.handler.codec.http.HttpResponseStatus.BAD_REQUEST;
+import static org.jboss.netty.handler.codec.http.HttpResponseStatus.FORBIDDEN;
+import static org.jboss.netty.handler.codec.http.HttpResponseStatus.INTERNAL_SERVER_ERROR;
+import static org.jboss.netty.handler.codec.http.HttpResponseStatus.METHOD_NOT_ALLOWED;
+import static org.jboss.netty.handler.codec.http.HttpResponseStatus.NOT_FOUND;
+import static org.jboss.netty.handler.codec.http.HttpResponseStatus.OK;
+import static org.jboss.netty.handler.codec.http.HttpResponseStatus.UNAUTHORIZED;
+import static org.jboss.netty.handler.codec.http.HttpVersion.HTTP_1_1;
+
+import javax.crypto.SecretKey;
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.net.InetSocketAddress;
+import java.net.URL;
+import java.nio.ByteBuffer;
+import java.nio.channels.ClosedChannelException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ConcurrentMap;
+import java.util.concurrent.Executors;
+import java.util.concurrent.ThreadFactory;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.regex.Pattern;
+
+import com.google.common.base.Charsets;
+import com.google.common.base.Preconditions;
+import com.google.common.util.concurrent.ThreadFactoryBuilder;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.LocalDirAllocator;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.DataInputByteBuffer;
+import org.apache.hadoop.io.DataOutputBuffer;
+import org.apache.hadoop.io.ReadaheadPool;
+import org.apache.hadoop.io.SecureIOUtils;
+import org.apache.hadoop.metrics2.annotation.Metric;
+import org.apache.hadoop.metrics2.annotation.Metrics;
+import org.apache.hadoop.metrics2.lib.MutableCounterInt;
+import org.apache.hadoop.metrics2.lib.MutableCounterLong;
+import org.apache.hadoop.metrics2.lib.MutableGaugeInt;
+import org.apache.hadoop.security.ssl.SSLFactory;
+import org.apache.hadoop.security.token.Token;
+import org.apache.hadoop.util.Shell;
+import org.apache.hadoop.yarn.api.records.ApplicationId;
+import org.apache.hadoop.yarn.util.ConverterUtils;
+import org.apache.tez.common.security.JobTokenIdentifier;
+import org.apache.tez.common.security.JobTokenSecretManager;
+import org.apache.tez.runtime.library.common.security.SecureShuffleUtils;
+import org.apache.tez.runtime.library.common.shuffle.orderedgrouped.ShuffleHeader;
+import org.apache.tez.runtime.library.common.sort.impl.TezIndexRecord;
+import org.jboss.netty.bootstrap.ServerBootstrap;
+import org.jboss.netty.buffer.ChannelBuffers;
+import org.jboss.netty.channel.Channel;
+import org.jboss.netty.channel.ChannelFactory;
+import org.jboss.netty.channel.ChannelFuture;
+import org.jboss.netty.channel.ChannelFutureListener;
+import org.jboss.netty.channel.ChannelHandlerContext;
+import org.jboss.netty.channel.ChannelPipeline;
+import org.jboss.netty.channel.ChannelPipelineFactory;
+import org.jboss.netty.channel.ChannelStateEvent;
+import org.jboss.netty.channel.Channels;
+import org.jboss.netty.channel.ExceptionEvent;
+import org.jboss.netty.channel.MessageEvent;
+import org.jboss.netty.channel.SimpleChannelUpstreamHandler;
+import org.jboss.netty.channel.group.ChannelGroup;
+import org.jboss.netty.channel.group.DefaultChannelGroup;
+import org.jboss.netty.channel.socket.nio.NioServerSocketChannelFactory;
+import org.jboss.netty.handler.codec.frame.TooLongFrameException;
+import org.jboss.netty.handler.codec.http.DefaultHttpResponse;
+import org.jboss.netty.handler.codec.http.HttpChunkAggregator;
+import org.jboss.netty.handler.codec.http.HttpHeaders;
+import org.jboss.netty.handler.codec.http.HttpRequest;
+import org.jboss.netty.handler.codec.http.HttpRequestDecoder;
+import org.jboss.netty.handler.codec.http.HttpResponse;
+import org.jboss.netty.handler.codec.http.HttpResponseEncoder;
+import org.jboss.netty.handler.codec.http.HttpResponseStatus;
+import org.jboss.netty.handler.codec.http.QueryStringDecoder;
+import org.jboss.netty.handler.ssl.SslHandler;
+import org.jboss.netty.handler.stream.ChunkedWriteHandler;
+import org.jboss.netty.util.CharsetUtil;
+
+public class ShuffleHandler {
+
+  private static final Log LOG = LogFactory.getLog(ShuffleHandler.class);
+
+  public static final String SHUFFLE_HANDLER_LOCAL_DIRS = "tez.shuffle.handler.local-dirs";
+
+  public static final String SHUFFLE_MANAGE_OS_CACHE = "mapreduce.shuffle.manage.os.cache";
+  public static final boolean DEFAULT_SHUFFLE_MANAGE_OS_CACHE = true;
+
+  public static final String SHUFFLE_READAHEAD_BYTES = "mapreduce.shuffle.readahead.bytes";
+  public static final int DEFAULT_SHUFFLE_READAHEAD_BYTES = 4 * 1024 * 1024;
+  
+  // pattern to identify errors related to the client closing the socket early
+  // idea borrowed from Netty SslHandler
+  private static final Pattern IGNORABLE_ERROR_MESSAGE = Pattern.compile(
+      "^.*(?:connection.*reset|connection.*closed|broken.*pipe).*$",
+      Pattern.CASE_INSENSITIVE);
+
+  private int port;
+  private final ChannelFactory selector;
+  private final ChannelGroup accepted = new DefaultChannelGroup();
+  protected HttpPipelineFactory pipelineFact;
+  private final int sslFileBufferSize;
+  private final Configuration conf;
+
+  private final ConcurrentMap<String, Boolean> registeredApps = new ConcurrentHashMap<String, Boolean>();
+
+  /**
+   * Should the shuffle use posix_fadvise calls to manage the OS cache during
+   * sendfile
+   */
+  private final boolean manageOsCache;
+  private final int readaheadLength;
+  private final int maxShuffleConnections;
+  private final int shuffleBufferSize;
+  private final boolean shuffleTransferToAllowed;
+  private final ReadaheadPool readaheadPool = ReadaheadPool.getInstance();
+
+  private Map<String,String> userRsrc;
+  private JobTokenSecretManager secretManager;
+
+  // TODO Fix this for tez.
+  public static final String MAPREDUCE_SHUFFLE_SERVICEID =
+      "mapreduce_shuffle";
+
+  public static final String SHUFFLE_PORT_CONFIG_KEY = "tez.shuffle.port";
+  public static final int DEFAULT_SHUFFLE_PORT = 15551;
+
+  // TODO Change configs to remove mapreduce references.
+  public static final String SHUFFLE_CONNECTION_KEEP_ALIVE_ENABLED =
+      "mapreduce.shuffle.connection-keep-alive.enable";
+  public static final boolean DEFAULT_SHUFFLE_CONNECTION_KEEP_ALIVE_ENABLED = false;
+
+  public static final String SHUFFLE_CONNECTION_KEEP_ALIVE_TIME_OUT =
+      "mapreduce.shuffle.connection-keep-alive.timeout";
+  public static final int DEFAULT_SHUFFLE_CONNECTION_KEEP_ALIVE_TIME_OUT = 5; //seconds
+
+  public static final String SHUFFLE_MAPOUTPUT_META_INFO_CACHE_SIZE =
+      "mapreduce.shuffle.mapoutput-info.meta.cache.size";
+  public static final int DEFAULT_SHUFFLE_MAPOUTPUT_META_INFO_CACHE_SIZE =
+      1000;
+
+  public static final String CONNECTION_CLOSE = "close";
+
+  public static final String SUFFLE_SSL_FILE_BUFFER_SIZE_KEY =
+    "mapreduce.shuffle.ssl.file.buffer.size";
+
+  public static final int DEFAULT_SUFFLE_SSL_FILE_BUFFER_SIZE = 60 * 1024;
+
+  public static final String MAX_SHUFFLE_CONNECTIONS = "mapreduce.shuffle.max.connections";
+  public static final int DEFAULT_MAX_SHUFFLE_CONNECTIONS = 0; // 0 implies no limit
+  
+  public static final String MAX_SHUFFLE_THREADS = "mapreduce.shuffle.max.threads";
+  // 0 implies Netty default of 2 * number of available processors
+  public static final int DEFAULT_MAX_SHUFFLE_THREADS = 0;
+  
+  public static final String SHUFFLE_BUFFER_SIZE = 
+      "mapreduce.shuffle.transfer.buffer.size";
+  public static final int DEFAULT_SHUFFLE_BUFFER_SIZE = 128 * 1024;
+  
+  public static final String  SHUFFLE_TRANSFERTO_ALLOWED = 
+      "mapreduce.shuffle.transferTo.allowed";
+  public static final boolean DEFAULT_SHUFFLE_TRANSFERTO_ALLOWED = true;
+  public static final boolean WINDOWS_DEFAULT_SHUFFLE_TRANSFERTO_ALLOWED = 
+      false;
+
+  final boolean connectionKeepAliveEnabled;
+  final int connectionKeepAliveTimeOut;
+  final int mapOutputMetaInfoCacheSize;
+  private static final AtomicBoolean started = new AtomicBoolean(false);
+  private static final AtomicBoolean initing = new AtomicBoolean(false);
+  private static ShuffleHandler INSTANCE;
+
+  @Metrics(about="Shuffle output metrics", context="mapred")
+  static class ShuffleMetrics implements ChannelFutureListener {
+    @Metric("Shuffle output in bytes")
+    MutableCounterLong shuffleOutputBytes;
+    @Metric("# of failed shuffle outputs")
+    MutableCounterInt shuffleOutputsFailed;
+    @Metric("# of succeeeded shuffle outputs")
+    MutableCounterInt shuffleOutputsOK;
+    @Metric("# of current shuffle connections")
+    MutableGaugeInt shuffleConnections;
+
+    @Override
+    public void operationComplete(ChannelFuture future) throws Exception {
+      if (future.isSuccess()) {
+        shuffleOutputsOK.incr();
+      } else {
+        shuffleOutputsFailed.incr();
+      }
+      shuffleConnections.decr();
+    }
+  }
+
+  public ShuffleHandler(Configuration conf) {
+    this.conf = conf;
+    manageOsCache = conf.getBoolean(SHUFFLE_MANAGE_OS_CACHE,
+        DEFAULT_SHUFFLE_MANAGE_OS_CACHE);
+
+    readaheadLength = conf.getInt(SHUFFLE_READAHEAD_BYTES,
+        DEFAULT_SHUFFLE_READAHEAD_BYTES);
+
+    maxShuffleConnections = conf.getInt(MAX_SHUFFLE_CONNECTIONS,
+        DEFAULT_MAX_SHUFFLE_CONNECTIONS);
+    int maxShuffleThreads = conf.getInt(MAX_SHUFFLE_THREADS,
+        DEFAULT_MAX_SHUFFLE_THREADS);
+    if (maxShuffleThreads == 0) {
+      maxShuffleThreads = 2 * Runtime.getRuntime().availableProcessors();
+    }
+
+    shuffleBufferSize = conf.getInt(SHUFFLE_BUFFER_SIZE,
+        DEFAULT_SHUFFLE_BUFFER_SIZE);
+
+    shuffleTransferToAllowed = conf.getBoolean(SHUFFLE_TRANSFERTO_ALLOWED,
+        (Shell.WINDOWS)?WINDOWS_DEFAULT_SHUFFLE_TRANSFERTO_ALLOWED:
+            DEFAULT_SHUFFLE_TRANSFERTO_ALLOWED);
+
+    ThreadFactory bossFactory = new ThreadFactoryBuilder()
+        .setNameFormat("ShuffleHandler Netty Boss #%d")
+        .build();
+    ThreadFactory workerFactory = new ThreadFactoryBuilder()
+        .setNameFormat("ShuffleHandler Netty Worker #%d")
+        .build();
+
+    selector = new NioServerSocketChannelFactory(
+        Executors.newCachedThreadPool(bossFactory),
+        Executors.newCachedThreadPool(workerFactory),
+        maxShuffleThreads);
+
+    sslFileBufferSize = conf.getInt(SUFFLE_SSL_FILE_BUFFER_SIZE_KEY,
+        DEFAULT_SUFFLE_SSL_FILE_BUFFER_SIZE);
+    connectionKeepAliveEnabled =
+        conf.getBoolean(SHUFFLE_CONNECTION_KEEP_ALIVE_ENABLED,
+            DEFAULT_SHUFFLE_CONNECTION_KEEP_ALIVE_ENABLED);
+    connectionKeepAliveTimeOut =
+        Math.max(1, conf.getInt(SHUFFLE_CONNECTION_KEEP_ALIVE_TIME_OUT,
+            DEFAULT_SHUFFLE_CONNECTION_KEEP_ALIVE_TIME_OUT));
+    mapOutputMetaInfoCacheSize =
+        Math.max(1, conf.getInt(SHUFFLE_MAPOUTPUT_META_INFO_CACHE_SIZE,
+            DEFAULT_SHUFFLE_MAPOUTPUT_META_INFO_CACHE_SIZE));
+
+    userRsrc = new ConcurrentHashMap<String,String>();
+    secretManager = new JobTokenSecretManager();
+  }
+
+
+  public void start() throws Exception {
+    ServerBootstrap bootstrap = new ServerBootstrap(selector);
+    try {
+      pipelineFact = new HttpPipelineFactory(conf);
+    } catch (Exception ex) {
+      throw new RuntimeException(ex);
+    }
+    bootstrap.setPipelineFactory(pipelineFact);
+    port = conf.getInt(SHUFFLE_PORT_CONFIG_KEY, DEFAULT_SHUFFLE_PORT);
+    Channel ch = bootstrap.bind(new InetSocketAddress(port));
+    accepted.add(ch);
+    port = ((InetSocketAddress)ch.getLocalAddress()).getPort();
+    conf.set(SHUFFLE_PORT_CONFIG_KEY, Integer.toString(port));
+    pipelineFact.SHUFFLE.setPort(port);
+    LOG.info("TezShuffleHandler" + " listening on port " + port);
+  }
+
+  public static void initializeAndStart(Configuration conf) throws Exception {
+    if (!initing.getAndSet(true)) {
+      INSTANCE = new ShuffleHandler(conf);
+      INSTANCE.start();
+      started.set(true);
+    }
+  }
+
+  public static ShuffleHandler get() {
+    Preconditions.checkState(started.get(), "ShuffleHandler must be started before invoking started");
+    return INSTANCE;
+  }
+
+  /**
+   * Serialize the shuffle port into a ByteBuffer for use later on.
+   * @param port the port to be sent to the ApplciationMaster
+   * @return the serialized form of the port.
+   */
+  public static ByteBuffer serializeMetaData(int port) throws IOException {
+    //TODO these bytes should be versioned
+    DataOutputBuffer port_dob = new DataOutputBuffer();
+    port_dob.writeInt(port);
+    return ByteBuffer.wrap(port_dob.getData(), 0, port_dob.getLength());
+  }
+
+  /**
+   * A helper function to deserialize the metadata returned by ShuffleHandler.
+   * @param meta the metadata returned by the ShuffleHandler
+   * @return the port the Shuffle Handler is listening on to serve shuffle data.
+   */
+  public static int deserializeMetaData(ByteBuffer meta) throws IOException {
+    //TODO this should be returning a class not just an int
+    DataInputByteBuffer in = new DataInputByteBuffer();
+    in.reset(meta);
+    int port = in.readInt();
+    return port;
+  }
+
+  /**
+   * A helper function to serialize the JobTokenIdentifier to be sent to the
+   * ShuffleHandler as ServiceData.
+   * @param jobToken the job token to be used for authentication of
+   * shuffle data requests.
+   * @return the serialized version of the jobToken.
+   */
+  public static ByteBuffer serializeServiceData(Token<JobTokenIdentifier> jobToken) throws IOException {
+    //TODO these bytes should be versioned
+    DataOutputBuffer jobToken_dob = new DataOutputBuffer();
+    jobToken.write(jobToken_dob);
+    return ByteBuffer.wrap(jobToken_dob.getData(), 0, jobToken_dob.getLength());
+  }
+
+  static Token<JobTokenIdentifier> deserializeServiceData(ByteBuffer secret) throws IOException {
+    DataInputByteBuffer in = new DataInputByteBuffer();
+    in.reset(secret);
+    Token<JobTokenIdentifier> jt = new Token<JobTokenIdentifier>();
+    jt.readFields(in);
+    return jt;
+  }
+
+  public int getPort() {
+    return port;
+  }
+
+  public void registerApplication(String applicationIdString, Token<JobTokenIdentifier> appToken,
+                                  String user) {
+    Boolean registered = registeredApps.putIfAbsent(applicationIdString, Boolean.valueOf(true));
+    if (registered == null) {
+      recordJobShuffleInfo(applicationIdString, user, appToken);
+    }
+  }
+
+  public void unregisterApplication(String applicationIdString) {
+    removeJobShuffleInfo(applicationIdString);
+  }
+
+
+  public void stop() throws Exception {
+    accepted.close().awaitUninterruptibly(10, TimeUnit.SECONDS);
+    if (selector != null) {
+      ServerBootstrap bootstrap = new ServerBootstrap(selector);
+      bootstrap.releaseExternalResources();
+    }
+    if (pipelineFact != null) {
+      pipelineFact.destroy();
+    }
+  }
+
+  protected Shuffle getShuffle(Configuration conf) {
+    return new Shuffle(conf);
+  }
+
+
+  private void addJobToken(String appIdString, String user,
+      Token<JobTokenIdentifier> jobToken) {
+    String jobIdString = appIdString.replace("application", "job");
+    userRsrc.put(jobIdString, user);
+    secretManager.addTokenForJob(jobIdString, jobToken);
+    LOG.info("Added token for " + jobIdString);
+  }
+
+  private void recordJobShuffleInfo(String appIdString, String user,
+      Token<JobTokenIdentifier> jobToken) {
+    addJobToken(appIdString, user, jobToken);
+  }
+
+  private void removeJobShuffleInfo(String appIdString) {
+    secretManager.removeTokenForJob(appIdString);
+    userRsrc.remove(appIdString);
+  }
+
+  class HttpPipelineFactory implements ChannelPipelineFactory {
+
+    final Shuffle SHUFFLE;
+    private SSLFactory sslFactory;
+
+    public HttpPipelineFactory(Configuration conf) throws Exception {
+      SHUFFLE = getShuffle(conf);
+      // TODO Setup SSL Shuffle
+//      if (conf.getBoolean(MRConfig.SHUFFLE_SSL_ENABLED_KEY,
+//                          MRConfig.SHUFFLE_SSL_ENABLED_DEFAULT)) {
+//        LOG.info("Encrypted shuffle is enabled.");
+//        sslFactory = new SSLFactory(SSLFactory.Mode.SERVER, conf);
+//        sslFactory.init();
+//      }
+    }
+
+    public void destroy() {
+      if (sslFactory != null) {
+        sslFactory.destroy();
+      }
+    }
+
+    @Override
+    public ChannelPipeline getPipeline() throws Exception {
+      ChannelPipeline pipeline = Channels.pipeline();
+      if (sslFactory != null) {
+        pipeline.addLast("ssl", new SslHandler(sslFactory.createSSLEngine()));
+      }
+      pipeline.addLast("decoder", new HttpRequestDecoder());
+      pipeline.addLast("aggregator", new HttpChunkAggregator(1 << 16));
+      pipeline.addLast("encoder", new HttpResponseEncoder());
+      pipeline.addLast("chunking", new ChunkedWriteHandler());
+      pipeline.addLast("shuffle", SHUFFLE);
+      return pipeline;
+      // TODO factor security manager into pipeline
+      // TODO factor out encode/decode to permit binary shuffle
+      // TODO factor out decode of index to permit alt. models
+    }
+
+  }
+
+  class Shuffle extends SimpleChannelUpstreamHandler {
+
+    private final Configuration conf;
+    private final IndexCache indexCache;
+    private final LocalDirAllocator lDirAlloc =
+      new LocalDirAllocator(SHUFFLE_HANDLER_LOCAL_DIRS);
+    private int port;
+
+    public Shuffle(Configuration conf) {
+      this.conf = conf;
+      indexCache = new IndexCache(conf);
+      this.port = conf.getInt(SHUFFLE_PORT_CONFIG_KEY, DEFAULT_SHUFFLE_PORT);
+    }
+    
+    public void setPort(int port) {
+      this.port = port;
+    }
+
+    private List<String> splitMaps(List<String> mapq) {
+      if (null == mapq) {
+        return null;
+      }
+      final List<String> ret = new ArrayList<String>();
+      for (String s : mapq) {
+        Collections.addAll(ret, s.split(","));
+      }
+      return ret;
+    }
+
+    @Override
+    public void channelOpen(ChannelHandlerContext ctx, ChannelStateEvent evt) 
+        throws Exception {
+      if ((maxShuffleConnections > 0) && (accepted.size() >= maxShuffleConnections)) {
+        LOG.info(String.format("Current number of shuffle connections (%d) is " + 
+            "greater than or equal to the max allowed shuffle connections (%d)", 
+            accepted.size(), maxShuffleConnections));
+        evt.getChannel().close();
+        return;
+      }
+      accepted.add(evt.getChannel());
+      super.channelOpen(ctx, evt);
+     
+    }
+
+    @Override
+    public void messageReceived(ChannelHandlerContext ctx, MessageEvent evt)
+        throws Exception {
+      HttpRequest request = (HttpRequest) evt.getMessage();
+      if (request.getMethod() != GET) {
+          sendError(ctx, METHOD_NOT_ALLOWED);
+          return;
+      }
+      // Check whether the shuffle version is compatible
+      if (!ShuffleHeader.DEFAULT_HTTP_HEADER_NAME.equals(
+          request.getHeader(ShuffleHeader.HTTP_HEADER_NAME))
+          || !ShuffleHeader.DEFAULT_HTTP_HEADER_VERSION.equals(
+              request.getHeader(ShuffleHeader.HTTP_HEADER_VERSION))) {
+        sendError(ctx, "Incompatible shuffle request version", BAD_REQUEST);
+      }
+      final Map<String,List<String>> q =
+        new QueryStringDecoder(request.getUri()).getParameters();
+      final List<String> keepAliveList = q.get("keepAlive");
+      boolean keepAliveParam = false;
+      if (keepAliveList != null && keepAliveList.size() == 1) {
+        keepAliveParam = Boolean.valueOf(keepAliveList.get(0));
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("KeepAliveParam : " + keepAliveList
+            + " : " + keepAliveParam);
+        }
+      }
+      final List<String> mapIds = splitMaps(q.get("map"));
+      final List<String> reduceQ = q.get("reduce");
+      final List<String> jobQ = q.get("job");
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("RECV: " + request.getUri() +
+            "\n  mapId: " + mapIds +
+            "\n  reduceId: " + reduceQ +
+            "\n  jobId: " + jobQ +
+            "\n  keepAlive: " + keepAliveParam);
+      }
+
+      if (mapIds == null || reduceQ == null || jobQ == null) {
+        sendError(ctx, "Required param job, map and reduce", BAD_REQUEST);
+        return;
+      }
+      if (reduceQ.size() != 1 || jobQ.size() != 1) {
+        sendError(ctx, "Too many job/reduce parameters", BAD_REQUEST);
+        return;
+      }
+      int reduceId;
+      String jobId;
+      try {
+        reduceId = Integer.parseInt(reduceQ.get(0));
+        jobId = jobQ.get(0);
+      } catch (NumberFormatException e) {
+        sendError(ctx, "Bad reduce parameter", BAD_REQUEST);
+        return;
+      } catch (IllegalArgumentException e) {
+        sendError(ctx, "Bad job parameter", BAD_REQUEST);
+        return;
+      }
+      final String reqUri = request.getUri();
+      if (null == reqUri) {
+        // TODO? add upstream?
+        sendError(ctx, FORBIDDEN);
+        return;
+      }
+      HttpResponse response = new DefaultHttpResponse(HTTP_1_1, OK);
+      try {
+        verifyRequest(jobId, ctx, request, response,
+            new URL("http", "", this.port, reqUri));
+      } catch (IOException e) {
+        LOG.warn("Shuffle failure ", e);
+        sendError(ctx, e.getMessage(), UNAUTHORIZED);
+        return;
+      }
+
+      Map<String, MapOutputInfo> mapOutputInfoMap =
+          new HashMap<String, MapOutputInfo>();
+      Channel ch = evt.getChannel();
+      String user = userRsrc.get(jobId);
+
+      // $x/$user/appcache/$appId/output/$mapId
+      // TODO: Once Shuffle is out of NM, this can use MR APIs to convert
+      // between App and Job
+      String outputBasePathStr = getBaseLocation(jobId, user);
+
+      try {
+        populateHeaders(mapIds, outputBasePathStr, user, reduceId, request,
+          response, keepAliveParam, mapOutputInfoMap);
+      } catch(IOException e) {
+        ch.write(response);
+        LOG.error("Shuffle error in populating headers :", e);
+        String errorMessage = getErrorMessage(e);
+        sendError(ctx,errorMessage , INTERNAL_SERVER_ERROR);
+        return;
+      }
+      ch.write(response);
+      // TODO refactor the following into the pipeline
+      ChannelFuture lastMap = null;
+      for (String mapId : mapIds) {
+        try {
+          MapOutputInfo info = mapOutputInfoMap.get(mapId);
+          if (info == null) {
+            info = getMapOutputInfo(outputBasePathStr, mapId, reduceId, user);
+          }
+          lastMap =
+              sendMapOutput(ctx, ch, user, mapId,
+                reduceId, info);
+          if (null == lastMap) {
+            sendError(ctx, NOT_FOUND);
+            return;
+          }
+        } catch (IOException e) {
+          LOG.error("Shuffle error :", e);
+          String errorMessage = getErrorMessage(e);
+          sendError(ctx,errorMessage , INTERNAL_SERVER_ERROR);
+          return;
+        }
+      }
+      lastMap.addListener(ChannelFutureListener.CLOSE);
+    }
+
+    private String getErrorMessage(Throwable t) {
+      StringBuffer sb = new StringBuffer(t.getMessage());
+      while (t.getCause() != null) {
+        sb.append(t.getCause().getMessage());
+        t = t.getCause();
+      }
+      return sb.toString();
+    }
+
+    private final String USERCACHE_CONSTANT = "usercache";
+    private final String APPCACHE_CONSTANT = "appcache";
+
+    private String getBaseLocation(String jobIdString, String user) {
+      String parts[] = jobIdString.split("_");
+      Preconditions.checkArgument(parts.length == 3, "Invalid jobId. Expecting 3 parts");
+      final ApplicationId appID =
+          ApplicationId.newInstance(Long.parseLong(parts[1]), Integer.parseInt(parts[2]));
+      final String baseStr =
+          USERCACHE_CONSTANT + "/" + user + "/"
+              + APPCACHE_CONSTANT + "/"
+              + ConverterUtils.toString(appID) + "/output" + "/";
+      return baseStr;
+    }
+
+    protected MapOutputInfo getMapOutputInfo(String base, String mapId,
+        int reduce, String user) throws IOException {
+      // Index file
+      Path indexFileName =
+          lDirAlloc.getLocalPathToRead(base + "/file.out.index", conf);
+      TezIndexRecord info =
+          indexCache.getIndexInformation(mapId, reduce, indexFileName, user);
+
+      Path mapOutputFileName =
+          lDirAlloc.getLocalPathToRead(base + "/file.out", conf);
+      if (LOG.isDebugEnabled()) {
+        LOG.debug(base + " : " + mapOutputFileName + " : " + indexFileName);
+      }
+      MapOutputInfo outputInfo = new MapOutputInfo(mapOutputFileName, info);
+      return outputInfo;
+    }
+
+    protected void populateHeaders(List<String> mapIds, String outputBaseStr,
+        String user, int reduce, HttpRequest request, HttpResponse response,
+        boolean keepAliveParam, Map<String, MapOutputInfo> mapOutputInfoMap)
+        throws IOException {
+
+      long contentLength = 0;
+      for (String mapId : mapIds) {
+        String base = outputBaseStr + mapId;
+        MapOutputInfo outputInfo = getMapOutputInfo(base, mapId, reduce, user);
+        if (mapOutputInfoMap.size() < mapOutputMetaInfoCacheSize) {
+          mapOutputInfoMap.put(mapId, outputInfo);
+        }
+        // Index file
+        Path indexFileName =
+            lDirAlloc.getLocalPathToRead(base + "/file.out.index", conf);
+        TezIndexRecord info =
+            indexCache.getIndexInformation(mapId, reduce, indexFileName, user);
+        ShuffleHeader header =
+            new ShuffleHeader(mapId, info.getPartLength(), info.getRawLength(), reduce);
+        DataOutputBuffer dob = new DataOutputBuffer();
+        header.write(dob);
+
+        contentLength += info.getPartLength();
+        contentLength += dob.getLength();
+      }
+
+      // Now set the response headers.
+      setResponseHeaders(response, keepAliveParam, contentLength);
+    }
+
+    protected void setResponseHeaders(HttpResponse response,
+        boolean keepAliveParam, long contentLength) {
+      if (!connectionKeepAliveEnabled && !keepAliveParam) {
+        LOG.info("Setting connection close header...");
+        response.setHeader(HttpHeaders.Names.CONNECTION, CONNECTION_CLOSE);
+      } else {
+        response.setHeader(HttpHeaders.Names.CONTENT_LENGTH,
+          String.valueOf(contentLength));
+        response.setHeader(HttpHeaders.Names.CONNECTION, HttpHeaders.Values.KEEP_ALIVE);
+        response.setHeader(HttpHeaders.Values.KEEP_ALIVE, "timeout="
+            + connectionKeepAliveTimeOut);
+        LOG.info("Content Length in shuffle : " + contentLength);
+      }
+    }
+
+    class MapOutputInfo {
+      final Path mapOutputFileName;
+      final TezIndexRecord indexRecord;
+
+      MapOutputInfo(Path mapOutputFileName, TezIndexRecord indexRecord) {
+        this.mapOutputFileName = mapOutputFileName;
+        this.indexRecord = indexRecord;
+      }
+    }
+
+    protected void verifyRequest(String appid, ChannelHandlerContext ctx,
+        HttpRequest request, HttpResponse response, URL requestUri)
+        throws IOException {
+      SecretKey tokenSecret = secretManager.retrieveTokenSecret(appid);
+      if (null == tokenSecret) {
+        LOG.info("Request for unknown token " + appid);
+        throw new IOException("could not find jobid");
+      }
+      // string to encrypt
+      String enc_str = SecureShuffleUtils.buildMsgFrom(requestUri);
+      // hash from the fetcher
+      String urlHashStr =
+        request.getHeader(SecureShuffleUtils.HTTP_HEADER_URL_HASH);
+      if (urlHashStr == null) {
+        LOG.info("Missing header hash for " + appid);
+        throw new IOException("fetcher cannot be authenticated");
+      }
+      if (LOG.isDebugEnabled()) {
+        int len = urlHashStr.length();
+        LOG.debug("verifying request. enc_str=" + enc_str + "; hash=..." +
+            urlHashStr.substring(len-len/2, len-1));
+      }
+      // verify - throws exception
+      SecureShuffleUtils.verifyReply(urlHashStr, enc_str, tokenSecret);
+      // verification passed - encode the reply
+      String reply =
+        SecureShuffleUtils.generateHash(urlHashStr.getBytes(Charsets.UTF_8), 
+            tokenSecret);
+      response.setHeader(SecureShuffleUtils.HTTP_HEADER_REPLY_URL_HASH, reply);
+      // Put shuffle version into http header
+      response.setHeader(ShuffleHeader.HTTP_HEADER_NAME,
+          ShuffleHeader.DEFAULT_HTTP_HEADER_NAME);
+      response.setHeader(ShuffleHeader.HTTP_HEADER_VERSION,
+          ShuffleHeader.DEFAULT_HTTP_HEADER_VERSION);
+      if (LOG.isDebugEnabled()) {
+        int len = reply.length();
+        LOG.debug("Fetcher request verfied. enc_str=" + enc_str + ";reply=" +
+            reply.substring(len-len/2, len-1));
+      }
+    }
+
+    protected ChannelFuture sendMapOutput(ChannelHandlerContext ctx, Channel ch,
+        String user, String mapId, int reduce, MapOutputInfo mapOutputInfo)
+        throws IOException {
+      final TezIndexRecord info = mapOutputInfo.indexRecord;
+      final ShuffleHeader header =
+        new ShuffleHeader(mapId, info.getPartLength(), info.getRawLength(), reduce);
+      final DataOutputBuffer dob = new DataOutputBuffer();
+      header.write(dob);
+      ch.write(wrappedBuffer(dob.getData(), 0, dob.getLength()));
+      final File spillfile =
+          new File(mapOutputInfo.mapOutputFileName.toString());
+      RandomAccessFile spill;
+      try {
+        spill = SecureIOUtils.openForRandomRead(spillfile, "r", user, null);
+      } catch (FileNotFoundException e) {
+        LOG.info(spillfile + " not found");
+        return null;
+      }
+      ChannelFuture writeFuture;
+      if (ch.getPipeline().get(SslHandler.class) == null) {
+        final FadvisedFileRegion partition = new FadvisedFileRegion(spill,
+            info.getStartOffset(), info.getPartLength(), manageOsCache, readaheadLength,
+            readaheadPool, spillfile.getAbsolutePath(), 
+            shuffleBufferSize, shuffleTransferToAllowed);
+        writeFuture = ch.write(partition);
+        writeFuture.addListener(new ChannelFutureListener() {
+            // TODO error handling; distinguish IO/connection failures,
+            //      attribute to appropriate spill output
+          @Override
+          public void operationComplete(ChannelFuture future) {
+            if (future.isSuccess()) {
+              partition.transferSuccessful();
+            }
+            partition.releaseExternalResources();
+          }
+        });
+      } else {
+        // HTTPS cannot be done with zero copy.
+        final FadvisedChunkedFile chunk = new FadvisedChunkedFile(spill,
+            info.getStartOffset(), info.getPartLength(), sslFileBufferSize,
+            manageOsCache, readaheadLength, readaheadPool,
+            spillfile.getAbsolutePath());
+        writeFuture = ch.write(chunk);
+      }
+      return writeFuture;
+    }
+
+    protected void sendError(ChannelHandlerContext ctx,
+        HttpResponseStatus status) {
+      sendError(ctx, "", status);
+    }
+
+    protected void sendError(ChannelHandlerContext ctx, String message,
+        HttpResponseStatus status) {
+      HttpResponse response = new DefaultHttpResponse(HTTP_1_1, status);
+      response.setHeader(CONTENT_TYPE, "text/plain; charset=UTF-8");
+      // Put shuffle version into http header
+      response.setHeader(ShuffleHeader.HTTP_HEADER_NAME,
+          ShuffleHeader.DEFAULT_HTTP_HEADER_NAME);
+      response.setHeader(ShuffleHeader.HTTP_HEADER_VERSION,
+          ShuffleHeader.DEFAULT_HTTP_HEADER_VERSION);
+      response.setContent(
+        ChannelBuffers.copiedBuffer(message, CharsetUtil.UTF_8));
+
+      // Close the connection as soon as the error message is sent.
+      ctx.getChannel().write(response).addListener(ChannelFutureListener.CLOSE);
+    }
+
+    @Override
+    public void exceptionCaught(ChannelHandlerContext ctx, ExceptionEvent e)
+        throws Exception {
+      Channel ch = e.getChannel();
+      Throwable cause = e.getCause();
+      if (cause instanceof TooLongFrameException) {
+        sendError(ctx, BAD_REQUEST);
+        return;
+      } else if (cause instanceof IOException) {
+        if (cause instanceof ClosedChannelException) {
+          LOG.debug("Ignoring closed channel error", cause);
+          return;
+        }
+        String message = String.valueOf(cause.getMessage());
+        if (IGNORABLE_ERROR_MESSAGE.matcher(message).matches()) {
+          LOG.debug("Ignoring client socket close", cause);
+          return;
+        }
+      }
+
+      LOG.error("Shuffle error: ", cause);
+      if (ch.isConnected()) {
+        LOG.error("Shuffle error " + e);
+        sendError(ctx, INTERNAL_SERVER_ERROR);
+      }
+    }
+  }
+}


[22/43] tez git commit: TEZ-2090. Add tests for jobs running in external services. (sseth)

Posted by ss...@apache.org.
http://git-wip-us.apache.org/repos/asf/tez/blob/aadd0492/tez-ext-service-tests/src/test/java/org/apache/tez/tests/TestExternalTezServices.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/tests/TestExternalTezServices.java b/tez-ext-service-tests/src/test/java/org/apache/tez/tests/TestExternalTezServices.java
new file mode 100644
index 0000000..a93c1a4
--- /dev/null
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/tests/TestExternalTezServices.java
@@ -0,0 +1,183 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.tests;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+import java.util.Map;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.tez.client.TezClient;
+import org.apache.tez.dag.api.TezConfiguration;
+import org.apache.tez.dag.api.TezException;
+import org.apache.tez.dag.app.launcher.TezTestServiceNoOpContainerLauncher;
+import org.apache.tez.dag.app.rm.TezTestServiceTaskSchedulerService;
+import org.apache.tez.dag.app.taskcomm.TezTestServiceTaskCommunicatorImpl;
+import org.apache.tez.examples.HashJoinExample;
+import org.apache.tez.examples.JoinDataGen;
+import org.apache.tez.examples.JoinValidate;
+import org.apache.tez.service.MiniTezTestServiceCluster;
+import org.apache.tez.test.MiniTezCluster;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+public class TestExternalTezServices {
+
+  private static final Log LOG = LogFactory.getLog(TestExternalTezServices.class);
+
+  private static MiniTezCluster tezCluster;
+  private static MiniDFSCluster dfsCluster;
+  private static MiniTezTestServiceCluster tezTestServiceCluster;
+
+  private static Configuration clusterConf = new Configuration();
+  private static Configuration confForJobs;
+
+  private static FileSystem remoteFs;
+  private static FileSystem localFs;
+
+  private static TezClient sharedTezClient;
+
+  private static String TEST_ROOT_DIR = "target" + Path.SEPARATOR + TestExternalTezServices.class.getName()
+      + "-tmpDir";
+
+  @BeforeClass
+  public static void setup() throws IOException, TezException, InterruptedException {
+
+    localFs = FileSystem.getLocal(clusterConf);
+
+    try {
+      clusterConf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, TEST_ROOT_DIR);
+      dfsCluster =
+          new MiniDFSCluster.Builder(clusterConf).numDataNodes(1).format(true).racks(null).build();
+      remoteFs = dfsCluster.getFileSystem();
+      LOG.info("MiniDFSCluster started");
+    } catch (IOException io) {
+      throw new RuntimeException("problem starting mini dfs cluster", io);
+    }
+
+    tezCluster = new MiniTezCluster(TestExternalTezServices.class.getName(), 1, 1, 1);
+    Configuration conf = new Configuration();
+    conf.set("fs.defaultFS", remoteFs.getUri().toString()); // use HDFS
+    tezCluster.init(conf);
+    tezCluster.start();
+    LOG.info("MiniTezCluster started");
+
+    clusterConf.set("fs.defaultFS", remoteFs.getUri().toString()); // use HDFS
+    for (Map.Entry<String, String> entry : tezCluster.getConfig()) {
+      clusterConf.set(entry.getKey(), entry.getValue());
+    }
+    long jvmMax = Runtime.getRuntime().maxMemory();
+
+    tezTestServiceCluster = MiniTezTestServiceCluster
+        .create(TestExternalTezServices.class.getSimpleName(), 3, ((long) (jvmMax * 0.5d)), 1);
+    tezTestServiceCluster.init(clusterConf);
+    tezTestServiceCluster.start();
+    LOG.info("MiniTezTestServer started");
+
+    confForJobs = new Configuration(clusterConf);
+    for (Map.Entry<String, String> entry : tezTestServiceCluster
+        .getClusterSpecificConfiguration()) {
+      confForJobs.set(entry.getKey(), entry.getValue());
+    }
+
+    // TODO TEZ-2003 Once per vertex configuration is possible, run separate tests for push vs pull (regular threaded execution)
+
+    Path stagingDirPath = new Path("/tmp/tez-staging-dir");
+    remoteFs.mkdirs(stagingDirPath);
+    // This is currently configured to push tasks into the Service, and then use the standard RPC
+    confForJobs.set(TezConfiguration.TEZ_AM_STAGING_DIR, stagingDirPath.toString());
+    confForJobs.set(TezConfiguration.TEZ_AM_TASK_SCHEDULER_CLASS,
+        TezTestServiceTaskSchedulerService.class.getName());
+    confForJobs.set(TezConfiguration.TEZ_AM_CONTAINER_LAUNCHER_CLASS,
+        TezTestServiceNoOpContainerLauncher.class.getName());
+    confForJobs.set(TezConfiguration.TEZ_AM_TASK_COMMUNICATOR_CLASS,
+        TezTestServiceTaskCommunicatorImpl.class.getName());
+
+    TezConfiguration tezConf = new TezConfiguration(confForJobs);
+
+    sharedTezClient = TezClient.create(TestExternalTezServices.class.getSimpleName() + "_session",
+        tezConf, true);
+    sharedTezClient.start();
+    LOG.info("Shared TezSession started");
+    sharedTezClient.waitTillReady();
+    LOG.info("Shared TezSession ready for submission");
+
+  }
+
+  @AfterClass
+  public static void tearDown() throws IOException, TezException {
+    if (sharedTezClient != null) {
+      sharedTezClient.stop();
+      sharedTezClient = null;
+    }
+
+    if (tezTestServiceCluster != null) {
+      tezTestServiceCluster.stop();
+      tezTestServiceCluster = null;
+    }
+
+    if (tezCluster != null) {
+      tezCluster.stop();
+      tezCluster = null;
+    }
+    if (dfsCluster != null) {
+      dfsCluster.shutdown();
+      dfsCluster = null;
+    }
+    // TODO Add cleanup code.
+  }
+
+
+  @Test(timeout = 60000)
+  public void test1() throws Exception {
+    Path testDir = new Path("/tmp/testHashJoinExample");
+
+    remoteFs.mkdirs(testDir);
+
+    Path dataPath1 = new Path(testDir, "inPath1");
+    Path dataPath2 = new Path(testDir, "inPath2");
+    Path expectedOutputPath = new Path(testDir, "expectedOutputPath");
+    Path outPath = new Path(testDir, "outPath");
+
+    TezConfiguration tezConf = new TezConfiguration(confForJobs);
+
+    JoinDataGen dataGen = new JoinDataGen();
+    String[] dataGenArgs = new String[]{
+        dataPath1.toString(), "1048576", dataPath2.toString(), "524288",
+        expectedOutputPath.toString(), "2"};
+    assertEquals(0, dataGen.run(tezConf, dataGenArgs, sharedTezClient));
+
+    HashJoinExample joinExample = new HashJoinExample();
+    String[] args = new String[]{
+        dataPath1.toString(), dataPath2.toString(), "2", outPath.toString()};
+    assertEquals(0, joinExample.run(tezConf, args, sharedTezClient));
+
+    JoinValidate joinValidate = new JoinValidate();
+    String[] validateArgs = new String[]{
+        expectedOutputPath.toString(), outPath.toString(), "3"};
+    assertEquals(0, joinValidate.run(tezConf, validateArgs, sharedTezClient));
+
+    // Ensure this was actually submitted to the external cluster
+    assertTrue(tezTestServiceCluster.getNumSubmissions() > 0);
+  }
+}

http://git-wip-us.apache.org/repos/asf/tez/blob/aadd0492/tez-ext-service-tests/src/test/java/org/apache/tez/util/ProtoConverters.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/util/ProtoConverters.java b/tez-ext-service-tests/src/test/java/org/apache/tez/util/ProtoConverters.java
new file mode 100644
index 0000000..60ebc53
--- /dev/null
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/util/ProtoConverters.java
@@ -0,0 +1,172 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.util;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.tez.dag.api.DagTypeConverters;
+import org.apache.tez.dag.api.InputDescriptor;
+import org.apache.tez.dag.api.OutputDescriptor;
+import org.apache.tez.dag.api.ProcessorDescriptor;
+import org.apache.tez.dag.records.TezTaskAttemptID;
+import org.apache.tez.runtime.api.impl.GroupInputSpec;
+import org.apache.tez.runtime.api.impl.InputSpec;
+import org.apache.tez.runtime.api.impl.OutputSpec;
+import org.apache.tez.runtime.api.impl.TaskSpec;
+import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos.GroupInputSpecProto;
+import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos.IOSpecProto;
+import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos.TaskSpecProto;
+
+public class ProtoConverters {
+
+  public static TaskSpec getTaskSpecfromProto(TaskSpecProto taskSpecProto) {
+    TezTaskAttemptID taskAttemptID =
+        TezTaskAttemptID.fromString(taskSpecProto.getTaskAttemptIdString());
+
+    ProcessorDescriptor processorDescriptor = null;
+    if (taskSpecProto.hasProcessorDescriptor()) {
+      processorDescriptor = DagTypeConverters
+          .convertProcessorDescriptorFromDAGPlan(taskSpecProto.getProcessorDescriptor());
+    }
+
+    List<InputSpec> inputSpecList = new ArrayList<InputSpec>(taskSpecProto.getInputSpecsCount());
+    if (taskSpecProto.getInputSpecsCount() > 0) {
+      for (IOSpecProto inputSpecProto : taskSpecProto.getInputSpecsList()) {
+        inputSpecList.add(getInputSpecFromProto(inputSpecProto));
+      }
+    }
+
+    List<OutputSpec> outputSpecList =
+        new ArrayList<OutputSpec>(taskSpecProto.getOutputSpecsCount());
+    if (taskSpecProto.getOutputSpecsCount() > 0) {
+      for (IOSpecProto outputSpecProto : taskSpecProto.getOutputSpecsList()) {
+        outputSpecList.add(getOutputSpecFromProto(outputSpecProto));
+      }
+    }
+
+    List<GroupInputSpec> groupInputSpecs =
+        new ArrayList<GroupInputSpec>(taskSpecProto.getGroupedInputSpecsCount());
+    if (taskSpecProto.getGroupedInputSpecsCount() > 0) {
+      for (GroupInputSpecProto groupInputSpecProto : taskSpecProto.getGroupedInputSpecsList()) {
+        groupInputSpecs.add(getGroupInputSpecFromProto(groupInputSpecProto));
+      }
+    }
+
+    TaskSpec taskSpec =
+        new TaskSpec(taskAttemptID, taskSpecProto.getDagName(), taskSpecProto.getVertexName(),
+            taskSpecProto.getVertexParallelism(), processorDescriptor, inputSpecList,
+            outputSpecList, groupInputSpecs);
+    return taskSpec;
+  }
+
+  public static TaskSpecProto convertTaskSpecToProto(TaskSpec taskSpec) {
+    TaskSpecProto.Builder builder = TaskSpecProto.newBuilder();
+    builder.setTaskAttemptIdString(taskSpec.getTaskAttemptID().toString());
+    builder.setDagName(taskSpec.getDAGName());
+    builder.setVertexName(taskSpec.getVertexName());
+    builder.setVertexParallelism(taskSpec.getVertexParallelism());
+
+    if (taskSpec.getProcessorDescriptor() != null) {
+      builder.setProcessorDescriptor(
+          DagTypeConverters.convertToDAGPlan(taskSpec.getProcessorDescriptor()));
+    }
+
+    if (taskSpec.getInputs() != null && !taskSpec.getInputs().isEmpty()) {
+      for (InputSpec inputSpec : taskSpec.getInputs()) {
+        builder.addInputSpecs(convertInputSpecToProto(inputSpec));
+      }
+    }
+
+    if (taskSpec.getOutputs() != null && !taskSpec.getOutputs().isEmpty()) {
+      for (OutputSpec outputSpec : taskSpec.getOutputs()) {
+        builder.addOutputSpecs(convertOutputSpecToProto(outputSpec));
+      }
+    }
+
+    if (taskSpec.getGroupInputs() != null && !taskSpec.getGroupInputs().isEmpty()) {
+      for (GroupInputSpec groupInputSpec : taskSpec.getGroupInputs()) {
+        builder.addGroupedInputSpecs(convertGroupInputSpecToProto(groupInputSpec));
+
+      }
+    }
+    return builder.build();
+  }
+
+
+  public static InputSpec getInputSpecFromProto(IOSpecProto inputSpecProto) {
+    InputDescriptor inputDescriptor = null;
+    if (inputSpecProto.hasIoDescriptor()) {
+      inputDescriptor =
+          DagTypeConverters.convertInputDescriptorFromDAGPlan(inputSpecProto.getIoDescriptor());
+    }
+    InputSpec inputSpec = new InputSpec(inputSpecProto.getConnectedVertexName(), inputDescriptor,
+        inputSpecProto.getPhysicalEdgeCount());
+    return inputSpec;
+  }
+
+  public static IOSpecProto convertInputSpecToProto(InputSpec inputSpec) {
+    IOSpecProto.Builder builder = IOSpecProto.newBuilder();
+    if (inputSpec.getSourceVertexName() != null) {
+      builder.setConnectedVertexName(inputSpec.getSourceVertexName());
+    }
+    if (inputSpec.getInputDescriptor() != null) {
+      builder.setIoDescriptor(DagTypeConverters.convertToDAGPlan(inputSpec.getInputDescriptor()));
+    }
+    builder.setPhysicalEdgeCount(inputSpec.getPhysicalEdgeCount());
+    return builder.build();
+  }
+
+  public static OutputSpec getOutputSpecFromProto(IOSpecProto outputSpecProto) {
+    OutputDescriptor outputDescriptor = null;
+    if (outputSpecProto.hasIoDescriptor()) {
+      outputDescriptor =
+          DagTypeConverters.convertOutputDescriptorFromDAGPlan(outputSpecProto.getIoDescriptor());
+    }
+    OutputSpec outputSpec =
+        new OutputSpec(outputSpecProto.getConnectedVertexName(), outputDescriptor,
+            outputSpecProto.getPhysicalEdgeCount());
+    return outputSpec;
+  }
+
+  public static IOSpecProto convertOutputSpecToProto(OutputSpec outputSpec) {
+    IOSpecProto.Builder builder = IOSpecProto.newBuilder();
+    if (outputSpec.getDestinationVertexName() != null) {
+      builder.setConnectedVertexName(outputSpec.getDestinationVertexName());
+    }
+    if (outputSpec.getOutputDescriptor() != null) {
+      builder.setIoDescriptor(DagTypeConverters.convertToDAGPlan(outputSpec.getOutputDescriptor()));
+    }
+    builder.setPhysicalEdgeCount(outputSpec.getPhysicalEdgeCount());
+    return builder.build();
+  }
+
+  public static GroupInputSpec getGroupInputSpecFromProto(GroupInputSpecProto groupInputSpecProto) {
+    GroupInputSpec groupSpec = new GroupInputSpec(groupInputSpecProto.getGroupName(),
+        groupInputSpecProto.getGroupVerticesList(), DagTypeConverters
+        .convertInputDescriptorFromDAGPlan(groupInputSpecProto.getMergedInputDescriptor()));
+    return groupSpec;
+  }
+
+  public static GroupInputSpecProto convertGroupInputSpecToProto(GroupInputSpec groupInputSpec) {
+    GroupInputSpecProto.Builder builder = GroupInputSpecProto.newBuilder();
+    builder.setGroupName(groupInputSpec.getGroupName());
+    builder.addAllGroupVertices(groupInputSpec.getGroupVertices());
+    builder.setMergedInputDescriptor(
+        DagTypeConverters.convertToDAGPlan(groupInputSpec.getMergedInputDescriptor()));
+    return builder.build();
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/tez/blob/aadd0492/tez-ext-service-tests/src/test/proto/TezDaemonProtocol.proto
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/proto/TezDaemonProtocol.proto b/tez-ext-service-tests/src/test/proto/TezDaemonProtocol.proto
new file mode 100644
index 0000000..2f8b2e6
--- /dev/null
+++ b/tez-ext-service-tests/src/test/proto/TezDaemonProtocol.proto
@@ -0,0 +1,84 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+option java_package = "org.apache.tez.test.service.rpc";
+option java_outer_classname = "TezTestServiceProtocolProtos";
+option java_generic_services = true;
+option java_generate_equals_and_hash = true;
+
+import "DAGApiRecords.proto";
+
+message IOSpecProto {
+  optional string connected_vertex_name = 1;
+  optional TezEntityDescriptorProto io_descriptor = 2;
+  optional int32 physical_edge_count = 3;
+}
+
+message GroupInputSpecProto {
+  optional string group_name = 1;
+  repeated string group_vertices = 2;
+  optional TezEntityDescriptorProto merged_input_descriptor = 3;
+}
+
+message TaskSpecProto {
+  optional string task_attempt_id_string = 1;
+  optional string dag_name = 2;
+  optional string vertex_name = 3;
+  optional TezEntityDescriptorProto processor_descriptor = 4;
+  repeated IOSpecProto input_specs = 5;
+  repeated IOSpecProto output_specs = 6;
+  repeated GroupInputSpecProto grouped_input_specs = 7;
+  optional int32 vertex_parallelism = 8;
+}
+
+
+message SubmitWorkRequestProto {
+  optional string container_id_string = 1;
+  optional string am_host = 2;
+  optional int32 am_port = 3;
+  optional string token_identifier = 4;
+  optional bytes credentials_binary = 5;
+  optional string user = 6;
+  optional string application_id_string = 7;
+  optional int32 app_attempt_number = 8;
+  optional TaskSpecProto task_spec = 9;
+}
+
+message SubmitWorkResponseProto {
+}
+
+
+
+message RunContainerRequestProto {
+  optional string container_id_string = 1;
+  optional string am_host = 2;
+  optional int32 am_port = 3;
+  optional string token_identifier = 4;
+  optional bytes credentials_binary = 5;
+  optional string user = 6;
+  optional string application_id_string = 7;
+  optional int32 app_attempt_number = 8;
+}
+
+message RunContainerResponseProto {
+}
+
+service TezTestServiceProtocol {
+  rpc runContainer(RunContainerRequestProto) returns (RunContainerResponseProto);
+  rpc submitWork(SubmitWorkRequestProto) returns (SubmitWorkResponseProto);
+}

http://git-wip-us.apache.org/repos/asf/tez/blob/aadd0492/tez-ext-service-tests/src/test/resources/log4j.properties
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/resources/log4j.properties b/tez-ext-service-tests/src/test/resources/log4j.properties
new file mode 100644
index 0000000..531b68b
--- /dev/null
+++ b/tez-ext-service-tests/src/test/resources/log4j.properties
@@ -0,0 +1,19 @@
+#   Licensed under the Apache License, Version 2.0 (the "License");
+#   you may not use this file except in compliance with the License.
+#   You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#   Unless required by applicable law or agreed to in writing, software
+#   distributed under the License is distributed on an "AS IS" BASIS,
+#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#   See the License for the specific language governing permissions and
+#   limitations under the License.
+
+# log4j configuration used during build and unit tests
+
+log4j.rootLogger=info,stdout
+log4j.threshhold=ALL
+log4j.appender.stdout=org.apache.log4j.ConsoleAppender
+log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
+log4j.appender.stdout.layout.ConversionPattern=%d{ISO8601} %-5p [%t] %c{2} (%F:%M(%L)) - %m%n

http://git-wip-us.apache.org/repos/asf/tez/blob/aadd0492/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TezChild.java
----------------------------------------------------------------------
diff --git a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TezChild.java b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TezChild.java
index fd55992..3cba3ce 100644
--- a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TezChild.java
+++ b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TezChild.java
@@ -389,7 +389,7 @@ public class TezChild {
     private final Throwable throwable;
     private final String errorMessage;
 
-    ContainerExecutionResult(ExitStatus exitStatus, @Nullable Throwable throwable,
+    public ContainerExecutionResult(ExitStatus exitStatus, @Nullable Throwable throwable,
                              @Nullable String errorMessage) {
       this.exitStatus = exitStatus;
       this.throwable = throwable;

http://git-wip-us.apache.org/repos/asf/tez/blob/aadd0492/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TezTaskRunner.java
----------------------------------------------------------------------
diff --git a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TezTaskRunner.java b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TezTaskRunner.java
index de83889..f54814b 100644
--- a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TezTaskRunner.java
+++ b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TezTaskRunner.java
@@ -67,7 +67,7 @@ public class TezTaskRunner implements TezUmbilical, ErrorReporter {
   private final AtomicBoolean taskRunning;
   private final AtomicBoolean shutdownRequested = new AtomicBoolean(false);
 
-  TezTaskRunner(Configuration tezConf, UserGroupInformation ugi, String[] localDirs,
+  public TezTaskRunner(Configuration tezConf, UserGroupInformation ugi, String[] localDirs,
       TaskSpec taskSpec, int appAttemptNumber,
       Map<String, ByteBuffer> serviceConsumerMetadata, Map<String, String> serviceProviderEnvMap,
       Multimap<String, String> startedInputsMap, TaskReporter taskReporter,


[24/43] tez git commit: TEZ-2090. Add tests for jobs running in external services. (sseth)

Posted by ss...@apache.org.
TEZ-2090. Add tests for jobs running in external services. (sseth)


Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/aadd0492
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/aadd0492
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/aadd0492

Branch: refs/heads/TEZ-2003
Commit: aadd04927b0cc646db2579f17f903d8e24916bdc
Parents: 7b71d3b
Author: Siddharth Seth <ss...@apache.org>
Authored: Fri Feb 13 17:24:05 2015 -0800
Committer: Siddharth Seth <ss...@apache.org>
Committed: Fri May 8 14:41:01 2015 -0700

----------------------------------------------------------------------
 TEZ-2003-CHANGES.txt                            |   1 +
 pom.xml                                         |   6 +
 .../apache/tez/dag/api/TezConfiguration.java    |   2 +
 .../apache/tez/dag/api/TaskCommunicator.java    |   1 +
 .../tez/dag/api/TaskCommunicatorContext.java    |   3 +
 .../tez/dag/app/TezTaskCommunicatorImpl.java    |  42 +-
 .../dag/app/rm/TaskSchedulerEventHandler.java   |   2 +-
 tez-ext-service-tests/pom.xml                   | 161 ++++
 .../tez/dag/app/TezTestServiceCommunicator.java | 152 ++++
 .../TezTestServiceContainerLauncher.java        | 144 ++++
 .../TezTestServiceNoOpContainerLauncher.java    |  66 ++
 .../rm/TezTestServiceTaskSchedulerService.java  | 347 ++++++++
 .../TezTestServiceTaskCommunicatorImpl.java     | 182 ++++
 .../org/apache/tez/service/ContainerRunner.java |  27 +
 .../tez/service/MiniTezTestServiceCluster.java  | 163 ++++
 .../service/TezTestServiceConfConstants.java    |  41 +
 .../TezTestServiceProtocolBlockingPB.java       |  22 +
 .../tez/service/impl/ContainerRunnerImpl.java   | 512 +++++++++++
 .../apache/tez/service/impl/TezTestService.java | 126 +++
 .../impl/TezTestServiceProtocolClientImpl.java  |  82 ++
 .../impl/TezTestServiceProtocolServerImpl.java  | 133 +++
 .../tez/shufflehandler/FadvisedChunkedFile.java |  78 ++
 .../tez/shufflehandler/FadvisedFileRegion.java  | 160 ++++
 .../apache/tez/shufflehandler/IndexCache.java   | 199 +++++
 .../tez/shufflehandler/ShuffleHandler.java      | 840 +++++++++++++++++++
 .../tez/tests/TestExternalTezServices.java      | 183 ++++
 .../org/apache/tez/util/ProtoConverters.java    | 172 ++++
 .../src/test/proto/TezDaemonProtocol.proto      |  84 ++
 .../src/test/resources/log4j.properties         |  19 +
 .../org/apache/tez/runtime/task/TezChild.java   |   2 +-
 .../apache/tez/runtime/task/TezTaskRunner.java  |   2 +-
 31 files changed, 3943 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tez/blob/aadd0492/TEZ-2003-CHANGES.txt
----------------------------------------------------------------------
diff --git a/TEZ-2003-CHANGES.txt b/TEZ-2003-CHANGES.txt
index d7e4be5..975ce65 100644
--- a/TEZ-2003-CHANGES.txt
+++ b/TEZ-2003-CHANGES.txt
@@ -1,5 +1,6 @@
 ALL CHANGES:
   TEZ-2019. Temporarily allow the scheduler and launcher to be specified via configuration.
   TEZ-2006. Task communication plane needs to be pluggable.
+  TEZ-2090. Add tests for jobs running in external services.
 
 INCOMPATIBLE CHANGES:

http://git-wip-us.apache.org/repos/asf/tez/blob/aadd0492/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index ce4fa13..ca9db11 100644
--- a/pom.xml
+++ b/pom.xml
@@ -170,6 +170,11 @@
         <type>test-jar</type>
       </dependency>
       <dependency>
+        <groupId>org.apache.tez</groupId>
+        <artifactId>tez-ext-service-tests</artifactId>
+        <version>${project.version}</version>
+      </dependency>
+      <dependency>
         <groupId>org.apache.pig</groupId>
         <artifactId>pig</artifactId>
         <version>${pig.version}</version>
@@ -638,6 +643,7 @@
     <module>tez-ui</module>
     <module>tez-plugins</module>
     <module>tez-tools</module>
+    <module>tez-ext-service-tests</module>
     <module>tez-dist</module>
     <module>docs</module>
   </modules>

http://git-wip-us.apache.org/repos/asf/tez/blob/aadd0492/tez-api/src/main/java/org/apache/tez/dag/api/TezConfiguration.java
----------------------------------------------------------------------
diff --git a/tez-api/src/main/java/org/apache/tez/dag/api/TezConfiguration.java b/tez-api/src/main/java/org/apache/tez/dag/api/TezConfiguration.java
index 01e724e..1cd478e 100644
--- a/tez-api/src/main/java/org/apache/tez/dag/api/TezConfiguration.java
+++ b/tez-api/src/main/java/org/apache/tez/dag/api/TezConfiguration.java
@@ -1172,6 +1172,8 @@ public class TezConfiguration extends Configuration {
   public static final String TEZ_AM_CONTAINER_LAUNCHER_CLASS = TEZ_AM_PREFIX + "container-launcher.class";
   @ConfigurationScope(Scope.VERTEX)
   public static final String TEZ_AM_TASK_SCHEDULER_CLASS = TEZ_AM_PREFIX + "task-scheduler.class";
+  @ConfigurationScope(Scope.VERTEX)
+  public static final String TEZ_AM_TASK_COMMUNICATOR_CLASS = TEZ_AM_PREFIX + "task-communicator.class";
 
 
   // TODO only validate property here, value can also be validated if necessary

http://git-wip-us.apache.org/repos/asf/tez/blob/aadd0492/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicator.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicator.java b/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicator.java
index 97f9c16..c9f85e0 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicator.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicator.java
@@ -14,6 +14,7 @@
 
 package org.apache.tez.dag.api;
 
+import java.io.IOException;
 import java.net.InetSocketAddress;
 import java.util.Map;
 

http://git-wip-us.apache.org/repos/asf/tez/blob/aadd0492/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicatorContext.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicatorContext.java b/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicatorContext.java
index 9b2d889..41675fe 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicatorContext.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicatorContext.java
@@ -44,5 +44,8 @@ public interface TaskCommunicatorContext {
   // TODO TEZ-2003 Move to vertex, taskIndex, version
   void taskStartedRemotely(TezTaskAttemptID taskAttemptID, ContainerId containerId);
 
+  // TODO TEZ-2003 Add an API to register task failure - for example, a communication failure.
+  // This will have to take into consideration the TA_FAILED event
+
   // TODO Eventually Add methods to report availability stats to the scheduler.
 }

http://git-wip-us.apache.org/repos/asf/tez/blob/aadd0492/tez-dag/src/main/java/org/apache/tez/dag/app/TezTaskCommunicatorImpl.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/TezTaskCommunicatorImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/TezTaskCommunicatorImpl.java
index 5652937..258c927 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/TezTaskCommunicatorImpl.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/TezTaskCommunicatorImpl.java
@@ -74,16 +74,22 @@ public class TezTaskCommunicatorImpl extends TaskCommunicator {
       new ConcurrentHashMap<TaskAttempt, ContainerId>();
 
   private final TezTaskUmbilicalProtocol taskUmbilical;
+  private final String tokenIdentifier;
+  private final Token<JobTokenIdentifier> sessionToken;
   private InetSocketAddress address;
   private Server server;
 
-  private static final class ContainerInfo {
+  public static final class ContainerInfo {
 
-    ContainerInfo(ContainerId containerId) {
+    ContainerInfo(ContainerId containerId, String host, int port) {
       this.containerId = containerId;
+      this.host = host;
+      this.port = port;
     }
 
-    ContainerId containerId;
+    final ContainerId containerId;
+    public final String host;
+    public final int port;
     TezHeartbeatResponse lastResponse = null;
     TaskSpec taskSpec = null;
     long lastRequestId = 0;
@@ -110,6 +116,8 @@ public class TezTaskCommunicatorImpl extends TaskCommunicator {
     super(TezTaskCommunicatorImpl.class.getName());
     this.taskCommunicatorContext = taskCommunicatorContext;
     this.taskUmbilical = new TezTaskUmbilicalProtocolImpl();
+    this.tokenIdentifier = this.taskCommunicatorContext.getApplicationAttemptId().getApplicationId().toString();
+    this.sessionToken = TokenCache.getSessionToken(taskCommunicatorContext.getCredentials());
   }
 
 
@@ -130,9 +138,7 @@ public class TezTaskCommunicatorImpl extends TaskCommunicator {
       try {
         JobTokenSecretManager jobTokenSecretManager =
             new JobTokenSecretManager();
-        Token<JobTokenIdentifier> sessionToken = TokenCache.getSessionToken(taskCommunicatorContext.getCredentials());
-        jobTokenSecretManager.addTokenForJob(
-            taskCommunicatorContext.getApplicationAttemptId().getApplicationId().toString(), sessionToken);
+        jobTokenSecretManager.addTokenForJob(tokenIdentifier, sessionToken);
 
         server = new RPC.Builder(conf)
             .setProtocol(TezTaskUmbilicalProtocol.class)
@@ -182,7 +188,7 @@ public class TezTaskCommunicatorImpl extends TaskCommunicator {
 
   @Override
   public void registerRunningContainer(ContainerId containerId, String host, int port) {
-    ContainerInfo oldInfo = registeredContainers.putIfAbsent(containerId, new ContainerInfo(containerId));
+    ContainerInfo oldInfo = registeredContainers.putIfAbsent(containerId, new ContainerInfo(containerId, host, port));
     if (oldInfo != null) {
       throw new TezUncheckedException("Multiple registrations for containerId: " + containerId);
     }
@@ -230,9 +236,9 @@ public class TezTaskCommunicatorImpl extends TaskCommunicator {
                 ". Already registered to containerId: " + oldId);
       }
     }
-
   }
 
+
   @Override
   public void unregisterRunningTaskAttempt(TezTaskAttemptID taskAttemptID) {
     TaskAttempt taskAttempt = new TaskAttempt(taskAttemptID);
@@ -258,6 +264,18 @@ public class TezTaskCommunicatorImpl extends TaskCommunicator {
     return address;
   }
 
+  protected String getTokenIdentifier() {
+    return tokenIdentifier;
+  }
+
+  protected Token<JobTokenIdentifier> getSessionToken() {
+    return sessionToken;
+  }
+
+  protected TaskCommunicatorContext getTaskCommunicatorContext() {
+    return taskCommunicatorContext;
+  }
+
   public TezTaskUmbilicalProtocol getUmbilical() {
     return this.taskUmbilical;
   }
@@ -471,4 +489,12 @@ public class TezTaskCommunicatorImpl extends TaskCommunicator {
       return "TaskAttempt{" + "taskAttemptId=" + taskAttemptId + '}';
     }
   }
+
+  protected ContainerInfo getContainerInfo(ContainerId containerId) {
+    return registeredContainers.get(containerId);
+  }
+
+  protected ContainerId getContainerForAttempt(TezTaskAttemptID taskAttemptId) {
+    return attemptToContainerMap.get(new TaskAttempt(taskAttemptId));
+  }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tez/blob/aadd0492/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerEventHandler.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerEventHandler.java b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerEventHandler.java
index 62f82db..8c3ed87 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerEventHandler.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerEventHandler.java
@@ -350,7 +350,7 @@ public class TaskSchedulerEventHandler extends AbstractService
         try {
           Constructor<? extends TaskSchedulerService> ctor = taskSchedulerClazz
               .getConstructor(TaskSchedulerAppCallback.class, AppContext.class, String.class,
-                  Integer.class, String.class, Configuration.class);
+                  int.class, String.class, Configuration.class);
           ctor.setAccessible(true);
           TaskSchedulerService taskSchedulerService =
               ctor.newInstance(this, appContext, host, port, trackingUrl, getConfig());

http://git-wip-us.apache.org/repos/asf/tez/blob/aadd0492/tez-ext-service-tests/pom.xml
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/pom.xml b/tez-ext-service-tests/pom.xml
new file mode 100644
index 0000000..37f68b1
--- /dev/null
+++ b/tez-ext-service-tests/pom.xml
@@ -0,0 +1,161 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  ~ Licensed under the Apache License, Version 2.0 (the "License");
+  ~ you may not use this file except in compliance with the License.
+  ~ You may obtain a copy of the License at
+  ~
+  ~     http://www.apache.org/licenses/LICENSE-2.0
+  ~
+  ~ Unless required by applicable law or agreed to in writing, software
+  ~ distributed under the License is distributed on an "AS IS" BASIS,
+  ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  ~ See the License for the specific language governing permissions and
+  ~ limitations under the License.
+  -->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+  <parent>
+    <artifactId>tez</artifactId>
+    <groupId>org.apache.tez</groupId>
+    <version>0.7.0-SNAPSHOT</version>
+  </parent>
+
+  <!-- TODO TEZ-2003 Merge this into the tez-tests module -->
+  <artifactId>tez-ext-service-tests</artifactId>
+
+  <dependencies>
+    <dependency>
+      <groupId>log4j</groupId>
+      <artifactId>log4j</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>com.google.guava</groupId>
+      <artifactId>guava</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-common</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>junit</groupId>
+      <artifactId>junit</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.mockito</groupId>
+      <artifactId>mockito-all</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.tez</groupId>
+      <artifactId>tez-runtime-internals</artifactId>
+    </dependency>
+    <dependency>
+      <!-- Required for the ShuffleHandler -->
+      <groupId>org.apache.tez</groupId>
+      <artifactId>tez-runtime-library</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.tez</groupId>
+      <artifactId>tez-dag</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.tez</groupId>
+      <artifactId>tez-tests</artifactId>
+      <scope>test</scope>
+      <type>test-jar</type>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-hdfs</artifactId>
+      <scope>test</scope>
+      <type>test-jar</type>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-yarn-server-tests</artifactId>
+      <type>test-jar</type>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-common</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-common</artifactId>
+      <type>test-jar</type>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-mapreduce-client-jobclient</artifactId>
+      <type>test-jar</type>
+      <scope>test</scope>
+    </dependency>
+
+  </dependencies>
+
+  <build>
+    <!--
+     Include all files in src/main/resources.  By default, do not apply property
+     substitution (filtering=false), but do apply property substitution to
+     version-info.properties (filtering=true).  This will substitute the
+     version information correctly, but prevent Maven from altering other files.
+     -->
+    <resources>
+      <resource>
+        <directory>${basedir}/src/main/resources</directory>
+        <excludes>
+          <exclude>tez-api-version-info.properties</exclude>
+        </excludes>
+        <filtering>false</filtering>
+      </resource>
+      <resource>
+        <directory>${basedir}/src/main/resources</directory>
+        <includes>
+          <include>tez-api-version-info.properties</include>
+        </includes>
+        <filtering>true</filtering>
+      </resource>
+    </resources>
+    <plugins>
+      <plugin>
+        <groupId>org.apache.rat</groupId>
+        <artifactId>apache-rat-plugin</artifactId>
+      </plugin>
+      <plugin>
+        <groupId>org.apache.hadoop</groupId>
+        <artifactId>hadoop-maven-plugins</artifactId>
+        <executions>
+          <execution>
+            <id>compile-protoc</id>
+            <phase>generate-sources</phase>
+            <goals>
+              <goal>protoc</goal>
+            </goals>
+            <configuration>
+              <protocVersion>${protobuf.version}</protocVersion>
+              <protocCommand>${protoc.path}</protocCommand>
+              <imports>
+                <param>${basedir}/src/test/proto</param>
+                <param>${basedir}/../tez-api/src/main/proto</param>
+              </imports>
+              <source>
+                <directory>${basedir}/src/test/proto</directory>
+                <includes>
+                  <include>TezDaemonProtocol.proto</include>
+                </includes>
+              </source>
+              <output>${project.build.directory}/generated-test-sources/java</output>
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
+    </plugins>
+  </build>
+
+</project>

http://git-wip-us.apache.org/repos/asf/tez/blob/aadd0492/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/TezTestServiceCommunicator.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/TezTestServiceCommunicator.java b/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/TezTestServiceCommunicator.java
new file mode 100644
index 0000000..ac50878
--- /dev/null
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/TezTestServiceCommunicator.java
@@ -0,0 +1,152 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.dag.app;
+
+
+import java.util.concurrent.Callable;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ConcurrentMap;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+
+import com.google.common.util.concurrent.FutureCallback;
+import com.google.common.util.concurrent.Futures;
+import com.google.common.util.concurrent.ListenableFuture;
+import com.google.common.util.concurrent.ListeningExecutorService;
+import com.google.common.util.concurrent.MoreExecutors;
+import com.google.common.util.concurrent.ThreadFactoryBuilder;
+import com.google.protobuf.Message;
+import org.apache.hadoop.service.AbstractService;
+import org.apache.tez.service.TezTestServiceProtocolBlockingPB;
+import org.apache.tez.service.impl.TezTestServiceProtocolClientImpl;
+import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos.RunContainerRequestProto;
+import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos.RunContainerResponseProto;
+import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos.SubmitWorkRequestProto;
+import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos.SubmitWorkResponseProto;
+
+public class TezTestServiceCommunicator extends AbstractService {
+
+  private final ConcurrentMap<String, TezTestServiceProtocolBlockingPB> hostProxies;
+  private final ListeningExecutorService executor;
+
+  // TODO Convert this into a singleton
+  public TezTestServiceCommunicator(int numThreads) {
+    super(TezTestServiceCommunicator.class.getSimpleName());
+    ExecutorService localExecutor = Executors.newFixedThreadPool(numThreads,
+        new ThreadFactoryBuilder().setNameFormat("TezTestServiceCommunicator #%2d").build());
+    this.hostProxies = new ConcurrentHashMap<String, TezTestServiceProtocolBlockingPB>();
+    executor = MoreExecutors.listeningDecorator(localExecutor);
+  }
+
+  @Override
+  public void serviceStop() {
+    executor.shutdownNow();
+  }
+
+
+  public void runContainer(RunContainerRequestProto request, String host, int port,
+                           final ExecuteRequestCallback<RunContainerResponseProto> callback) {
+    ListenableFuture<RunContainerResponseProto> future = executor.submit(new RunContainerCallable(request, host, port));
+    Futures.addCallback(future, new FutureCallback<RunContainerResponseProto>() {
+      @Override
+      public void onSuccess(RunContainerResponseProto result) {
+        callback.setResponse(result);
+      }
+
+      @Override
+      public void onFailure(Throwable t) {
+        callback.indicateError(t);
+      }
+    });
+
+  }
+
+  public void submitWork(SubmitWorkRequestProto request, String host, int port,
+                         final ExecuteRequestCallback<SubmitWorkResponseProto> callback) {
+    ListenableFuture<SubmitWorkResponseProto> future = executor.submit(new SubmitWorkCallable(request, host, port));
+    Futures.addCallback(future, new FutureCallback<SubmitWorkResponseProto>() {
+      @Override
+      public void onSuccess(SubmitWorkResponseProto result) {
+        callback.setResponse(result);
+      }
+
+      @Override
+      public void onFailure(Throwable t) {
+        callback.indicateError(t);
+      }
+    });
+
+  }
+
+
+  private class RunContainerCallable implements Callable<RunContainerResponseProto> {
+
+    final String hostname;
+    final int port;
+    final RunContainerRequestProto request;
+
+    private RunContainerCallable(RunContainerRequestProto request, String hostname, int port) {
+      this.hostname = hostname;
+          this.port = port;
+      this.request = request;
+    }
+
+    @Override
+    public RunContainerResponseProto call() throws Exception {
+      return getProxy(hostname, port).runContainer(null, request);
+    }
+  }
+
+  private class SubmitWorkCallable implements Callable<SubmitWorkResponseProto> {
+    final String hostname;
+    final int port;
+    final SubmitWorkRequestProto request;
+
+    private SubmitWorkCallable(SubmitWorkRequestProto request, String hostname, int port) {
+      this.hostname = hostname;
+      this.port = port;
+      this.request = request;
+    }
+
+    @Override
+    public SubmitWorkResponseProto call() throws Exception {
+      return getProxy(hostname, port).submitWork(null, request);
+    }
+  }
+
+  public interface ExecuteRequestCallback<T extends Message> {
+    void setResponse(T response);
+    void indicateError(Throwable t);
+  }
+
+  private TezTestServiceProtocolBlockingPB getProxy(String hostname, int port) {
+    String hostId = getHostIdentifier(hostname, port);
+
+    TezTestServiceProtocolBlockingPB proxy = hostProxies.get(hostId);
+    if (proxy == null) {
+      proxy = new TezTestServiceProtocolClientImpl(getConfig(), hostname, port);
+      TezTestServiceProtocolBlockingPB proxyOld = hostProxies.putIfAbsent(hostId, proxy);
+      if (proxyOld != null) {
+        // TODO Shutdown the new proxy.
+        proxy = proxyOld;
+      }
+    }
+    return proxy;
+  }
+
+  private String getHostIdentifier(String hostname, int port) {
+    return hostname + ":" + port;
+  }
+}

http://git-wip-us.apache.org/repos/asf/tez/blob/aadd0492/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/launcher/TezTestServiceContainerLauncher.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/launcher/TezTestServiceContainerLauncher.java b/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/launcher/TezTestServiceContainerLauncher.java
new file mode 100644
index 0000000..e83165b
--- /dev/null
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/launcher/TezTestServiceContainerLauncher.java
@@ -0,0 +1,144 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.dag.app.launcher;
+
+import com.google.common.base.Preconditions;
+import com.google.protobuf.ByteString;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.service.AbstractService;
+import org.apache.hadoop.yarn.api.ApplicationConstants;
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.util.Clock;
+import org.apache.tez.dag.app.AppContext;
+import org.apache.tez.dag.app.TaskAttemptListener;
+import org.apache.tez.dag.app.TezTestServiceCommunicator;
+import org.apache.tez.dag.app.rm.NMCommunicatorEvent;
+import org.apache.tez.dag.app.rm.NMCommunicatorLaunchRequestEvent;
+import org.apache.tez.dag.app.rm.container.AMContainerEvent;
+import org.apache.tez.dag.app.rm.container.AMContainerEventLaunchFailed;
+import org.apache.tez.dag.app.rm.container.AMContainerEventLaunched;
+import org.apache.tez.dag.app.rm.container.AMContainerEventType;
+import org.apache.tez.dag.history.DAGHistoryEvent;
+import org.apache.tez.dag.history.events.ContainerLaunchedEvent;
+import org.apache.tez.service.TezTestServiceConfConstants;
+import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos;
+import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos.RunContainerRequestProto;
+
+public class TezTestServiceContainerLauncher extends AbstractService implements ContainerLauncher {
+
+  // TODO Support interruptability of tasks which haven't yet been launched.
+
+  // TODO May need multiple connections per target machine, depending upon how synchronization is handled in the RPC layer
+
+  static final Log LOG = LogFactory.getLog(TezTestServiceContainerLauncher.class);
+
+  private final AppContext context;
+  private final String tokenIdentifier;
+  private final TaskAttemptListener tal;
+  private final int servicePort;
+  private final TezTestServiceCommunicator communicator;
+  private final Clock clock;
+
+
+  // Configuration passed in here to set up final parameters
+  public TezTestServiceContainerLauncher(AppContext appContext, Configuration conf,
+                                         TaskAttemptListener tal) {
+    super(TezTestServiceContainerLauncher.class.getName());
+    this.clock = appContext.getClock();
+    int numThreads = conf.getInt(TezTestServiceConfConstants.TEZ_TEST_SERVICE_AM_COMMUNICATOR_NUM_THREADS,
+        TezTestServiceConfConstants.TEZ_TEST_SERVICE_AM_COMMUNICATOR_NUM_THREADS_DEFAULT);
+
+    this.servicePort = conf.getInt(TezTestServiceConfConstants.TEZ_TEST_SERVICE_RPC_PORT, -1);
+    Preconditions.checkArgument(servicePort > 0,
+        TezTestServiceConfConstants.TEZ_TEST_SERVICE_RPC_PORT + " must be set");
+    this.communicator = new TezTestServiceCommunicator(numThreads);
+    this.context = appContext;
+    this.tokenIdentifier = context.getApplicationID().toString();
+    this.tal = tal;
+  }
+
+  @Override
+  public void serviceInit(Configuration conf) {
+    communicator.init(conf);
+  }
+
+  @Override
+  public void serviceStart() {
+    communicator.start();
+  }
+
+  @Override
+  public void serviceStop() {
+    communicator.stop();
+  }
+
+  @Override
+  public void handle(NMCommunicatorEvent event) {
+    switch (event.getType()) {
+      case CONTAINER_LAUNCH_REQUEST:
+        final NMCommunicatorLaunchRequestEvent launchEvent = (NMCommunicatorLaunchRequestEvent) event;
+        RunContainerRequestProto runRequest = constructRunContainerRequest(launchEvent);
+        communicator.runContainer(runRequest, launchEvent.getNodeId().getHost(),
+            launchEvent.getNodeId().getPort(),
+            new TezTestServiceCommunicator.ExecuteRequestCallback<TezTestServiceProtocolProtos.RunContainerResponseProto>() {
+              @Override
+              public void setResponse(TezTestServiceProtocolProtos.RunContainerResponseProto response) {
+                LOG.info("Container: " + launchEvent.getContainerId() + " launch succeeded on host: " + launchEvent.getNodeId());
+                context.getEventHandler().handle(new AMContainerEventLaunched(launchEvent.getContainerId()));
+                ContainerLaunchedEvent lEvt = new ContainerLaunchedEvent(
+                    launchEvent.getContainerId(), clock.getTime(), context.getApplicationAttemptId());
+                context.getHistoryHandler().handle(new DAGHistoryEvent(
+                    null, lEvt));
+              }
+
+              @Override
+              public void indicateError(Throwable t) {
+                LOG.error("Failed to launch container: " + launchEvent.getContainer() + " on host: " + launchEvent.getNodeId(), t);
+                sendContainerLaunchFailedMsg(launchEvent.getContainerId(), t);
+              }
+            });
+        break;
+      case CONTAINER_STOP_REQUEST:
+        LOG.info("DEBUG: Ignoring STOP_REQUEST for event: " + event);
+        // that the container is actually done (normally received from RM)
+        // TODO Sending this out for an un-launched container is invalid
+        context.getEventHandler().handle(new AMContainerEvent(event.getContainerId(),
+            AMContainerEventType.C_NM_STOP_SENT));
+        break;
+    }
+  }
+
+  private RunContainerRequestProto constructRunContainerRequest(NMCommunicatorLaunchRequestEvent event) {
+    RunContainerRequestProto.Builder builder = RunContainerRequestProto.newBuilder();
+    builder.setAmHost(tal.getAddress().getHostName()).setAmPort(tal.getAddress().getPort());
+    builder.setAppAttemptNumber(event.getContainer().getId().getApplicationAttemptId().getAttemptId());
+    builder.setApplicationIdString(
+        event.getContainer().getId().getApplicationAttemptId().getApplicationId().toString());
+    builder.setTokenIdentifier(tokenIdentifier);
+    builder.setContainerIdString(event.getContainer().getId().toString());
+    builder.setCredentialsBinary(
+        ByteString.copyFrom(event.getContainerLaunchContext().getTokens()));
+    // TODO Avoid reading this from the environment
+    builder.setUser(System.getenv(ApplicationConstants.Environment.USER.name()));
+    return builder.build();
+  }
+
+  @SuppressWarnings("unchecked")
+  void sendContainerLaunchFailedMsg(ContainerId containerId, Throwable t) {
+    context.getEventHandler().handle(new AMContainerEventLaunchFailed(containerId, t == null ? "" : t.getMessage()));
+  }
+}

http://git-wip-us.apache.org/repos/asf/tez/blob/aadd0492/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/launcher/TezTestServiceNoOpContainerLauncher.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/launcher/TezTestServiceNoOpContainerLauncher.java b/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/launcher/TezTestServiceNoOpContainerLauncher.java
new file mode 100644
index 0000000..8c8e486
--- /dev/null
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/launcher/TezTestServiceNoOpContainerLauncher.java
@@ -0,0 +1,66 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.dag.app.launcher;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.service.AbstractService;
+import org.apache.hadoop.yarn.util.Clock;
+import org.apache.tez.dag.app.AppContext;
+import org.apache.tez.dag.app.TaskAttemptListener;
+import org.apache.tez.dag.app.rm.NMCommunicatorEvent;
+import org.apache.tez.dag.app.rm.NMCommunicatorLaunchRequestEvent;
+import org.apache.tez.dag.app.rm.container.AMContainerEvent;
+import org.apache.tez.dag.app.rm.container.AMContainerEventLaunched;
+import org.apache.tez.dag.app.rm.container.AMContainerEventType;
+import org.apache.tez.dag.history.DAGHistoryEvent;
+import org.apache.tez.dag.history.events.ContainerLaunchedEvent;
+
+public class TezTestServiceNoOpContainerLauncher extends AbstractService implements ContainerLauncher {
+
+  static final Log LOG = LogFactory.getLog(TezTestServiceNoOpContainerLauncher.class);
+
+  private final AppContext context;
+  private final Clock clock;
+
+  public TezTestServiceNoOpContainerLauncher(AppContext appContext, Configuration conf,
+                                         TaskAttemptListener tal) {
+    super(TezTestServiceNoOpContainerLauncher.class.getName());
+    this.context = appContext;
+    this.clock = appContext.getClock();
+  }
+
+  @Override
+  public void handle(NMCommunicatorEvent event) {
+    switch(event.getType()) {
+      case CONTAINER_LAUNCH_REQUEST:
+        final NMCommunicatorLaunchRequestEvent launchEvent = (NMCommunicatorLaunchRequestEvent) event;
+        LOG.info("No-op launch for container: " + launchEvent.getContainerId() + " succeeded on host: " + launchEvent.getNodeId());
+        context.getEventHandler().handle(new AMContainerEventLaunched(launchEvent.getContainerId()));
+        ContainerLaunchedEvent lEvt = new ContainerLaunchedEvent(
+            launchEvent.getContainerId(), clock.getTime(), context.getApplicationAttemptId());
+        context.getHistoryHandler().handle(new DAGHistoryEvent(
+            null, lEvt));
+        break;
+      case CONTAINER_STOP_REQUEST:
+        LOG.info("DEBUG: Ignoring STOP_REQUEST for event: " + event);
+        context.getEventHandler().handle(new AMContainerEvent(event.getContainerId(),
+            AMContainerEventType.C_NM_STOP_SENT));
+        break;
+    }
+
+  }
+}

http://git-wip-us.apache.org/repos/asf/tez/blob/aadd0492/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/rm/TezTestServiceTaskSchedulerService.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/rm/TezTestServiceTaskSchedulerService.java b/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/rm/TezTestServiceTaskSchedulerService.java
new file mode 100644
index 0000000..e3c18bf
--- /dev/null
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/rm/TezTestServiceTaskSchedulerService.java
@@ -0,0 +1,347 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.dag.app.rm;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Random;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ConcurrentMap;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import com.google.common.base.Preconditions;
+import com.google.common.primitives.Ints;
+import com.google.common.util.concurrent.ThreadFactoryBuilder;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterResponse;
+import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
+import org.apache.hadoop.yarn.api.records.Container;
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.api.records.ContainerStatus;
+import org.apache.hadoop.yarn.api.records.NodeId;
+import org.apache.hadoop.yarn.api.records.NodeReport;
+import org.apache.hadoop.yarn.api.records.Priority;
+import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.client.api.AMRMClient;
+import org.apache.hadoop.yarn.client.api.async.AMRMClientAsync;
+import org.apache.hadoop.yarn.exceptions.YarnException;
+import org.apache.tez.dag.api.TezUncheckedException;
+import org.apache.tez.dag.app.AppContext;
+import org.apache.tez.service.TezTestServiceConfConstants;
+
+
+// TODO Registration with RM - so that the AM is considered dead and restarted in the expiry interval - 10 minutes.
+
+public class TezTestServiceTaskSchedulerService extends TaskSchedulerService {
+
+  private static final Log LOG = LogFactory.getLog(TezTestServiceTaskSchedulerService.class);
+
+  private final ExecutorService appCallbackExecutor;
+  private final TaskSchedulerAppCallback appClientDelegate;
+  private final AppContext appContext;
+  private final List<String> serviceHosts;
+  private final ContainerFactory containerFactory;
+  private final Random random = new Random();
+  // Currently all services must be running on the same port.
+  private final int containerPort;
+
+  private final String clientHostname;
+  private final int clientPort;
+  private final String trackingUrl;
+  private final AtomicBoolean isStopped = new AtomicBoolean(false);
+  private final ConcurrentMap<Object, ContainerId> runningTasks =
+      new ConcurrentHashMap<Object, ContainerId>();
+
+  private final AMRMClientAsync<AMRMClient.ContainerRequest> amRmClient;
+
+  // Per instance
+  private final int memoryPerInstance;
+  private final int coresPerInstance;
+  private final int executorsPerInstance;
+
+  // Per Executor Thread
+  private final Resource resourcePerContainer;
+
+
+  public TezTestServiceTaskSchedulerService(TaskSchedulerAppCallback appClient,
+                                            AppContext appContext,
+                                            String clientHostname, int clientPort,
+                                            String trackingUrl,
+                                            Configuration conf) {
+    // Accepting configuration here to allow setting up fields as final
+    super(TezTestServiceTaskSchedulerService.class.getName());
+    this.appCallbackExecutor = createAppCallbackExecutorService();
+    this.appClientDelegate = createAppCallbackDelegate(appClient);
+    this.appContext = appContext;
+    this.serviceHosts = new LinkedList<String>();
+    this.containerFactory = new ContainerFactory(appContext);
+
+    this.memoryPerInstance = conf
+        .getInt(TezTestServiceConfConstants.TEZ_TEST_SERVICE_MEMORY_PER_INSTANCE_MB, -1);
+    Preconditions.checkArgument(memoryPerInstance > 0,
+        TezTestServiceConfConstants.TEZ_TEST_SERVICE_MEMORY_PER_INSTANCE_MB +
+            " must be configured");
+
+    this.executorsPerInstance = conf.getInt(
+        TezTestServiceConfConstants.TEZ_TEST_SERVICE_NUM_EXECUTORS_PER_INSTANCE,
+        -1);
+    Preconditions.checkArgument(executorsPerInstance > 0,
+        TezTestServiceConfConstants.TEZ_TEST_SERVICE_NUM_EXECUTORS_PER_INSTANCE +
+            " must be configured");
+
+    this.coresPerInstance = conf
+        .getInt(TezTestServiceConfConstants.TEZ_TEST_SERVICE_VCPUS_PER_INSTANCE,
+            executorsPerInstance);
+
+    this.containerPort = conf.getInt(TezTestServiceConfConstants.TEZ_TEST_SERVICE_RPC_PORT, -1);
+    Preconditions.checkArgument(executorsPerInstance > 0,
+        TezTestServiceConfConstants.TEZ_TEST_SERVICE_RPC_PORT + " must be configured");
+
+    this.clientHostname = clientHostname;
+    this.clientPort = clientPort;
+    this.trackingUrl = trackingUrl;
+
+    int memoryPerContainer = (int) (memoryPerInstance / (float) executorsPerInstance);
+    int coresPerContainer = (int) (coresPerInstance / (float) executorsPerInstance);
+    this.resourcePerContainer = Resource.newInstance(memoryPerContainer, coresPerContainer);
+    this.amRmClient = TezAMRMClientAsync.createAMRMClientAsync(5000, new FakeAmRmCallbackHandler());
+
+    String[] hosts = conf.getTrimmedStrings(TezTestServiceConfConstants.TEZ_TEST_SERVICE_HOSTS);
+    if (hosts == null || hosts.length == 0) {
+      hosts = new String[]{"localhost"};
+    }
+    for (String host : hosts) {
+      serviceHosts.add(host);
+    }
+
+    LOG.info("Running with configuration: " +
+        "memoryPerInstance=" + memoryPerInstance +
+        ", vcoresPerInstance=" + coresPerInstance +
+        ", executorsPerInstance=" + executorsPerInstance +
+        ", resourcePerContainerInferred=" + resourcePerContainer +
+        ", hosts=" + serviceHosts.toString());
+
+  }
+
+  @Override
+  public void serviceInit(Configuration conf) {
+    amRmClient.init(conf);
+  }
+
+  @Override
+  public void serviceStart() {
+    amRmClient.start();
+    RegisterApplicationMasterResponse response;
+    try {
+      amRmClient.registerApplicationMaster(clientHostname, clientPort, trackingUrl);
+    } catch (YarnException e) {
+      throw new TezUncheckedException(e);
+    } catch (IOException e) {
+      throw new TezUncheckedException(e);
+    }
+  }
+
+  @Override
+  public void serviceStop() {
+    if (!this.isStopped.getAndSet(true)) {
+
+      try {
+        TaskSchedulerAppCallback.AppFinalStatus status = appClientDelegate.getFinalAppStatus();
+        amRmClient.unregisterApplicationMaster(status.exitStatus, status.exitMessage,
+            status.postCompletionTrackingUrl);
+      } catch (YarnException e) {
+        throw new TezUncheckedException(e);
+      } catch (IOException e) {
+        throw new TezUncheckedException(e);
+      }
+      appCallbackExecutor.shutdownNow();
+    }
+  }
+
+  @Override
+  public Resource getAvailableResources() {
+    // TODO This needs information about all running executors, and the amount of memory etc available across the cluster.
+    return Resource
+        .newInstance(Ints.checkedCast(serviceHosts.size() * memoryPerInstance),
+            serviceHosts.size() * coresPerInstance);
+  }
+
+  @Override
+  public int getClusterNodeCount() {
+    return serviceHosts.size();
+  }
+
+  @Override
+  public void resetMatchLocalityForAllHeldContainers() {
+  }
+
+  @Override
+  public Resource getTotalResources() {
+    return Resource
+        .newInstance(Ints.checkedCast(serviceHosts.size() * memoryPerInstance),
+            serviceHosts.size() * coresPerInstance);
+  }
+
+  @Override
+  public void blacklistNode(NodeId nodeId) {
+    LOG.info("DEBUG: BlacklistNode not supported");
+  }
+
+  @Override
+  public void unblacklistNode(NodeId nodeId) {
+    LOG.info("DEBUG: unBlacklistNode not supported");
+  }
+
+  @Override
+  public void allocateTask(Object task, Resource capability, String[] hosts, String[] racks,
+                           Priority priority, Object containerSignature, Object clientCookie) {
+    String host = selectHost(hosts);
+    Container container =
+        containerFactory.createContainer(resourcePerContainer, priority, host, containerPort);
+    runningTasks.put(task, container.getId());
+    appClientDelegate.taskAllocated(task, clientCookie, container);
+  }
+
+
+  @Override
+  public void allocateTask(Object task, Resource capability, ContainerId containerId,
+                           Priority priority, Object containerSignature, Object clientCookie) {
+    String host = selectHost(null);
+    Container container =
+        containerFactory.createContainer(resourcePerContainer, priority, host, containerPort);
+    runningTasks.put(task, container.getId());
+    appClientDelegate.taskAllocated(task, clientCookie, container);
+  }
+
+  @Override
+  public boolean deallocateTask(Object task, boolean taskSucceeded) {
+    ContainerId containerId = runningTasks.remove(task);
+    if (containerId == null) {
+      LOG.error("Could not determine ContainerId for task: " + task +
+          " . Could have hit a race condition. Ignoring." +
+          " The query may hang since this \"unknown\" container is now taking up a slot permanently");
+      return false;
+    }
+    appClientDelegate.containerBeingReleased(containerId);
+    return true;
+  }
+
+  @Override
+  public Object deallocateContainer(ContainerId containerId) {
+    LOG.info("DEBUG: Ignoring deallocateContainer for containerId: " + containerId);
+    return null;
+  }
+
+  @Override
+  public void setShouldUnregister() {
+
+  }
+
+  @Override
+  public boolean hasUnregistered() {
+    // Nothing to do. No registration involved.
+    return true;
+  }
+
+  private ExecutorService createAppCallbackExecutorService() {
+    return Executors.newSingleThreadExecutor(new ThreadFactoryBuilder()
+        .setNameFormat("TaskSchedulerAppCaller #%d").setDaemon(true).build());
+  }
+
+  private TaskSchedulerAppCallback createAppCallbackDelegate(
+      TaskSchedulerAppCallback realAppClient) {
+    return new TaskSchedulerAppCallbackWrapper(realAppClient,
+        appCallbackExecutor);
+  }
+
+  private String selectHost(String[] requestedHosts) {
+    String host = null;
+    if (requestedHosts != null && requestedHosts.length > 0) {
+      Arrays.sort(requestedHosts);
+      host = requestedHosts[0];
+      LOG.info("Selected host: " + host + " from requested hosts: " + Arrays.toString(requestedHosts));
+    } else {
+      host = serviceHosts.get(random.nextInt(serviceHosts.size()));
+      LOG.info("Selected random host: " + host + " since the request contained no host information");
+    }
+    return host;
+  }
+
+  static class ContainerFactory {
+    final AppContext appContext;
+    AtomicInteger nextId;
+
+    public ContainerFactory(AppContext appContext) {
+      this.appContext = appContext;
+      this.nextId = new AtomicInteger(2);
+    }
+
+    public Container createContainer(Resource capability, Priority priority, String hostname, int port) {
+      ApplicationAttemptId appAttemptId = appContext.getApplicationAttemptId();
+      ContainerId containerId = ContainerId.newInstance(appAttemptId, nextId.getAndIncrement());
+      NodeId nodeId = NodeId.newInstance(hostname, port);
+      String nodeHttpAddress = "hostname:0";
+
+      Container container = Container.newInstance(containerId,
+          nodeId,
+          nodeHttpAddress,
+          capability,
+          priority,
+          null);
+
+      return container;
+    }
+  }
+
+  private static class FakeAmRmCallbackHandler implements AMRMClientAsync.CallbackHandler {
+
+    @Override
+    public void onContainersCompleted(List<ContainerStatus> statuses) {
+
+    }
+
+    @Override
+    public void onContainersAllocated(List<Container> containers) {
+
+    }
+
+    @Override
+    public void onShutdownRequest() {
+
+    }
+
+    @Override
+    public void onNodesUpdated(List<NodeReport> updatedNodes) {
+
+    }
+
+    @Override
+    public float getProgress() {
+      return 0;
+    }
+
+    @Override
+    public void onError(Throwable e) {
+
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/tez/blob/aadd0492/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/taskcomm/TezTestServiceTaskCommunicatorImpl.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/taskcomm/TezTestServiceTaskCommunicatorImpl.java b/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/taskcomm/TezTestServiceTaskCommunicatorImpl.java
new file mode 100644
index 0000000..78cdcde
--- /dev/null
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/taskcomm/TezTestServiceTaskCommunicatorImpl.java
@@ -0,0 +1,182 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.dag.app.taskcomm;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ConcurrentMap;
+
+import com.google.protobuf.ByteString;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.DataOutputBuffer;
+import org.apache.hadoop.security.Credentials;
+import org.apache.hadoop.yarn.api.ApplicationConstants;
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.api.records.LocalResource;
+import org.apache.tez.dag.api.TaskCommunicatorContext;
+import org.apache.tez.dag.app.TezTaskCommunicatorImpl;
+import org.apache.tez.dag.app.TezTestServiceCommunicator;
+import org.apache.tez.dag.records.TezTaskAttemptID;
+import org.apache.tez.runtime.api.impl.TaskSpec;
+import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos.SubmitWorkRequestProto;
+import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos.SubmitWorkResponseProto;
+import org.apache.tez.util.ProtoConverters;
+
+
+public class TezTestServiceTaskCommunicatorImpl extends TezTaskCommunicatorImpl {
+
+  private static final Log LOG = LogFactory.getLog(TezTestServiceTaskCommunicatorImpl.class);
+
+  private final TezTestServiceCommunicator communicator;
+  private final SubmitWorkRequestProto BASE_SUBMIT_WORK_REQUEST;
+  private final ConcurrentMap<String, ByteBuffer> credentialMap;
+
+  public TezTestServiceTaskCommunicatorImpl(
+      TaskCommunicatorContext taskCommunicatorContext) {
+    super(taskCommunicatorContext);
+    // TODO Maybe make this configurable
+    this.communicator = new TezTestServiceCommunicator(3);
+
+    SubmitWorkRequestProto.Builder baseBuilder = SubmitWorkRequestProto.newBuilder();
+
+    // TODO Avoid reading this from the environment
+    baseBuilder.setUser(System.getenv(ApplicationConstants.Environment.USER.name()));
+    baseBuilder.setApplicationIdString(
+        taskCommunicatorContext.getApplicationAttemptId().getApplicationId().toString());
+    baseBuilder
+        .setAppAttemptNumber(taskCommunicatorContext.getApplicationAttemptId().getAttemptId());
+    baseBuilder.setTokenIdentifier(getTokenIdentifier());
+
+    BASE_SUBMIT_WORK_REQUEST = baseBuilder.build();
+
+    credentialMap = new ConcurrentHashMap<String, ByteBuffer>();
+  }
+
+  @Override
+  public void serviceInit(Configuration conf) throws Exception {
+    super.serviceInit(conf);
+    this.communicator.init(conf);
+  }
+
+  @Override
+  public void serviceStart() {
+    super.serviceStart();
+    this.communicator.start();
+  }
+
+  @Override
+  public void serviceStop() {
+    super.serviceStop();
+  }
+
+
+  @Override
+  public void registerRunningContainer(ContainerId containerId, String hostname, int port) {
+    super.registerRunningContainer(containerId, hostname, port);
+  }
+
+  @Override
+  public void registerContainerEnd(ContainerId containerId) {
+    super.registerContainerEnd(containerId);
+  }
+
+  @Override
+  public void registerRunningTaskAttempt(final ContainerId containerId, final TaskSpec taskSpec,
+                                         Map<String, LocalResource> additionalResources,
+                                         Credentials credentials,
+                                         boolean credentialsChanged)  {
+    super.registerRunningTaskAttempt(containerId, taskSpec, additionalResources, credentials,
+        credentialsChanged);
+    SubmitWorkRequestProto requestProto = null;
+    try {
+      requestProto = constructSubmitWorkRequest(containerId, taskSpec);
+    } catch (IOException e) {
+      throw new RuntimeException("Failed to construct request", e);
+    }
+    ContainerInfo containerInfo = getContainerInfo(containerId);
+    String host;
+    int port;
+    if (containerInfo != null) {
+      synchronized (containerInfo) {
+        host = containerInfo.host;
+        port = containerInfo.port;
+      }
+    } else {
+      // TODO Handle this properly
+      throw new RuntimeException("ContainerInfo not found for container: " + containerId +
+          ", while trying to launch task: " + taskSpec.getTaskAttemptID());
+    }
+    communicator.submitWork(requestProto, host, port,
+        new TezTestServiceCommunicator.ExecuteRequestCallback<SubmitWorkResponseProto>() {
+          @Override
+          public void setResponse(SubmitWorkResponseProto response) {
+            LOG.info("Successfully launched task: " + taskSpec.getTaskAttemptID());
+            getTaskCommunicatorContext()
+                .taskStartedRemotely(taskSpec.getTaskAttemptID(), containerId);
+          }
+
+          @Override
+          public void indicateError(Throwable t) {
+            // TODO Handle this error. This is where an API on the context to indicate failure / rejection comes in.
+            LOG.info("Failed to run task: " + taskSpec.getTaskAttemptID() + " on containerId: " +
+                containerId, t);
+          }
+        });
+  }
+
+  @Override
+  public void unregisterRunningTaskAttempt(TezTaskAttemptID taskAttemptID) {
+    super.unregisterRunningTaskAttempt(taskAttemptID);
+    // Nothing else to do for now. The push API in the test does not support termination of a running task
+  }
+
+  private SubmitWorkRequestProto constructSubmitWorkRequest(ContainerId containerId,
+                                                            TaskSpec taskSpec) throws
+      IOException {
+    SubmitWorkRequestProto.Builder builder =
+        SubmitWorkRequestProto.newBuilder(BASE_SUBMIT_WORK_REQUEST);
+    builder.setContainerIdString(containerId.toString());
+    builder.setAmHost(getAddress().getHostName());
+    builder.setAmPort(getAddress().getPort());
+    Credentials taskCredentials = new Credentials();
+    // Credentials can change across DAGs. Ideally construct only once per DAG.
+    taskCredentials.addAll(getTaskCommunicatorContext().getCredentials());
+
+    ByteBuffer credentialsBinary = credentialMap.get(taskSpec.getDAGName());
+    if (credentialsBinary == null) {
+      credentialsBinary = serializeCredentials(getTaskCommunicatorContext().getCredentials());
+      credentialMap.putIfAbsent(taskSpec.getDAGName(), credentialsBinary.duplicate());
+    } else {
+      credentialsBinary = credentialsBinary.duplicate();
+    }
+    builder.setCredentialsBinary(ByteString.copyFrom(credentialsBinary));
+    builder.setTaskSpec(ProtoConverters.convertTaskSpecToProto(taskSpec));
+    return builder.build();
+  }
+
+  private ByteBuffer serializeCredentials(Credentials credentials) throws IOException {
+    Credentials containerCredentials = new Credentials();
+    containerCredentials.addAll(credentials);
+    DataOutputBuffer containerTokens_dob = new DataOutputBuffer();
+    containerCredentials.writeTokenStorageToStream(containerTokens_dob);
+    ByteBuffer containerCredentialsBuffer = ByteBuffer.wrap(containerTokens_dob.getData(), 0,
+        containerTokens_dob.getLength());
+    return containerCredentialsBuffer;
+  }
+}

http://git-wip-us.apache.org/repos/asf/tez/blob/aadd0492/tez-ext-service-tests/src/test/java/org/apache/tez/service/ContainerRunner.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/service/ContainerRunner.java b/tez-ext-service-tests/src/test/java/org/apache/tez/service/ContainerRunner.java
new file mode 100644
index 0000000..2bca4ed
--- /dev/null
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/service/ContainerRunner.java
@@ -0,0 +1,27 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.service;
+
+import java.io.IOException;
+
+import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos;
+import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos.RunContainerRequestProto;
+import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos.SubmitWorkRequestProto;
+
+public interface ContainerRunner {
+
+  void queueContainer(RunContainerRequestProto request) throws IOException;
+  void submitWork(SubmitWorkRequestProto request) throws IOException;
+}

http://git-wip-us.apache.org/repos/asf/tez/blob/aadd0492/tez-ext-service-tests/src/test/java/org/apache/tez/service/MiniTezTestServiceCluster.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/service/MiniTezTestServiceCluster.java b/tez-ext-service-tests/src/test/java/org/apache/tez/service/MiniTezTestServiceCluster.java
new file mode 100644
index 0000000..f47bd67
--- /dev/null
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/service/MiniTezTestServiceCluster.java
@@ -0,0 +1,163 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.service;
+
+import java.io.File;
+import java.io.IOException;
+import java.net.InetSocketAddress;
+
+import com.google.common.base.Preconditions;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileContext;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.service.AbstractService;
+import org.apache.hadoop.util.Shell;
+import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
+import org.apache.tez.service.impl.TezTestService;
+
+public class MiniTezTestServiceCluster extends AbstractService {
+
+  private static final Log LOG = LogFactory.getLog(MiniTezTestServiceCluster.class);
+
+  private final File testWorkDir;
+  private final long availableMemory;
+  private final int numExecutorsPerService;
+  private final String[] localDirs;
+  private final Configuration clusterSpecificConfiguration = new Configuration(false);
+
+  private TezTestService tezTestService;
+
+  public static MiniTezTestServiceCluster create(String clusterName, int numExecutorsPerService, long availableMemory, int numLocalDirs) {
+    return new MiniTezTestServiceCluster(clusterName, numExecutorsPerService, availableMemory, numLocalDirs);
+  }
+
+  // TODO Add support for multiple instances
+  private MiniTezTestServiceCluster(String clusterName, int numExecutorsPerService, long availableMemory, int numLocalDirs) {
+    super(clusterName + "_TezTestServerCluster");
+    Preconditions.checkArgument(numExecutorsPerService > 0);
+    Preconditions.checkArgument(availableMemory > 0);
+    Preconditions.checkArgument(numLocalDirs > 0);
+    String clusterNameTrimmed = clusterName.replace("$", "") + "_TezTestServerCluster";
+    File targetWorkDir = new File("target", clusterNameTrimmed);
+    try {
+      FileContext.getLocalFSFileContext().delete(
+          new Path(targetWorkDir.getAbsolutePath()), true);
+    } catch (Exception e) {
+      LOG.warn("Could not cleanup test workDir: " + targetWorkDir, e);
+      throw new RuntimeException("Could not cleanup test workDir: " + targetWorkDir, e);
+    }
+
+    if (Shell.WINDOWS) {
+      // The test working directory can exceed the maximum path length supported
+      // by some Windows APIs and cmd.exe (260 characters).  To work around this,
+      // create a symlink in temporary storage with a much shorter path,
+      // targeting the full path to the test working directory.  Then, use the
+      // symlink as the test working directory.
+      String targetPath = targetWorkDir.getAbsolutePath();
+      File link = new File(System.getProperty("java.io.tmpdir"),
+          String.valueOf(System.currentTimeMillis()));
+      String linkPath = link.getAbsolutePath();
+
+      try {
+        FileContext.getLocalFSFileContext().delete(new Path(linkPath), true);
+      } catch (IOException e) {
+        throw new YarnRuntimeException("could not cleanup symlink: " + linkPath, e);
+      }
+
+      // Guarantee target exists before creating symlink.
+      targetWorkDir.mkdirs();
+
+      Shell.ShellCommandExecutor shexec = new Shell.ShellCommandExecutor(
+          Shell.getSymlinkCommand(targetPath, linkPath));
+      try {
+        shexec.execute();
+      } catch (IOException e) {
+        throw new YarnRuntimeException(String.format(
+            "failed to create symlink from %s to %s, shell output: %s", linkPath,
+            targetPath, shexec.getOutput()), e);
+      }
+
+      this.testWorkDir = link;
+    } else {
+      this.testWorkDir = targetWorkDir;
+    }
+    this.numExecutorsPerService = numExecutorsPerService;
+    this.availableMemory = availableMemory;
+
+    // Setup Local Dirs
+    localDirs = new String[numLocalDirs];
+    for (int i = 0 ; i < numLocalDirs ; i++) {
+      File f = new File(testWorkDir, "localDir");
+      f.mkdirs();
+      LOG.info("Created localDir: " + f.getAbsolutePath());
+      localDirs[i] = f.getAbsolutePath();
+    }
+  }
+
+  @Override
+  public void serviceInit(Configuration conf) {
+    tezTestService = new TezTestService(conf, numExecutorsPerService, availableMemory, localDirs);
+    tezTestService.init(conf);
+
+  }
+
+  @Override
+  public void serviceStart() {
+    tezTestService.start();
+
+    clusterSpecificConfiguration.set(TezTestServiceConfConstants.TEZ_TEST_SERVICE_HOSTS,
+        getServiceAddress().getHostName());
+    clusterSpecificConfiguration.setInt(TezTestServiceConfConstants.TEZ_TEST_SERVICE_RPC_PORT,
+        getServiceAddress().getPort());
+
+    clusterSpecificConfiguration.setInt(
+        TezTestServiceConfConstants.TEZ_TEST_SERVICE_NUM_EXECUTORS_PER_INSTANCE,
+        numExecutorsPerService);
+    clusterSpecificConfiguration.setLong(
+        TezTestServiceConfConstants.TEZ_TEST_SERVICE_MEMORY_PER_INSTANCE_MB, availableMemory);
+  }
+
+  @Override
+  public void serviceStop() {
+    tezTestService.stop();
+  }
+
+  /**
+   * return the address at which the service is listening
+   * @return host:port
+   */
+  public InetSocketAddress getServiceAddress() {
+    Preconditions.checkState(getServiceState() == STATE.STARTED);
+    return tezTestService.getListenerAddress();
+  }
+
+  public int getShufflePort() {
+    Preconditions.checkState(getServiceState() == STATE.STARTED);
+    return tezTestService.getShufflePort();
+  }
+
+  public Configuration getClusterSpecificConfiguration() {
+    Preconditions.checkState(getServiceState() == STATE.STARTED);
+    return clusterSpecificConfiguration;
+  }
+
+  // Mainly for verification
+  public int getNumSubmissions() {
+    return tezTestService.getNumSubmissions();
+  }
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tez/blob/aadd0492/tez-ext-service-tests/src/test/java/org/apache/tez/service/TezTestServiceConfConstants.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/service/TezTestServiceConfConstants.java b/tez-ext-service-tests/src/test/java/org/apache/tez/service/TezTestServiceConfConstants.java
new file mode 100644
index 0000000..bf4a5bd
--- /dev/null
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/service/TezTestServiceConfConstants.java
@@ -0,0 +1,41 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.service;
+
+public class TezTestServiceConfConstants {
+
+  private static final String TEZ_TEST_SERVICE_PREFIX = "tez.test.service.";
+
+  /** Number of executors per instance - used by the scheduler */
+  public static final String TEZ_TEST_SERVICE_NUM_EXECUTORS_PER_INSTANCE = TEZ_TEST_SERVICE_PREFIX + "num.executors.per-instance";
+
+  /** Memory available per instance - used by the scheduler */
+  public static final String TEZ_TEST_SERVICE_MEMORY_PER_INSTANCE_MB = TEZ_TEST_SERVICE_PREFIX + "memory.per.instance.mb";
+
+  /** CPUs available per instance - used by the scheduler */
+  public static final String TEZ_TEST_SERVICE_VCPUS_PER_INSTANCE = TEZ_TEST_SERVICE_PREFIX + "vcpus.per.instance";
+
+
+  /** Hosts on which the service is running. Currently assuming a single port for all instances */
+  public static final String TEZ_TEST_SERVICE_HOSTS = TEZ_TEST_SERVICE_PREFIX + "hosts";
+
+  /** Port on which the Service(s) listen. Current a single port for all instances */
+  public static final String TEZ_TEST_SERVICE_RPC_PORT = TEZ_TEST_SERVICE_PREFIX + "rpc.port";
+
+  /** Number of threads to use in the AM to communicate with the external service */
+  public static final String TEZ_TEST_SERVICE_AM_COMMUNICATOR_NUM_THREADS = TEZ_TEST_SERVICE_PREFIX + "communicator.num.threads";
+  public static final int TEZ_TEST_SERVICE_AM_COMMUNICATOR_NUM_THREADS_DEFAULT = 2;
+
+}

http://git-wip-us.apache.org/repos/asf/tez/blob/aadd0492/tez-ext-service-tests/src/test/java/org/apache/tez/service/TezTestServiceProtocolBlockingPB.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/service/TezTestServiceProtocolBlockingPB.java b/tez-ext-service-tests/src/test/java/org/apache/tez/service/TezTestServiceProtocolBlockingPB.java
new file mode 100644
index 0000000..1108f72
--- /dev/null
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/service/TezTestServiceProtocolBlockingPB.java
@@ -0,0 +1,22 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.service;
+
+import org.apache.hadoop.ipc.ProtocolInfo;
+import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos;
+
+@ProtocolInfo(protocolName = "org.apache.tez.service.TezTestServiceProtocolBlockingPB", protocolVersion = 1)
+public interface TezTestServiceProtocolBlockingPB extends TezTestServiceProtocolProtos.TezTestServiceProtocol.BlockingInterface {
+}
\ No newline at end of file


[16/43] tez git commit: TEZ-2412. Should kill vertex in DAGImpl#VertexRerunWhileCommitting (zjffdu)

Posted by ss...@apache.org.
TEZ-2412. Should kill vertex in DAGImpl#VertexRerunWhileCommitting (zjffdu)


Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/6e6ad706
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/6e6ad706
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/6e6ad706

Branch: refs/heads/TEZ-2003
Commit: 6e6ad706f5b6611058541c3bf072343bf002ced5
Parents: 4a6808c
Author: Jeff Zhang <zj...@apache.org>
Authored: Fri May 8 19:55:53 2015 +0800
Committer: Jeff Zhang <zj...@apache.org>
Committed: Fri May 8 19:55:53 2015 +0800

----------------------------------------------------------------------
 CHANGES.txt                                     |  1 +
 .../apache/tez/dag/app/dag/impl/DAGImpl.java    | 20 ++++++++++++++++----
 .../apache/tez/dag/app/dag/impl/VertexImpl.java |  3 +--
 .../apache/tez/dag/app/dag/impl/TestCommit.java | 14 ++++++++++++--
 4 files changed, 30 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tez/blob/6e6ad706/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 3520768..185e1b0 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -20,6 +20,7 @@ INCOMPATIBLE CHANGES
     Default max limit increased. Should not affect existing users.
 
 ALL CHANGES:
+  TEZ-2412. Should kill vertex in DAGImpl#VertexRerunWhileCommitting
   TEZ-2410. VertexGroupCommitFinishedEvent & VertexCommitStartedEvent is not logged correctly
   TEZ-776. Reduce AM mem usage caused by storing TezEvents
   TEZ-2423. Tez UI: Remove Attempt Index column from task->attempts page

http://git-wip-us.apache.org/repos/asf/tez/blob/6e6ad706/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/DAGImpl.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/DAGImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/DAGImpl.java
index 1726c18..0a87241 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/DAGImpl.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/DAGImpl.java
@@ -1206,6 +1206,7 @@ public class DAGImpl implements org.apache.tez.dag.app.dag.DAG,
           return dag.finished(DAGState.SUCCEEDED);
         }
       } else {
+        // check commits before move to COMPLETED state.
         if (dag.commitFutures.isEmpty()) {
           return finishWithTerminationCause(dag);
         } else {
@@ -1218,7 +1219,7 @@ public class DAGImpl implements org.apache.tez.dag.app.dag.DAG,
     return dag.getInternalState();
   }
 
-  // triggered by commit_complete
+  // triggered by commit_complete, checkCommitsForCompletion should only been called in COMMITTING/TERMINATING
   static DAGState checkCommitsForCompletion(DAGImpl dag) {
     LOG.info("Checking commits for DAG completion"
         + ", numCompletedVertices=" + dag.numCompletedVertices
@@ -1240,8 +1241,11 @@ public class DAGImpl implements org.apache.tez.dag.app.dag.DAG,
         return dag.finished(DAGState.SUCCEEDED);
       }
     } else {
-      if (!dag.commitFutures.isEmpty()) {
-        // pending commits are running
+      Preconditions.checkState(dag.getState() == DAGState.TERMINATING
+          || dag.getState() == DAGState.COMMITTING,
+          "DAG should be in COMMITTING/TERMINATING state, but in " + dag.getState());
+      if (!dag.commitFutures.isEmpty() || dag.numCompletedVertices != dag.numVertices) {
+        // pending commits are running or still some vertices are not completed
         return DAGState.TERMINATING;
       } else {
         return finishWithTerminationCause(dag);
@@ -2155,8 +2159,16 @@ public class DAGImpl implements org.apache.tez.dag.app.dag.DAG,
     @Override
     public void transition(DAGImpl dag, DAGEvent event) {
       LOG.info("Vertex rerun while dag it is COMMITTING");
+      DAGEventVertexReRunning rerunEvent = (DAGEventVertexReRunning)event;
+      Vertex vertex = dag.getVertex(rerunEvent.getVertexId());
+      dag.reRunningVertices.add(vertex.getVertexId());
+      dag.numSuccessfulVertices--;
+      dag.numCompletedVertices--;
+      dag.addDiagnostic("Vertex re-running"
+          + ", vertexName=" + vertex.getName()
+          + ", vertexId=" + vertex.getVertexId());
       dag.cancelCommits();
-      dag.trySetTerminationCause(DAGTerminationCause.VERTEX_RERUN_IN_COMMITTING);
+      dag.enactKill(DAGTerminationCause.VERTEX_RERUN_IN_COMMITTING, VertexTerminationCause.VERTEX_RERUN_IN_COMMITTING);
     }
 
   }

http://git-wip-us.apache.org/repos/asf/tez/blob/6e6ad706/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexImpl.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexImpl.java
index 3a9558d..6b208b0 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexImpl.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexImpl.java
@@ -3669,6 +3669,7 @@ public class VertexImpl implements org.apache.tez.dag.app.dag.Vertex, EventHandl
         case INIT_FAILURE:
         case INTERNAL_ERROR:
         case AM_USERCODE_FAILURE:
+        case VERTEX_RERUN_IN_COMMITTING:
         case VERTEX_RERUN_AFTER_COMMIT:
         case OTHER_VERTEX_FAILURE: vertex.tryEnactKill(trigger, TaskTerminationCause.OTHER_VERTEX_FAILURE); break;
         default://should not occur
@@ -3685,8 +3686,6 @@ public class VertexImpl implements org.apache.tez.dag.app.dag.Vertex, EventHandl
 
     @Override
     public void transition(VertexImpl vertex, VertexEvent event) {
-
-
       VertexEventTermination vet = (VertexEventTermination) event;
       VertexTerminationCause trigger = vet.getTerminationCause();
       String msg = "Vertex received Kill while in COMMITTING state, terminationCause="

http://git-wip-us.apache.org/repos/asf/tez/blob/6e6ad706/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestCommit.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestCommit.java b/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestCommit.java
index 8fc29c2..3d3bca4 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestCommit.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestCommit.java
@@ -1703,15 +1703,25 @@ public class TestCommit {
     v3.handle(new VertexEventTaskCompleted(v3.getTask(0).getTaskId(),
         TaskState.SUCCEEDED));
     waitUntil(dag, DAGState.COMMITTING);
-    dag.handle(new DAGEventVertexReRunning(v1.getVertexId()));
+    TezTaskID newTaskId = TezTaskID.getInstance(v1.getVertexId(), 1);
+    v1.handle(new VertexEventTaskReschedule(newTaskId));
+    // dag is in TERMINATING, wait for the complete of its rescheduled tasks
+    waitUntil(dag, DAGState.TERMINATING);
+    waitUntil(v1, VertexState.TERMINATING);
+    // reschedueled task is killed
+    v1.handle(new VertexEventTaskCompleted(newTaskId, TaskState.KILLED));
     waitUntil(dag, DAGState.FAILED);
+    Assert.assertEquals(VertexState.FAILED, v1.getState());
+    Assert.assertEquals(DAGState.FAILED, dag.getState());
+    Assert.assertEquals(VertexTerminationCause.VERTEX_RERUN_IN_COMMITTING, v1.getTerminationCause());
 
     Assert.assertEquals(DAGTerminationCause.VERTEX_RERUN_IN_COMMITTING, dag.getTerminationCause());
     Assert.assertTrue(dag.commitFutures.isEmpty());
     historyEventHandler.verifyVertexGroupCommitStartedEvent("uv12", 0);
     historyEventHandler.verifyVertexGroupCommitFinishedEvent("uv12", 0);
     historyEventHandler.verifyVertexCommitStartedEvent(v1.getVertexId(), 0);
-    historyEventHandler.verifyVertexFinishedEvent(v1.getVertexId(), 1);
+    // VertexFinishedEvent is logged twice due to vertex-rerun
+    historyEventHandler.verifyVertexFinishedEvent(v1.getVertexId(), 2);
     historyEventHandler.verifyVertexCommitStartedEvent(v2.getVertexId(), 0);
     historyEventHandler.verifyVertexFinishedEvent(v2.getVertexId(), 1);
     historyEventHandler.verifyVertexCommitStartedEvent(v3.getVertexId(), 0);


[36/43] tez git commit: TEZ-2285. Allow TaskCommunicators to indicate task/container liveness. (sseth)

Posted by ss...@apache.org.
TEZ-2285. Allow TaskCommunicators to indicate task/container liveness.  (sseth)


Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/5239a45b
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/5239a45b
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/5239a45b

Branch: refs/heads/TEZ-2003
Commit: 5239a45b78cdd5eec194a36545353d07ad94f34b
Parents: 2b05376
Author: Siddharth Seth <ss...@apache.org>
Authored: Tue Apr 7 13:22:09 2015 -0700
Committer: Siddharth Seth <ss...@apache.org>
Committed: Fri May 8 14:41:31 2015 -0700

----------------------------------------------------------------------
 TEZ-2003-CHANGES.txt                                |  1 +
 .../apache/tez/dag/api/TaskCommunicatorContext.java |  4 ++++
 .../tez/dag/app/TaskAttemptListenerImpTezDag.java   | 10 ++++++++++
 .../apache/tez/dag/app/TezTaskCommunicatorImpl.java | 16 +++++++++-------
 4 files changed, 24 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tez/blob/5239a45b/TEZ-2003-CHANGES.txt
----------------------------------------------------------------------
diff --git a/TEZ-2003-CHANGES.txt b/TEZ-2003-CHANGES.txt
index e2c428d..9d6b220 100644
--- a/TEZ-2003-CHANGES.txt
+++ b/TEZ-2003-CHANGES.txt
@@ -14,5 +14,6 @@ ALL CHANGES:
   TEZ-2241. Miscellaneous fixes after last reabse.
   TEZ-2283. Fixes after rebase 04/07.
   TEZ-2284. Separate TaskReporter into an interface.
+  TEZ-2285. Allow TaskCommunicators to indicate task/container liveness.
 
 INCOMPATIBLE CHANGES:

http://git-wip-us.apache.org/repos/asf/tez/blob/5239a45b/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicatorContext.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicatorContext.java b/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicatorContext.java
index a85fb7f..0c3bac3 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicatorContext.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicatorContext.java
@@ -43,6 +43,10 @@ public interface TaskCommunicatorContext {
 
   boolean isKnownContainer(ContainerId containerId);
 
+  void taskAlive(TezTaskAttemptID taskAttemptId);
+
+  void containerAlive(ContainerId containerId);
+
   // TODO TEZ-2003 Move to vertex, taskIndex, version. Rename to taskAttempt*
   void taskStartedRemotely(TezTaskAttemptID taskAttemptID, ContainerId containerId);
 

http://git-wip-us.apache.org/repos/asf/tez/blob/5239a45b/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java b/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
index 0ee448b..cd39cc8 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
@@ -259,6 +259,16 @@ public class TaskAttemptListenerImpTezDag extends AbstractService implements
   }
 
   @Override
+  public void taskAlive(TezTaskAttemptID taskAttemptId) {
+    taskHeartbeatHandler.pinged(taskAttemptId);
+  }
+
+  @Override
+  public void containerAlive(ContainerId containerId) {
+    pingContainerHeartbeatHandler(containerId);
+  }
+
+  @Override
   public void taskStartedRemotely(TezTaskAttemptID taskAttemptID, ContainerId containerId) {
     context.getEventHandler()
         .handle(new TaskAttemptEventStartedRemotely(taskAttemptID, containerId, null));

http://git-wip-us.apache.org/repos/asf/tez/blob/5239a45b/tez-dag/src/main/java/org/apache/tez/dag/app/TezTaskCommunicatorImpl.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/TezTaskCommunicatorImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/TezTaskCommunicatorImpl.java
index 9d0c031..ef4f764 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/TezTaskCommunicatorImpl.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/TezTaskCommunicatorImpl.java
@@ -65,17 +65,19 @@ public class TezTaskCommunicatorImpl extends TaskCommunicator {
       null, true, null, null, false);
 
   private final TaskCommunicatorContext taskCommunicatorContext;
+  private final TezTaskUmbilicalProtocol taskUmbilical;
 
-  private final ConcurrentMap<ContainerId, ContainerInfo> registeredContainers =
+  protected final ConcurrentMap<ContainerId, ContainerInfo> registeredContainers =
       new ConcurrentHashMap<ContainerId, ContainerInfo>();
-  private final ConcurrentMap<TaskAttempt, ContainerId> attemptToContainerMap =
+  protected final ConcurrentMap<TaskAttempt, ContainerId> attemptToContainerMap =
       new ConcurrentHashMap<TaskAttempt, ContainerId>();
 
-  private final TezTaskUmbilicalProtocol taskUmbilical;
-  private final String tokenIdentifier;
-  private final Token<JobTokenIdentifier> sessionToken;
+
+  protected final String tokenIdentifier;
+  protected final Token<JobTokenIdentifier> sessionToken;
   protected InetSocketAddress address;
-  private Server server;
+
+  protected volatile Server server;
 
   public static final class ContainerInfo {
 
@@ -438,7 +440,7 @@ public class TezTaskCommunicatorImpl extends TaskCommunicator {
 
 
   // Holder for Task information, which eventually will likely be VertexImplm taskIndex, attemptIndex
-  private static class TaskAttempt {
+  protected static class TaskAttempt {
     // TODO TEZ-2003 Change this to work with VertexName, int id, int version
     // TODO TEZ-2003 Avoid constructing this unit all over the place
     private TezTaskAttemptID taskAttemptId;


[40/43] tez git commit: TEZ-2381. Fixes after rebase 04/28. (sseth)

Posted by ss...@apache.org.
TEZ-2381. Fixes after rebase 04/28. (sseth)


Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/9e098f70
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/9e098f70
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/9e098f70

Branch: refs/heads/TEZ-2003
Commit: 9e098f7024f0be9d53f28230893890766e37fe95
Parents: 0137467
Author: Siddharth Seth <ss...@apache.org>
Authored: Tue Apr 28 13:41:12 2015 -0700
Committer: Siddharth Seth <ss...@apache.org>
Committed: Fri May 8 14:42:26 2015 -0700

----------------------------------------------------------------------
 TEZ-2003-CHANGES.txt                            |  1 +
 .../dag/app/TaskAttemptListenerImpTezDag.java   | 17 ++++----
 .../app/TestTaskAttemptListenerImplTezDag.java  | 44 +++++++++++++++-----
 3 files changed, 42 insertions(+), 20 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tez/blob/9e098f70/TEZ-2003-CHANGES.txt
----------------------------------------------------------------------
diff --git a/TEZ-2003-CHANGES.txt b/TEZ-2003-CHANGES.txt
index f6bc8e7..d42aaf8 100644
--- a/TEZ-2003-CHANGES.txt
+++ b/TEZ-2003-CHANGES.txt
@@ -18,5 +18,6 @@ ALL CHANGES:
   TEZ-2302. Allow TaskCommunicators to subscribe for Vertex updates.
   TEZ-2347. Expose additional information in TaskCommunicatorContext.
   TEZ-2361. Propagate dag completion to TaskCommunicator.
+  TEZ-2381. Fixes after rebase 04/28.
 
 INCOMPATIBLE CHANGES:

http://git-wip-us.apache.org/repos/asf/tez/blob/9e098f70/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java b/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
index 03b5602..d30919b 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
@@ -17,28 +17,22 @@
 
 package org.apache.tez.dag.app;
 
-import javax.annotation.Nullable;
 import java.io.IOException;
 import java.lang.reflect.Constructor;
 import java.lang.reflect.InvocationTargetException;
-import java.net.InetSocketAddress;
-import java.net.URISyntaxException;
-import java.net.UnknownHostException;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.List;
-import java.util.Set;
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.ConcurrentMap;
 
+import com.google.common.annotations.VisibleForTesting;
 import org.apache.commons.collections4.ListUtils;
 import org.apache.tez.dag.app.dag.event.TaskAttemptEvent;
 import org.apache.tez.dag.app.dag.event.TaskAttemptEventStatusUpdate;
 import org.apache.tez.dag.app.dag.event.TaskAttemptEventType;
 import org.apache.tez.runtime.api.events.TaskStatusUpdateEvent;
 import org.apache.tez.runtime.api.impl.EventType;
-import com.google.common.base.Preconditions;
-import org.apache.tez.dag.api.event.VertexState;
 import org.apache.tez.dag.api.event.VertexStateUpdate;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -62,7 +56,6 @@ import org.apache.tez.dag.app.dag.Task;
 import org.apache.tez.dag.app.dag.event.TaskAttemptEventAttemptFailed;
 import org.apache.tez.dag.app.dag.event.TaskAttemptEventAttemptKilled;
 import org.apache.tez.dag.app.dag.event.TaskAttemptEventStartedRemotely;
-import org.apache.tez.dag.app.dag.event.TaskAttemptEventType;
 import org.apache.tez.dag.app.dag.event.VertexEventRouteEvent;
 import org.apache.tez.dag.app.rm.container.AMContainerTask;
 import org.apache.tez.dag.records.TezTaskAttemptID;
@@ -151,7 +144,7 @@ public class TaskAttemptListenerImpTezDag extends AbstractService implements
   private TaskCommunicator createTaskCommunicator(String taskCommClassIdentifier, int taskCommIndex) {
     if (taskCommClassIdentifier.equals(TezConstants.TEZ_AM_SERVICE_PLUGINS_NAME_DEFAULT)) {
       LOG.info("Using Default Task Communicator");
-      return new TezTaskCommunicatorImpl(taskCommunicatorContexts[taskCommIndex]);
+      return createTezTaskCommunicator(taskCommunicatorContexts[taskCommIndex]);
     } else if (taskCommClassIdentifier.equals(TezConstants.TEZ_AM_SERVICE_PLUGINS_LOCAL_MODE_NAME_DEFAULT)) {
       LOG.info("Using Default Local Task Communicator");
       return new TezLocalTaskCommunicatorImpl(taskCommunicatorContexts[taskCommIndex]);
@@ -174,6 +167,12 @@ public class TaskAttemptListenerImpTezDag extends AbstractService implements
       }
     }
   }
+
+  @VisibleForTesting
+  protected TezTaskCommunicatorImpl createTezTaskCommunicator(TaskCommunicatorContext context) {
+    return new TezTaskCommunicatorImpl(context);
+  }
+
   public TaskHeartbeatResponse heartbeat(TaskHeartbeatRequest request)
       throws IOException, TezException {
     ContainerId containerId = ConverterUtils.toContainerId(request

http://git-wip-us.apache.org/repos/asf/tez/blob/9e098f70/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskAttemptListenerImplTezDag.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskAttemptListenerImplTezDag.java b/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskAttemptListenerImplTezDag.java
index 076f9e0..ae9ebc0 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskAttemptListenerImplTezDag.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskAttemptListenerImplTezDag.java
@@ -43,11 +43,9 @@ import org.apache.hadoop.yarn.event.EventHandler;
 import org.apache.tez.common.ContainerContext;
 import org.apache.tez.common.ContainerTask;
 import org.apache.tez.common.security.JobTokenSecretManager;
+import org.apache.tez.dag.api.TaskHeartbeatRequest;
 import org.apache.tez.dag.api.TezException;
 import org.apache.hadoop.yarn.api.records.NodeId;
-import org.apache.hadoop.yarn.event.EventHandler;
-import org.apache.tez.common.ContainerContext;
-import org.apache.tez.common.ContainerTask;
 import org.apache.tez.common.TezTaskUmbilicalProtocol;
 import org.apache.tez.dag.api.TaskCommunicatorContext;
 import org.apache.tez.dag.app.dag.DAG;
@@ -79,7 +77,9 @@ import org.mockito.ArgumentCaptor;
 // TODO TEZ-2003 Rename to TestTezTaskCommunicator
 public class TestTaskAttemptListenerImplTezDag {
   private ApplicationId appId;
+  private ApplicationAttemptId appAttemptId;
   private AppContext appContext;
+  Credentials credentials;
   AMContainerMap amContainerMap;
   EventHandler eventHandler;
   DAG dag;
@@ -95,11 +95,13 @@ public class TestTaskAttemptListenerImplTezDag {
   @Before
   public void setUp() {
     appId = ApplicationId.newInstance(1000, 1);
+    appAttemptId = ApplicationAttemptId.newInstance(appId, 1);
     dag = mock(DAG.class);
     TezDAGID dagID = TezDAGID.getInstance(appId, 1);
     vertexID = TezVertexID.getInstance(dagID, 1);
     taskID = TezTaskID.getInstance(vertexID, 1);
     taskAttemptID = TezTaskAttemptID.getInstance(taskID, 1);
+    credentials = new Credentials();
 
     amContainerMap = mock(AMContainerMap.class);
     Map<ApplicationAccessType, String> appAcls = new HashMap<ApplicationAccessType, String>();
@@ -111,6 +113,8 @@ public class TestTaskAttemptListenerImplTezDag {
     doReturn(dag).when(appContext).getCurrentDAG();
     doReturn(appAcls).when(appContext).getApplicationACLs();
     doReturn(amContainerMap).when(appContext).getAllContainers();
+    doReturn(appAttemptId).when(appContext).getApplicationAttemptId();
+    doReturn(credentials).when(appContext).getAppCredentials();
     NodeId nodeId = NodeId.newInstance("localhost", 0);
 
     AMContainer amContainer = mock(AMContainer.class);
@@ -152,7 +156,7 @@ public class TestTaskAttemptListenerImplTezDag {
     assertEquals(taskSpec, containerTask.getTaskSpec());
 
     // Task unregistered. Should respond to heartbeats
-    taskAttemptListener.unregisterTaskAttempt(taskAttemptId, 0);
+    taskAttemptListener.unregisterTaskAttempt(taskAttemptID, 0);
     containerTask = tezUmbilical.getTask(containerContext2);
     assertNull(containerTask);
 
@@ -182,7 +186,7 @@ public class TestTaskAttemptListenerImplTezDag {
     TezTaskUmbilicalProtocol tezUmbilical = taskCommunicator.getUmbilical();
 
     ContainerId containerId1 = createContainerId(appId, 1);
-    doReturn(mock(AMContainer.class)).when(amContainerMap).get(containerId1);
+
     ContainerContext containerContext1 = new ContainerContext(containerId1.toString());
     taskAttemptListener.registerRunningContainer(containerId1, 0);
     containerTask = tezUmbilical.getTask(containerContext1);
@@ -259,7 +263,6 @@ public class TestTaskAttemptListenerImplTezDag {
       int fromEventId, int maxEvents, int nextFromEventId,
       List<TezEvent> sendEvents) throws IOException, TezException {
     ContainerId containerId = createContainerId(appId, 1);
-    long requestId = 0;
     Vertex vertex = mock(Vertex.class);
 
     doReturn(vertex).when(dag).getVertex(vertexID);
@@ -267,13 +270,13 @@ public class TestTaskAttemptListenerImplTezDag {
     TaskAttemptEventInfo eventInfo = new TaskAttemptEventInfo(nextFromEventId, sendEvents);
     doReturn(eventInfo).when(vertex).getTaskAttemptTezEvents(taskAttemptID, fromEventId, maxEvents);
 
-    taskAttemptListener.registerRunningContainer(containerId);
-    taskAttemptListener.registerTaskAttempt(amContainerTask, containerId);
+    taskAttemptListener.registerRunningContainer(containerId, 0);
+    taskAttemptListener.registerTaskAttempt(amContainerTask, containerId, 0);
+
+    TaskHeartbeatRequest request = mock(TaskHeartbeatRequest.class);
 
-    TezHeartbeatRequest request = mock(TezHeartbeatRequest.class);
     doReturn(containerId.toString()).when(request).getContainerIdentifier();
-    doReturn(taskAttemptID).when(request).getCurrentTaskAttemptID();
-    doReturn(++requestId).when(request).getRequestId();
+    doReturn(taskAttemptID).when(request).getTaskAttemptId();
     doReturn(events).when(request).getEvents();
     doReturn(maxEvents).when(request).getMaxEvents();
     doReturn(fromEventId).when(request).getStartIndex();
@@ -287,6 +290,25 @@ public class TestTaskAttemptListenerImplTezDag {
     return ContainerId.newInstance(appAttemptId, containerIdx);
   }
 
+  private static class TaskAttemptListenerImplForTest extends TaskAttemptListenerImpTezDag {
+
+    public TaskAttemptListenerImplForTest(AppContext context,
+                                          TaskHeartbeatHandler thh,
+                                          ContainerHeartbeatHandler chh,
+                                          JobTokenSecretManager jobTokenSecretManager,
+                                          String[] taskCommunicatorClassIdentifiers,
+                                          boolean isPureLocalMode) {
+      super(context, thh, chh, jobTokenSecretManager, taskCommunicatorClassIdentifiers,
+          isPureLocalMode);
+    }
+
+    @Override
+    protected TezTaskCommunicatorImpl createTezTaskCommunicator(TaskCommunicatorContext context) {
+      return new TezTaskCommunicatorImplForTest(context);
+    }
+
+  }
+
   private static class TezTaskCommunicatorImplForTest extends TezTaskCommunicatorImpl {
 
     public TezTaskCommunicatorImplForTest(


[06/43] tez git commit: TEZ-2419. Inputs/Outputs should inform the Processor about Interrupts when interrupted during a blocking Op. (sseth)

Posted by ss...@apache.org.
TEZ-2419. Inputs/Outputs should inform the Processor about Interrupts
when interrupted during a blocking Op. (sseth)


Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/55308630
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/55308630
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/55308630

Branch: refs/heads/TEZ-2003
Commit: 55308630b6354ce070550d1ea4efbedbbae8e13a
Parents: 7476fae
Author: Siddharth Seth <ss...@apache.org>
Authored: Wed May 6 14:39:08 2015 -0700
Committer: Siddharth Seth <ss...@apache.org>
Committed: Wed May 6 14:39:08 2015 -0700

----------------------------------------------------------------------
 CHANGES.txt                                     |   1 +
 .../tez/mapreduce/lib/MRReaderMapReduce.java    |   3 +-
 .../tez/mapreduce/lib/MRReaderMapred.java       |   3 +
 .../apache/tez/mapreduce/output/MROutput.java   |   3 +-
 .../library/api/IOInterruptedException.java     |  40 +++++++
 .../tez/runtime/library/api/KeyValueReader.java |   2 +
 .../tez/runtime/library/api/KeyValueWriter.java |   2 +
 .../runtime/library/api/KeyValuesReader.java    |   1 +
 .../runtime/library/api/KeyValuesWriter.java    |   2 +
 .../common/readers/UnorderedKVReader.java       |   4 +-
 .../common/shuffle/orderedgrouped/Shuffle.java  |   6 +-
 .../common/sort/impl/ExternalSorter.java        |   3 +-
 .../common/sort/impl/PipelinedSorter.java       |  20 ++--
 .../common/sort/impl/dflt/DefaultSorter.java    |  11 +-
 .../writers/UnorderedPartitionedKVWriter.java   |   3 +-
 .../input/ConcatenatedMergedKeyValueInput.java  |   8 +-
 .../input/ConcatenatedMergedKeyValuesInput.java |   8 +-
 .../library/input/OrderedGroupedKVInput.java    |  12 +-
 .../common/readers/TestUnorderedKVReader.java   |  22 ++++
 .../input/TestOrderedGroupedKVInput.java        | 113 +++++++++++++++++++
 20 files changed, 246 insertions(+), 21 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tez/blob/55308630/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index fd45454..c865f12 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -7,6 +7,7 @@ Release 0.8.0: Unreleased
 INCOMPATIBLE CHANGES
 
 ALL CHANGES:
+  TEZ-2419. Inputs/Outputs should inform the Processor about Interrupts when interrupted during a blocking Op.
   TEZ-1752. Inputs / Outputs in the Runtime library should be interruptable.
 
 Release 0.7.0: Unreleased

http://git-wip-us.apache.org/repos/asf/tez/blob/55308630/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/lib/MRReaderMapReduce.java
----------------------------------------------------------------------
diff --git a/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/lib/MRReaderMapReduce.java b/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/lib/MRReaderMapReduce.java
index 0495751..5fc3e49 100644
--- a/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/lib/MRReaderMapReduce.java
+++ b/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/lib/MRReaderMapReduce.java
@@ -20,6 +20,7 @@ package org.apache.tez.mapreduce.lib;
 
 import java.io.IOException;
 
+import org.apache.tez.runtime.library.api.IOInterruptedException;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.hadoop.mapred.JobConf;
@@ -116,7 +117,7 @@ public class MRReaderMapReduce extends MRReader {
       hasNext = recordReader.nextKeyValue();
     } catch (InterruptedException e) {
       Thread.currentThread().interrupt();
-      throw new IOException("Interrupted while checking for next key-value", e);
+      throw new IOInterruptedException("Interrupted while checking for next key-value", e);
     }
     if (hasNext) {
       inputRecordCounter.increment(1);

http://git-wip-us.apache.org/repos/asf/tez/blob/55308630/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/lib/MRReaderMapred.java
----------------------------------------------------------------------
diff --git a/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/lib/MRReaderMapred.java b/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/lib/MRReaderMapred.java
index 366e7a7..1bf71f6 100644
--- a/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/lib/MRReaderMapred.java
+++ b/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/lib/MRReaderMapred.java
@@ -117,6 +117,9 @@ public class MRReaderMapred extends MRReader {
       hasCompletedProcessing();
       completedProcessing = true;
     }
+    // The underlying reader does not throw InterruptedExceptions. Cannot convert to an
+    // IOInterruptedException without checking the interrupt flag on each request, which is also
+    // not guaranteed. Relying on the user to ensure Interrupts are handled correctly.
     return hasNext;
   }
 

http://git-wip-us.apache.org/repos/asf/tez/blob/55308630/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/output/MROutput.java
----------------------------------------------------------------------
diff --git a/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/output/MROutput.java b/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/output/MROutput.java
index d19f707..a3b19ed 100644
--- a/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/output/MROutput.java
+++ b/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/output/MROutput.java
@@ -31,6 +31,7 @@ import java.util.concurrent.atomic.AtomicBoolean;
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Preconditions;
 import org.apache.commons.lang.StringUtils;
+import org.apache.tez.runtime.library.api.IOInterruptedException;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.hadoop.classification.InterfaceAudience.Public;
@@ -499,7 +500,7 @@ public class MROutput extends AbstractLogicalOutput {
             newRecordWriter.write(key, value);
           } catch (InterruptedException e) {
             Thread.currentThread().interrupt();
-            throw new IOException("Interrupted while writing next key-value",e);
+            throw new IOInterruptedException("Interrupted while writing next key-value",e);
           }
         } else {
           oldRecordWriter.write(key, value);

http://git-wip-us.apache.org/repos/asf/tez/blob/55308630/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/api/IOInterruptedException.java
----------------------------------------------------------------------
diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/api/IOInterruptedException.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/api/IOInterruptedException.java
new file mode 100644
index 0000000..776b2a3
--- /dev/null
+++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/api/IOInterruptedException.java
@@ -0,0 +1,40 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.runtime.library.api;
+
+import java.io.IOException;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+
+/**
+ * Indicates that an IOOperation was interrupted
+ */
+@InterfaceAudience.Public
+@InterfaceStability.Evolving
+public class IOInterruptedException extends IOException {
+
+  public IOInterruptedException(String message) {
+    super(message);
+  }
+
+  public IOInterruptedException(String message, Throwable cause) {
+    super(message, cause);
+  }
+
+  public IOInterruptedException(Throwable cause) {
+    super(cause);
+  }
+}

http://git-wip-us.apache.org/repos/asf/tez/blob/55308630/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/api/KeyValueReader.java
----------------------------------------------------------------------
diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/api/KeyValueReader.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/api/KeyValueReader.java
index d504d08..47f335b 100644
--- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/api/KeyValueReader.java
+++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/api/KeyValueReader.java
@@ -49,6 +49,7 @@ public abstract class KeyValueReader extends Reader {
    * @return true if another key/value(s) pair exists, false if there are no more.
    * @throws IOException
    *           if an error occurs
+   * @throws {@link IOInterruptedException} if IO was performing a blocking operation and was interrupted
    */
   public abstract boolean next() throws IOException;
 
@@ -63,6 +64,7 @@ public abstract class KeyValueReader extends Reader {
   /**
    * Returns the current value
    * @return the current value
+   *
    * @throws IOException
    */
   public abstract Object getCurrentValue() throws IOException;

http://git-wip-us.apache.org/repos/asf/tez/blob/55308630/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/api/KeyValueWriter.java
----------------------------------------------------------------------
diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/api/KeyValueWriter.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/api/KeyValueWriter.java
index 6acb24b..b5c4294 100644
--- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/api/KeyValueWriter.java
+++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/api/KeyValueWriter.java
@@ -39,6 +39,8 @@ public abstract class KeyValueWriter extends Writer {
    *          the value to write
    * @throws IOException
    *           if an error occurs
+   * @throws {@link IOInterruptedException} if IO was interrupted
+   * @throws {@link IOInterruptedException} if IO was performing a blocking operation and was interrupted
    */
   public abstract void write(Object key, Object value) throws IOException;
 }

http://git-wip-us.apache.org/repos/asf/tez/blob/55308630/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/api/KeyValuesReader.java
----------------------------------------------------------------------
diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/api/KeyValuesReader.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/api/KeyValuesReader.java
index 510f4b7..7760818 100644
--- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/api/KeyValuesReader.java
+++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/api/KeyValuesReader.java
@@ -49,6 +49,7 @@ public abstract class KeyValuesReader extends Reader {
    * @return true if another key/value(s) pair exists, false if there are no more.
    * @throws IOException
    *           if an error occurs
+   * @throws {@link IOInterruptedException} if IO was performing a blocking operation and was interrupted
    */
   public abstract boolean next() throws IOException;
 

http://git-wip-us.apache.org/repos/asf/tez/blob/55308630/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/api/KeyValuesWriter.java
----------------------------------------------------------------------
diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/api/KeyValuesWriter.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/api/KeyValuesWriter.java
index 50fc2d6..9cdde43 100644
--- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/api/KeyValuesWriter.java
+++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/api/KeyValuesWriter.java
@@ -38,6 +38,8 @@ public abstract class KeyValuesWriter extends KeyValueWriter {
    * @param values
    *          values to write
    * @throws java.io.IOException
+   * @throws {@link IOInterruptedException} if IO was interrupted
+   * @throws {@link IOInterruptedException} if IO was performing a blocking operation and was interrupted
    */
   public abstract void write(Object key, Iterable<Object> values) throws IOException;
 }

http://git-wip-us.apache.org/repos/asf/tez/blob/55308630/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/readers/UnorderedKVReader.java
----------------------------------------------------------------------
diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/readers/UnorderedKVReader.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/readers/UnorderedKVReader.java
index fc2e312..a8dd1b2 100644
--- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/readers/UnorderedKVReader.java
+++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/readers/UnorderedKVReader.java
@@ -20,6 +20,7 @@ package org.apache.tez.runtime.library.common.readers;
 
 import java.io.IOException;
 
+import org.apache.tez.runtime.library.api.IOInterruptedException;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.hadoop.classification.InterfaceAudience.Private;
@@ -168,7 +169,6 @@ public class UnorderedKVReader<K, V> extends KeyValueReader {
    * 
    * @return true if the next input exists, false otherwise
    * @throws IOException
-   * @throws InterruptedException
    */
   private boolean moveToNextInput() throws IOException {
     if (currentReader != null) { // Close the current reader.
@@ -185,7 +185,7 @@ public class UnorderedKVReader<K, V> extends KeyValueReader {
     } catch (InterruptedException e) {
       LOG.warn("Interrupted while waiting for next available input", e);
       Thread.currentThread().interrupt();
-      throw new IOException(e);
+      throw new IOInterruptedException(e);
     }
     if (currentFetchedInput == null) {
       hasCompletedProcessing();

http://git-wip-us.apache.org/repos/asf/tez/blob/55308630/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/Shuffle.java
----------------------------------------------------------------------
diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/Shuffle.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/Shuffle.java
index ee05378..cb12a63 100644
--- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/Shuffle.java
+++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/Shuffle.java
@@ -305,6 +305,7 @@ public class Shuffle implements ExceptionReporter {
       kvIter = runShuffleFuture.get();
     } catch (ExecutionException e) {
       Throwable cause = e.getCause();
+      // Processor interrupted while waiting for errors, will see an InterruptedException.
       handleThrowable(cause);
     }
     if (isShutDown.get()) {
@@ -375,7 +376,9 @@ public class Shuffle implements ExceptionReporter {
       try {
         kvIter = merger.close();
       } catch (Throwable e) {
-        throw new ShuffleError("Error while doing final merge " , e);
+        // Set the throwable so that future.get() sees the reported errror.
+        throwable.set(e);
+        throw new ShuffleError("Error while doing final merge ", e);
       }
       mergePhaseTime.setValue(System.currentTimeMillis() - startTime);
 
@@ -513,6 +516,7 @@ public class Shuffle implements ExceptionReporter {
         LOG.info("Already shutdown. Ignoring error");
       } else {
         LOG.error("ShuffleRunner failed with error", t);
+        // In case of an abort / Interrupt - the runtime makes sure that this is ignored.
         inputContext.fatalError(t, "Shuffle Runner Failed");
         cleanupIgnoreErrors();
       }

http://git-wip-us.apache.org/repos/asf/tez/blob/55308630/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/sort/impl/ExternalSorter.java
----------------------------------------------------------------------
diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/sort/impl/ExternalSorter.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/sort/impl/ExternalSorter.java
index ca4d889..40d22fe 100644
--- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/sort/impl/ExternalSorter.java
+++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/sort/impl/ExternalSorter.java
@@ -25,6 +25,7 @@ import java.util.Iterator;
 import java.util.Map;
 
 import com.google.common.collect.Maps;
+import org.apache.tez.runtime.library.api.IOInterruptedException;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.hadoop.classification.InterfaceAudience.Private;
@@ -267,7 +268,7 @@ public abstract class ExternalSorter {
       combiner.combine(kvIter, writer);
     } catch (InterruptedException e) {
       Thread.currentThread().interrupt();
-      throw new IOException(e);
+      throw new IOInterruptedException("Combiner interrupted", e);
     }
   }
 

http://git-wip-us.apache.org/repos/asf/tez/blob/55308630/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/sort/impl/PipelinedSorter.java
----------------------------------------------------------------------
diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/sort/impl/PipelinedSorter.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/sort/impl/PipelinedSorter.java
index 030440e..d9de921 100644
--- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/sort/impl/PipelinedSorter.java
+++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/sort/impl/PipelinedSorter.java
@@ -29,6 +29,7 @@ import java.util.LinkedList;
 import java.util.List;
 import java.util.ListIterator;
 import java.util.PriorityQueue;
+import java.util.concurrent.ExecutionException;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
 import java.util.concurrent.Future;
@@ -37,6 +38,7 @@ import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Preconditions;
 import com.google.common.base.Stopwatch;
 import com.google.common.collect.Lists;
+import org.apache.tez.runtime.library.api.IOInterruptedException;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.hadoop.conf.Configuration;
@@ -341,7 +343,7 @@ public class PipelinedSorter extends ExternalSorter {
     mapOutputByteCounter.increment(valend - keystart);
   }
 
-  public void spill() throws IOException { 
+  public void spill() throws IOException {
     // create spill file
     final long size = capacity +
         + (partitions * APPROX_HEADER_LENGTH);
@@ -352,7 +354,13 @@ public class PipelinedSorter extends ExternalSorter {
     FSDataOutputStream out = rfs.create(filename, true, 4096);
 
     try {
-      merger.ready(); // wait for all the future results from sort threads
+      try {
+        merger.ready(); // wait for all the future results from sort threads
+      } catch (InterruptedException e) {
+        Thread.currentThread().interrupt();
+        LOG.info("Interrupted while waiting for mergers to complete");
+        throw new IOInterruptedException("Interrupted while waiting for mergers to complete", e);
+      }
       LOG.info("Spilling to " + filename.toString());
       for (int i = 0; i < partitions; ++i) {
         if (isThreadInterrupted()) {
@@ -391,9 +399,6 @@ public class PipelinedSorter extends ExternalSorter {
       //TODO: honor cache limits
       indexCacheList.add(spillRec);
       ++numSpills;
-    } catch(InterruptedException ie) {
-      // TODO:the combiner has been interrupted
-      Thread.currentThread().interrupt();
     } finally {
       out.close();
     }
@@ -568,6 +573,7 @@ public class PipelinedSorter extends ExternalSorter {
         cleanup();
       }
       Thread.currentThread().interrupt();
+      throw new IOInterruptedException("Interrupted while closing Output", ie);
     }
   }
 
@@ -1046,7 +1052,7 @@ public class PipelinedSorter extends ExternalSorter {
           iter = futureIter.get();
           this.add(iter);
         }
-        
+
         StringBuilder sb = new StringBuilder();
         for(SpanIterator sp: heap) {
             sb.append(sp.toString());
@@ -1056,7 +1062,7 @@ public class PipelinedSorter extends ExternalSorter {
         }
         LOG.info("Heap = " + sb.toString());
         return true;
-      } catch(Exception e) {
+      } catch(ExecutionException e) {
         LOG.info(e.toString());
         return false;
       }

http://git-wip-us.apache.org/repos/asf/tez/blob/55308630/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/sort/impl/dflt/DefaultSorter.java
----------------------------------------------------------------------
diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/sort/impl/dflt/DefaultSorter.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/sort/impl/dflt/DefaultSorter.java
index 9783c79..afe07f0 100644
--- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/sort/impl/dflt/DefaultSorter.java
+++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/sort/impl/dflt/DefaultSorter.java
@@ -32,6 +32,7 @@ import java.util.concurrent.locks.ReentrantLock;
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.collect.Lists;
 import org.apache.commons.lang.exception.ExceptionUtils;
+import org.apache.tez.runtime.library.api.IOInterruptedException;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.hadoop.conf.Configuration;
@@ -607,7 +608,7 @@ public class DefaultSorter extends ExternalSorter implements IndexedSortable {
                 }
               } catch (InterruptedException e) {
                 Thread.currentThread().interrupt();
-                  throw new IOException(
+                  throw new IOInterruptedException(
                       "Buffer interrupted while waiting for the writer", e);
               }
             }
@@ -644,7 +645,7 @@ public class DefaultSorter extends ExternalSorter implements IndexedSortable {
       LOG.info("Spill thread interrupted");
       //Reset status
       Thread.currentThread().interrupt();
-      throw new IOException("Spill failed", e);
+      throw new IOInterruptedException("Spill failed", e);
     }
   }
 
@@ -769,7 +770,11 @@ public class DefaultSorter extends ExternalSorter implements IndexedSortable {
             + " failed : " + ExceptionUtils.getStackTrace(lspillException);
         outputContext.fatalError(lspillException, logMsg);
       }
-      throw new IOException("Spill failed", lspillException);
+      if (lspillException instanceof InterruptedException) {
+        throw new IOInterruptedException("Spill failed", lspillException);
+      } else {
+        throw new IOException("Spill failed", lspillException);
+      }
     }
   }
 

http://git-wip-us.apache.org/repos/asf/tez/blob/55308630/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/writers/UnorderedPartitionedKVWriter.java
----------------------------------------------------------------------
diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/writers/UnorderedPartitionedKVWriter.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/writers/UnorderedPartitionedKVWriter.java
index 37d8be6..9a98cd1 100644
--- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/writers/UnorderedPartitionedKVWriter.java
+++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/writers/UnorderedPartitionedKVWriter.java
@@ -54,6 +54,7 @@ import org.apache.tez.common.counters.TezCounter;
 import org.apache.tez.runtime.api.Event;
 import org.apache.tez.runtime.api.OutputContext;
 import org.apache.tez.runtime.api.events.CompositeDataMovementEvent;
+import org.apache.tez.runtime.library.api.IOInterruptedException;
 import org.apache.tez.runtime.library.api.TezRuntimeConfiguration;
 import org.apache.tez.runtime.library.common.Constants;
 import org.apache.tez.runtime.library.common.sort.impl.IFile;
@@ -354,7 +355,7 @@ public class UnorderedPartitionedKVWriter extends BaseUnorderedPartitionedKVWrit
           return availableBuffers.take();
         } catch (InterruptedException e) {
           Thread.currentThread().interrupt();
-          throw new IOException("Interrupted while waiting for next buffer", e);
+          throw new IOInterruptedException("Interrupted while waiting for next buffer", e);
         }
       }
     } else {

http://git-wip-us.apache.org/repos/asf/tez/blob/55308630/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/input/ConcatenatedMergedKeyValueInput.java
----------------------------------------------------------------------
diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/input/ConcatenatedMergedKeyValueInput.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/input/ConcatenatedMergedKeyValueInput.java
index 14b1e2c..45784d9 100644
--- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/input/ConcatenatedMergedKeyValueInput.java
+++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/input/ConcatenatedMergedKeyValueInput.java
@@ -64,7 +64,13 @@ public class ConcatenatedMergedKeyValueInput extends MergedLogicalInput {
           currentReader = (KeyValueReader) reader;
           currentReaderIndex++;
         } catch (Exception e) {
-          throw new IOException(e);
+          // An InterruptedException is not expected here since this works off of
+          // underlying readers which take care of throwing IOInterruptedExceptions
+          if (e instanceof IOException) {
+            throw (IOException) e;
+          } else {
+            throw new IOException(e);
+          }
         }
       }
       return true;

http://git-wip-us.apache.org/repos/asf/tez/blob/55308630/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/input/ConcatenatedMergedKeyValuesInput.java
----------------------------------------------------------------------
diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/input/ConcatenatedMergedKeyValuesInput.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/input/ConcatenatedMergedKeyValuesInput.java
index 2a1e4c6..27ff324 100644
--- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/input/ConcatenatedMergedKeyValuesInput.java
+++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/input/ConcatenatedMergedKeyValuesInput.java
@@ -65,7 +65,13 @@ public class ConcatenatedMergedKeyValuesInput extends MergedLogicalInput {
           currentReader = (KeyValuesReader) reader;
           currentReaderIndex++;
         } catch (Exception e) {
-          throw new IOException(e);
+          // An InterruptedException is not expected here since this works off of
+          // underlying readers which take care of throwing IOInterruptedExceptions
+          if (e instanceof IOException) {
+            throw (IOException)e;
+          } else {
+            throw new IOException(e);
+          }
         }
       }
       return true;

http://git-wip-us.apache.org/repos/asf/tez/blob/55308630/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/input/OrderedGroupedKVInput.java
----------------------------------------------------------------------
diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/input/OrderedGroupedKVInput.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/input/OrderedGroupedKVInput.java
index 49cf102..12a5955 100644
--- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/input/OrderedGroupedKVInput.java
+++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/input/OrderedGroupedKVInput.java
@@ -27,6 +27,8 @@ import java.util.concurrent.BlockingQueue;
 import java.util.concurrent.LinkedBlockingQueue;
 import java.util.concurrent.atomic.AtomicBoolean;
 
+import com.google.common.annotations.VisibleForTesting;
+import org.apache.tez.runtime.library.api.IOInterruptedException;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.hadoop.classification.InterfaceAudience;
@@ -121,7 +123,7 @@ public class OrderedGroupedKVInput extends AbstractLogicalInput {
     if (!isStarted.get()) {
       memoryUpdateCallbackHandler.validateUpdateReceived();
       // Start the shuffle - copy and merge
-      shuffle = new Shuffle(getContext(), conf, getNumPhysicalInputs(), memoryUpdateCallbackHandler.getMemoryAssigned());
+      shuffle = createShuffle();
       shuffle.run();
       if (LOG.isDebugEnabled()) {
         LOG.debug("Initialized the handlers in shuffle..Safe to start processing..");
@@ -137,6 +139,11 @@ public class OrderedGroupedKVInput extends AbstractLogicalInput {
     }
   }
 
+  @VisibleForTesting
+  Shuffle createShuffle() throws IOException {
+    return new Shuffle(getContext(), conf, getNumPhysicalInputs(), memoryUpdateCallbackHandler.getMemoryAssigned());
+  }
+
   /**
    * Check if the input is ready for consumption
    *
@@ -207,6 +214,7 @@ public class OrderedGroupedKVInput extends AbstractLogicalInput {
    * previous K-V pair will throw an Exception
    *
    * @return a KVReader over the sorted input.
+   * @throws {@link IOInterruptedException} if IO was performing a blocking operation and was interrupted
    */
   @Override
   public KeyValuesReader getReader() throws IOException, TezException {
@@ -240,7 +248,7 @@ public class OrderedGroupedKVInput extends AbstractLogicalInput {
         waitForInputReady();
       } catch (InterruptedException e) {
         Thread.currentThread().interrupt();
-        throw new IOException("Interrupted while waiting for input ready", e);
+        throw new IOInterruptedException("Interrupted while waiting for input ready", e);
       }
     }
     @SuppressWarnings("rawtypes")

http://git-wip-us.apache.org/repos/asf/tez/blob/55308630/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/readers/TestUnorderedKVReader.java
----------------------------------------------------------------------
diff --git a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/readers/TestUnorderedKVReader.java b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/readers/TestUnorderedKVReader.java
index 51ea42d..80bdc42 100644
--- a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/readers/TestUnorderedKVReader.java
+++ b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/readers/TestUnorderedKVReader.java
@@ -26,6 +26,7 @@ import org.apache.hadoop.io.Text;
 import org.apache.tez.common.counters.TaskCounter;
 import org.apache.tez.common.counters.TezCounter;
 import org.apache.tez.common.counters.TezCounters;
+import org.apache.tez.runtime.library.api.IOInterruptedException;
 import org.apache.tez.runtime.library.api.TezRuntimeConfiguration;
 import org.apache.tez.runtime.library.common.InputAttemptIdentifier;
 import org.apache.tez.runtime.library.common.shuffle.FetchedInput;
@@ -48,6 +49,7 @@ import java.util.LinkedList;
 import static junit.framework.TestCase.fail;
 import static org.mockito.Mockito.doAnswer;
 import static org.mockito.Mockito.doNothing;
+import static org.mockito.Mockito.doThrow;
 import static org.mockito.Mockito.mock;
 import static org.mockito.Mockito.spy;
 
@@ -165,4 +167,24 @@ public class TestUnorderedKVReader {
     }
   }
 
+  @Test(timeout = 5000)
+  public void testInterruptOnNext() throws IOException, InterruptedException {
+    ShuffleManager shuffleManager = mock(ShuffleManager.class);
+
+    // Simulate an interrupt while waiting for the next fetched input.
+    doThrow(new InterruptedException()).when(shuffleManager).getNextInput();
+    TezCounters counters = new TezCounters();
+    TezCounter inputRecords = counters.findCounter(TaskCounter.INPUT_RECORDS_PROCESSED);
+    UnorderedKVReader<Text, Text> reader =
+        new UnorderedKVReader<Text, Text>(shuffleManager, defaultConf, null, false, -1, -1,
+            inputRecords);
+
+    try {
+      reader.next();
+      fail("No data available to reader. Should not be able to access any record");
+    } catch (IOInterruptedException e) {
+      // Expected exception. Any other should fail the test.
+    }
+  }
+
 }

http://git-wip-us.apache.org/repos/asf/tez/blob/55308630/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/input/TestOrderedGroupedKVInput.java
----------------------------------------------------------------------
diff --git a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/input/TestOrderedGroupedKVInput.java b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/input/TestOrderedGroupedKVInput.java
new file mode 100644
index 0000000..d4be802
--- /dev/null
+++ b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/input/TestOrderedGroupedKVInput.java
@@ -0,0 +1,113 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.runtime.library.input;
+
+import static org.mockito.Matchers.any;
+import static org.mockito.Mockito.doAnswer;
+import static org.mockito.Mockito.doReturn;
+import static org.mockito.Mockito.doThrow;
+import static org.mockito.Mockito.mock;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.tez.common.TezUtils;
+import org.apache.tez.common.counters.TezCounters;
+import org.apache.tez.dag.api.TezConfiguration;
+import org.apache.tez.dag.api.TezException;
+import org.apache.tez.dag.api.UserPayload;
+import org.apache.tez.runtime.api.InputContext;
+import org.apache.tez.runtime.library.api.IOInterruptedException;
+import org.apache.tez.runtime.library.common.MemoryUpdateCallbackHandler;
+import org.apache.tez.runtime.library.common.shuffle.orderedgrouped.Shuffle;
+import org.junit.Assert;
+import org.junit.Test;
+import org.mockito.invocation.InvocationOnMock;
+import org.mockito.stubbing.Answer;
+
+public class TestOrderedGroupedKVInput {
+
+  @Test(timeout = 5000)
+  public void testInterruptWhileAwaitingInput() throws IOException, TezException {
+
+    InputContext inputContext = createMockInputContext();
+    OrderedGroupedKVInput kvInput = new OrderedGroupedKVInputForTest(inputContext, 10);
+    kvInput.initialize();
+
+    kvInput.start();
+
+    try {
+      kvInput.getReader();
+      Assert.fail("getReader should not return since underlying inputs are not ready");
+    } catch (IOException e) {
+      Assert.assertTrue(e instanceof IOInterruptedException);
+    }
+
+  }
+
+
+  private InputContext createMockInputContext() throws IOException {
+    InputContext inputContext = mock(InputContext.class);
+    Configuration conf = new TezConfiguration();
+    UserPayload payLoad = TezUtils.createUserPayloadFromConf(conf);
+    String[] workingDirs = new String[]{"workDir1"};
+    TezCounters counters = new TezCounters();
+
+
+    doReturn(payLoad).when(inputContext).getUserPayload();
+    doReturn(workingDirs).when(inputContext).getWorkDirs();
+    doReturn(200 * 1024 * 1024l).when(inputContext).getTotalMemoryAvailableToTask();
+    doReturn(counters).when(inputContext).getCounters();
+
+    doAnswer(new Answer() {
+      @Override
+      public Object answer(InvocationOnMock invocation) throws Throwable {
+        Object[] args = invocation.getArguments();
+
+        if (args[1] instanceof MemoryUpdateCallbackHandler) {
+          MemoryUpdateCallbackHandler memUpdateCallbackHandler =
+              (MemoryUpdateCallbackHandler) args[1];
+          memUpdateCallbackHandler.memoryAssigned(200 * 1024 * 1024);
+        } else {
+          Assert.fail();
+        }
+        return null;
+      }
+    }).when(inputContext).requestInitialMemory(any(long.class),
+        any(MemoryUpdateCallbackHandler.class));
+
+    return inputContext;
+  }
+
+  static class OrderedGroupedKVInputForTest extends OrderedGroupedKVInput {
+
+    public OrderedGroupedKVInputForTest(InputContext inputContext, int numPhysicalInputs) {
+      super(inputContext, numPhysicalInputs);
+    }
+
+    Shuffle createShuffle() throws IOException {
+      Shuffle shuffle = mock(Shuffle.class);
+      try {
+        doThrow(new InterruptedException()).when(shuffle).waitForInput();
+      } catch (InterruptedException e) {
+        Assert.fail();
+      } catch (TezException e) {
+        Assert.fail();
+      }
+      return shuffle;
+    }
+  }
+
+}


[34/43] tez git commit: TEZ-2283. Fixes after rebase 04/07. (sseth)

Posted by ss...@apache.org.
TEZ-2283. Fixes after rebase 04/07. (sseth)


Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/26164791
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/26164791
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/26164791

Branch: refs/heads/TEZ-2003
Commit: 2616479150b7d4c6c0f7dd0359fff5e99b75869d
Parents: 5a46aa5
Author: Siddharth Seth <ss...@apache.org>
Authored: Tue Apr 7 13:13:30 2015 -0700
Committer: Siddharth Seth <ss...@apache.org>
Committed: Fri May 8 14:41:30 2015 -0700

----------------------------------------------------------------------
 TEZ-2003-CHANGES.txt                                         | 1 +
 .../java/org/apache/tez/dag/app/rm/TestContainerReuse.java   | 8 ++++----
 .../org/apache/tez/service/impl/ContainerRunnerImpl.java     | 4 +---
 3 files changed, 6 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tez/blob/26164791/TEZ-2003-CHANGES.txt
----------------------------------------------------------------------
diff --git a/TEZ-2003-CHANGES.txt b/TEZ-2003-CHANGES.txt
index d51686d..6a4399c 100644
--- a/TEZ-2003-CHANGES.txt
+++ b/TEZ-2003-CHANGES.txt
@@ -12,5 +12,6 @@ ALL CHANGES:
   TEZ-2175. Task priority should be available to the TaskCommunicator plugin.
   TEZ-2187. Allow TaskCommunicators to report failed / killed attempts.
   TEZ-2241. Miscellaneous fixes after last reabse.
+  TEZ-2283. Fixes after rebase 04/07.
 
 INCOMPATIBLE CHANGES:

http://git-wip-us.apache.org/repos/asf/tez/blob/26164791/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestContainerReuse.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestContainerReuse.java b/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestContainerReuse.java
index d45346a..8b4ff07 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestContainerReuse.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestContainerReuse.java
@@ -1242,9 +1242,9 @@ public class TestContainerReuse {
     assertEquals(1, assignEvent.getRemoteTaskLocalResources().size());
 
     // Task assigned to container completed successfully. Container should be re-used.
-    taskSchedulerEventHandler.handleEvent(new AMSchedulerEventTAEnded(ta111, container1.getId(), TaskAttemptState.SUCCEEDED));
+    taskSchedulerEventHandler.handleEvent(new AMSchedulerEventTAEnded(ta111, container1.getId(), TaskAttemptState.SUCCEEDED, null, 0));
     drainableAppCallback.drain();
-    verify(taskScheduler).deallocateTask(eq(ta111), eq(true));
+    verify(taskScheduler).deallocateTask(eq(ta111), eq(true), eq((TaskAttemptEndReason)null));
     verify(taskSchedulerEventHandler).taskAllocated(eq(ta112), any(Object.class), eq(container1));
     verify(rmClient, times(0)).releaseAssignedContainer(eq(container1.getId()));
     eventHandler.verifyNoInvocations(AMContainerEventStopRequest.class);
@@ -1254,9 +1254,9 @@ public class TestContainerReuse {
 
     // Task assigned to container completed successfully.
     // Verify reuse across hosts.
-    taskSchedulerEventHandler.handleEvent(new AMSchedulerEventTAEnded(ta112, container1.getId(), TaskAttemptState.SUCCEEDED));
+    taskSchedulerEventHandler.handleEvent(new AMSchedulerEventTAEnded(ta112, container1.getId(), TaskAttemptState.SUCCEEDED, null, 0));
     drainableAppCallback.drain();
-    verify(taskScheduler).deallocateTask(eq(ta112), eq(true));
+    verify(taskScheduler).deallocateTask(eq(ta112), eq(true), eq((TaskAttemptEndReason)null));
     verify(rmClient, times(0)).releaseAssignedContainer(eq(container1.getId()));
     eventHandler.verifyNoInvocations(AMContainerEventStopRequest.class);
     eventHandler.reset();

http://git-wip-us.apache.org/repos/asf/tez/blob/26164791/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/ContainerRunnerImpl.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/ContainerRunnerImpl.java b/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/ContainerRunnerImpl.java
index e7c02c8..f3fc442 100644
--- a/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/ContainerRunnerImpl.java
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/ContainerRunnerImpl.java
@@ -69,10 +69,8 @@ import org.apache.tez.runtime.common.objectregistry.ObjectRegistryImpl;
 import org.apache.tez.runtime.task.TezChild;
 import org.apache.tez.runtime.task.TezChild.ContainerExecutionResult;
 import org.apache.tez.shufflehandler.ShuffleHandler;
-import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos;
 import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos.RunContainerRequestProto;
 import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos.SubmitWorkRequestProto;
-import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos.TaskSpecProto;
 import org.apache.tez.util.ProtoConverters;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -446,7 +444,7 @@ public class ContainerRunnerImpl extends AbstractService implements ContainerRun
           request.getContainerIdString());
 
       taskRunner = new TezTaskRunner(conf, taskUgi, localDirs,
-          ProtoConverters.getTaskSpecfromProto(request.getTaskSpec()), umbilical,
+          ProtoConverters.getTaskSpecfromProto(request.getTaskSpec()),
           request.getAppAttemptNumber(),
           serviceConsumerMetadata, envMap, startedInputsMap, taskReporter, executor, objectRegistry,
           pid,


[29/43] tez git commit: TEZ-2187. Allow TaskCommunicators to report failed / killed attempts. (sseth)

Posted by ss...@apache.org.
TEZ-2187. Allow TaskCommunicators to report failed / killed attempts.  (sseth)


Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/e1ab1914
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/e1ab1914
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/e1ab1914

Branch: refs/heads/TEZ-2003
Commit: e1ab191494658b5470d12d4b25b016530b28d398
Parents: 99a1b85
Author: Siddharth Seth <ss...@apache.org>
Authored: Tue Mar 10 01:25:39 2015 -0700
Committer: Siddharth Seth <ss...@apache.org>
Committed: Fri May 8 14:41:30 2015 -0700

----------------------------------------------------------------------
 TEZ-2003-CHANGES.txt                            |   1 +
 .../org/apache/tez/common/TezUtilsInternal.java |  60 +++++++++
 .../tez/dag/api/TaskAttemptEndReason.java       |  24 ++++
 .../records/TaskAttemptTerminationCause.java    |   7 +-
 .../apache/tez/dag/api/TaskCommunicator.java    |   2 +
 .../tez/dag/api/TaskCommunicatorContext.java    |  13 +-
 .../dag/app/TaskAttemptListenerImpTezDag.java   |  33 +++++
 .../event/TaskAttemptEventAttemptFailed.java    |   2 +
 .../event/TaskAttemptEventAttemptKilled.java    |  47 +++++++
 .../dag/app/dag/event/TaskAttemptEventType.java |   5 +-
 .../tez/dag/app/dag/impl/TaskAttemptImpl.java   |  33 ++++-
 .../tez/dag/app/rm/AMSchedulerEventTAEnded.java |   9 +-
 .../dag/app/rm/LocalTaskSchedulerService.java   |   3 +-
 .../dag/app/rm/TaskSchedulerEventHandler.java   |   7 +-
 .../tez/dag/app/rm/TaskSchedulerService.java    |   6 +-
 .../dag/app/rm/YarnTaskSchedulerService.java    |   8 +-
 .../app/TestTaskAttemptListenerImplTezDag.java  |   1 +
 .../app/TestTaskAttemptListenerImplTezDag2.java | 126 +++++++++++++++++++
 .../tez/dag/app/rm/TestContainerReuse.java      |  65 +++++-----
 .../app/rm/TestLocalTaskSchedulerService.java   |   5 +-
 .../tez/dag/app/rm/TestTaskScheduler.java       |  18 +--
 .../rm/TezTestServiceTaskSchedulerService.java  |   3 +-
 .../TezTestServiceTaskCommunicatorImpl.java     |  36 +++++-
 .../org/apache/tez/service/ContainerRunner.java |   5 +-
 .../tez/service/MiniTezTestServiceCluster.java  |   5 +-
 .../tez/service/impl/ContainerRunnerImpl.java   |  60 +++++++--
 .../apache/tez/service/impl/TezTestService.java |   6 +-
 .../impl/TezTestServiceProtocolServerImpl.java  |  10 +-
 .../tez/tests/TestExternalTezServices.java      |  29 +++++
 29 files changed, 548 insertions(+), 81 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tez/blob/e1ab1914/TEZ-2003-CHANGES.txt
----------------------------------------------------------------------
diff --git a/TEZ-2003-CHANGES.txt b/TEZ-2003-CHANGES.txt
index 7726815..774a685 100644
--- a/TEZ-2003-CHANGES.txt
+++ b/TEZ-2003-CHANGES.txt
@@ -10,5 +10,6 @@ ALL CHANGES:
   TEZ-2138. Fix minor bugs in adding default scheduler, getting launchers.
   TEZ-2139. Update tez version to 0.7.0-TEZ-2003-SNAPSHOT.
   TEZ-2175. Task priority should be available to the TaskCommunicator plugin.
+  TEZ-2187. Allow TaskCommunicators to report failed / killed attempts.
 
 INCOMPATIBLE CHANGES:

http://git-wip-us.apache.org/repos/asf/tez/blob/e1ab1914/tez-common/src/main/java/org/apache/tez/common/TezUtilsInternal.java
----------------------------------------------------------------------
diff --git a/tez-common/src/main/java/org/apache/tez/common/TezUtilsInternal.java b/tez-common/src/main/java/org/apache/tez/common/TezUtilsInternal.java
index 9c78377..347a4f6 100644
--- a/tez-common/src/main/java/org/apache/tez/common/TezUtilsInternal.java
+++ b/tez-common/src/main/java/org/apache/tez/common/TezUtilsInternal.java
@@ -41,6 +41,7 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.security.Credentials;
 import org.apache.log4j.Appender;
 import org.apache.tez.dag.api.DagTypeConverters;
+import org.apache.tez.dag.api.TaskAttemptEndReason;
 import org.apache.tez.dag.api.TezConstants;
 import org.apache.tez.dag.api.records.DAGProtos;
 import org.apache.tez.dag.api.records.DAGProtos.ConfigurationProto;
@@ -49,6 +50,7 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import com.google.common.base.Stopwatch;
+import org.apache.tez.dag.records.TaskAttemptTerminationCause;
 
 @Private
 public class TezUtilsInternal {
@@ -234,4 +236,62 @@ public class TezUtilsInternal {
     return sb.toString();
   }
 
+  public static TaskAttemptTerminationCause fromTaskAttemptEndReason(
+      TaskAttemptEndReason taskAttemptEndReason) {
+    if (taskAttemptEndReason == null) {
+      return null;
+    }
+    switch (taskAttemptEndReason) {
+      case COMMUNICATION_ERROR:
+        return TaskAttemptTerminationCause.COMMUNICATION_ERROR;
+      case SERVICE_BUSY:
+        return TaskAttemptTerminationCause.SERVICE_BUSY;
+      case INTERRUPTED_BY_SYSTEM:
+        return TaskAttemptTerminationCause.INTERRUPTED_BY_SYSTEM;
+      case INTERRUPTED_BY_USER:
+        return TaskAttemptTerminationCause.INTERRUPTED_BY_USER;
+      case OTHER:
+        return TaskAttemptTerminationCause.UNKNOWN_ERROR;
+      default:
+        return TaskAttemptTerminationCause.UNKNOWN_ERROR;
+    }
+  }
+
+  public static TaskAttemptEndReason toTaskAttemptEndReason(TaskAttemptTerminationCause cause) {
+    // TODO Post TEZ-2003. Consolidate these states, and mappings.
+    if (cause == null) {
+      return null;
+    }
+    switch (cause) {
+      case COMMUNICATION_ERROR:
+        return TaskAttemptEndReason.COMMUNICATION_ERROR;
+      case SERVICE_BUSY:
+        return TaskAttemptEndReason.SERVICE_BUSY;
+      case INTERRUPTED_BY_SYSTEM:
+        return TaskAttemptEndReason.INTERRUPTED_BY_SYSTEM;
+      case INTERRUPTED_BY_USER:
+        return TaskAttemptEndReason.INTERRUPTED_BY_USER;
+      case UNKNOWN_ERROR:
+      case TERMINATED_BY_CLIENT:
+      case TERMINATED_AT_SHUTDOWN:
+      case INTERNAL_PREEMPTION:
+      case EXTERNAL_PREEMPTION:
+      case TERMINATED_INEFFECTIVE_SPECULATION:
+      case TERMINATED_EFFECTIVE_SPECULATION:
+      case TERMINATED_ORPHANED:
+      case APPLICATION_ERROR:
+      case FRAMEWORK_ERROR:
+      case INPUT_READ_ERROR:
+      case OUTPUT_WRITE_ERROR:
+      case OUTPUT_LOST:
+      case TASK_HEARTBEAT_ERROR:
+      case CONTAINER_LAUNCH_FAILED:
+      case CONTAINER_EXITED:
+      case CONTAINER_STOPPED:
+      case NODE_FAILED:
+      case NODE_DISK_ERROR:
+      default:
+        return TaskAttemptEndReason.OTHER;
+    }
+  }
 }

http://git-wip-us.apache.org/repos/asf/tez/blob/e1ab1914/tez-common/src/main/java/org/apache/tez/dag/api/TaskAttemptEndReason.java
----------------------------------------------------------------------
diff --git a/tez-common/src/main/java/org/apache/tez/dag/api/TaskAttemptEndReason.java b/tez-common/src/main/java/org/apache/tez/dag/api/TaskAttemptEndReason.java
new file mode 100644
index 0000000..96a4768
--- /dev/null
+++ b/tez-common/src/main/java/org/apache/tez/dag/api/TaskAttemptEndReason.java
@@ -0,0 +1,24 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.dag.api;
+
+// TODO TEZ-2003 Expose as a public API
+public enum TaskAttemptEndReason {
+  COMMUNICATION_ERROR,
+  SERVICE_BUSY,
+  INTERRUPTED_BY_SYSTEM,
+  INTERRUPTED_BY_USER,
+  OTHER
+}

http://git-wip-us.apache.org/repos/asf/tez/blob/e1ab1914/tez-common/src/main/java/org/apache/tez/dag/records/TaskAttemptTerminationCause.java
----------------------------------------------------------------------
diff --git a/tez-common/src/main/java/org/apache/tez/dag/records/TaskAttemptTerminationCause.java b/tez-common/src/main/java/org/apache/tez/dag/records/TaskAttemptTerminationCause.java
index ef0bb33..7112d9e 100644
--- a/tez-common/src/main/java/org/apache/tez/dag/records/TaskAttemptTerminationCause.java
+++ b/tez-common/src/main/java/org/apache/tez/dag/records/TaskAttemptTerminationCause.java
@@ -41,5 +41,10 @@ public enum TaskAttemptTerminationCause {
   CONTAINER_STOPPED, // Container stopped or released by Tez
   NODE_FAILED, // Node for the container failed
   NODE_DISK_ERROR, // Disk failed on the node runnign the task
-  
+
+  COMMUNICATION_ERROR, // Equivalent to a launch failure
+  SERVICE_BUSY, // Service rejected the task
+  INTERRUPTED_BY_SYSTEM, // Interrupted by the system. e.g. Pre-emption
+  INTERRUPTED_BY_USER, // Interrupted by the user
+
 }

http://git-wip-us.apache.org/repos/asf/tez/blob/e1ab1914/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicator.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicator.java b/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicator.java
index 82eed20..945091e 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicator.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicator.java
@@ -45,6 +45,8 @@ public abstract class TaskCommunicator extends AbstractService {
                                                   Credentials credentials,
                                                   boolean credentialsChanged, int priority);
 
+  // TODO TEZ-2003. Are additional APIs required to mark a container as completed ? - for completeness.
+
   // TODO TEZ-2003 Remove reference to TaskAttemptID
   public abstract void unregisterRunningTaskAttempt(TezTaskAttemptID taskAttemptID);
 

http://git-wip-us.apache.org/repos/asf/tez/blob/e1ab1914/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicatorContext.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicatorContext.java b/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicatorContext.java
index 41675fe..a85fb7f 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicatorContext.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicatorContext.java
@@ -14,6 +14,7 @@
 
 package org.apache.tez.dag.api;
 
+import javax.annotation.Nullable;
 import java.io.IOException;
 
 import org.apache.hadoop.security.Credentials;
@@ -37,15 +38,21 @@ public interface TaskCommunicatorContext {
   // TODO TEZ-2003 Move to vertex, taskIndex, version
   boolean canCommit(TezTaskAttemptID taskAttemptId) throws IOException;
 
+  // TODO TEZ-2003 Split the heartbeat API to a liveness check and a status update
   TaskHeartbeatResponse heartbeat(TaskHeartbeatRequest request) throws IOException, TezException;
 
   boolean isKnownContainer(ContainerId containerId);
 
-  // TODO TEZ-2003 Move to vertex, taskIndex, version
+  // TODO TEZ-2003 Move to vertex, taskIndex, version. Rename to taskAttempt*
   void taskStartedRemotely(TezTaskAttemptID taskAttemptID, ContainerId containerId);
 
-  // TODO TEZ-2003 Add an API to register task failure - for example, a communication failure.
-  // This will have to take into consideration the TA_FAILED event
+  // TODO TEZ-2003 Move to vertex, taskIndex, version. Rename to taskAttempt*
+  void taskKilled(TezTaskAttemptID taskAttemptId, TaskAttemptEndReason taskAttemptEndReason, @Nullable String diagnostics);
+
+  // TODO TEZ-2003 Move to vertex, taskIndex, version. Rename to taskAttempt*
+  void taskFailed(TezTaskAttemptID taskAttemptId, TaskAttemptEndReason taskAttemptEndReason, @Nullable String diagnostics);
+
+  // TODO TEZ-2003 API. Should a method exist for task succeeded.
 
   // TODO Eventually Add methods to report availability stats to the scheduler.
 }

http://git-wip-us.apache.org/repos/asf/tez/blob/e1ab1914/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java b/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
index b570301..94f6cae 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
@@ -37,14 +37,17 @@ import org.apache.tez.runtime.api.events.TaskStatusUpdateEvent;
 import org.apache.tez.runtime.api.impl.EventType;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
+import com.google.common.annotations.VisibleForTesting;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.security.Credentials;
 import org.apache.hadoop.service.AbstractService;
 import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
 import org.apache.hadoop.yarn.api.records.NodeId;
 import org.apache.tez.common.ReflectionUtils;
+import org.apache.tez.common.TezUtilsInternal;
 import org.apache.tez.dag.api.TaskCommunicator;
 import org.apache.tez.dag.api.TaskCommunicatorContext;
+import org.apache.tez.dag.api.TaskAttemptEndReason;
 import org.apache.tez.dag.api.TaskHeartbeatResponse;
 import org.apache.tez.dag.api.TezConstants;
 import org.apache.tez.dag.api.TezException;
@@ -54,7 +57,10 @@ import org.apache.hadoop.yarn.util.ConverterUtils;
 import org.apache.tez.dag.api.TaskHeartbeatRequest;
 import org.apache.tez.dag.app.dag.DAG;
 import org.apache.tez.dag.app.dag.Task;
+import org.apache.tez.dag.app.dag.event.TaskAttemptEventAttemptFailed;
+import org.apache.tez.dag.app.dag.event.TaskAttemptEventAttemptKilled;
 import org.apache.tez.dag.app.dag.event.TaskAttemptEventStartedRemotely;
+import org.apache.tez.dag.app.dag.event.TaskAttemptEventType;
 import org.apache.tez.dag.app.dag.event.VertexEventRouteEvent;
 import org.apache.tez.dag.app.rm.container.AMContainerTask;
 import org.apache.tez.dag.records.TezTaskAttemptID;
@@ -257,6 +263,33 @@ public class TaskAttemptListenerImpTezDag extends AbstractService implements
     pingContainerHeartbeatHandler(containerId);
   }
 
+  @Override
+  public void taskKilled(TezTaskAttemptID taskAttemptId, TaskAttemptEndReason taskAttemptEndReason,
+                         String diagnostics) {
+    // Regular flow via TaskAttempt will take care of un-registering from the heartbeat handler,
+    // and messages from the scheduler will release the container.
+    // TODO TEZ-2003 Maybe consider un-registering here itself, since the task is not active anymore,
+    // instead of waiting for the unregister to flow through the Container.
+    // Fix along the same lines as TEZ-2124 by introducing an explict context.
+    context.getEventHandler().handle(new TaskAttemptEventAttemptKilled(taskAttemptId,
+        diagnostics, TezUtilsInternal.fromTaskAttemptEndReason(
+        taskAttemptEndReason)));
+  }
+
+  @Override
+  public void taskFailed(TezTaskAttemptID taskAttemptId, TaskAttemptEndReason taskAttemptEndReason,
+                         String diagnostics) {
+    // Regular flow via TaskAttempt will take care of un-registering from the heartbeat handler,
+    // and messages from the scheduler will release the container.
+    // TODO TEZ-2003 Maybe consider un-registering here itself, since the task is not active anymore,
+    // instead of waiting for the unregister to flow through the Container.
+    // Fix along the same lines as TEZ-2124 by introducing an explict context.
+    context.getEventHandler().handle(new TaskAttemptEventAttemptFailed(taskAttemptId,
+        TaskAttemptEventType.TA_FAILED, diagnostics, TezUtilsInternal.fromTaskAttemptEndReason(
+        taskAttemptEndReason)));
+  }
+
+
   /**
    * Child checking whether it can commit.
    * <p/>

http://git-wip-us.apache.org/repos/asf/tez/blob/e1ab1914/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/TaskAttemptEventAttemptFailed.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/TaskAttemptEventAttemptFailed.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/TaskAttemptEventAttemptFailed.java
index b9c1d09..7ec8921 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/TaskAttemptEventAttemptFailed.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/TaskAttemptEventAttemptFailed.java
@@ -26,6 +26,8 @@ public class TaskAttemptEventAttemptFailed extends TaskAttemptEvent
 
   private final String diagnostics;
   private final TaskAttemptTerminationCause errorCause;
+
+  /* Accepted Types - FAILED, TIMED_OUT */
   public TaskAttemptEventAttemptFailed(TezTaskAttemptID id,
       TaskAttemptEventType type, String diagnostics, TaskAttemptTerminationCause errorCause) {
     super(id, type);

http://git-wip-us.apache.org/repos/asf/tez/blob/e1ab1914/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/TaskAttemptEventAttemptKilled.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/TaskAttemptEventAttemptKilled.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/TaskAttemptEventAttemptKilled.java
new file mode 100644
index 0000000..72e6b07
--- /dev/null
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/TaskAttemptEventAttemptKilled.java
@@ -0,0 +1,47 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.dag.app.dag.event;
+
+import org.apache.tez.dag.records.TaskAttemptTerminationCause;
+import org.apache.tez.dag.records.TezTaskAttemptID;
+
+public class TaskAttemptEventAttemptKilled extends TaskAttemptEvent
+    implements DiagnosableEvent, TaskAttemptEventTerminationCauseEvent {
+
+  private final String diagnostics;
+  private final TaskAttemptTerminationCause errorCause;
+  public TaskAttemptEventAttemptKilled(TezTaskAttemptID id,
+                                       String diagnostics,
+                                       TaskAttemptTerminationCause errorCause) {
+    super(id, TaskAttemptEventType.TA_KILLED);
+    this.diagnostics = diagnostics;
+    this.errorCause = errorCause;
+  }
+
+  @Override
+  public String getDiagnosticInfo() {
+    return diagnostics;
+  }
+
+  @Override
+  public TaskAttemptTerminationCause getTerminationCause() {
+    return errorCause;
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/tez/blob/e1ab1914/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/TaskAttemptEventType.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/TaskAttemptEventType.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/TaskAttemptEventType.java
index b7aca36..6d20368 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/TaskAttemptEventType.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/TaskAttemptEventType.java
@@ -26,14 +26,15 @@ public enum TaskAttemptEventType {
 //Producer:Task, Speculator
   TA_SCHEDULE,
 
-//Producer: TaskAttemptListener
+//Producer: TaskAttemptListener | Vertex after routing events
   TA_STARTED_REMOTELY,
   TA_STATUS_UPDATE,
   TA_DIAGNOSTICS_UPDATE, // REMOVE THIS - UNUSED
   TA_DONE,
   TA_FAILED,
+  TA_KILLED, // Generated by TaskCommunicators
   TA_TIMED_OUT,
-  
+
 //Producer: Client, Scheduler, On speculation.
   TA_KILL_REQUEST,
 

http://git-wip-us.apache.org/repos/asf/tez/blob/e1ab1914/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TaskAttemptImpl.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TaskAttemptImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TaskAttemptImpl.java
index c80571d..11d4df9 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TaskAttemptImpl.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TaskAttemptImpl.java
@@ -51,6 +51,7 @@ import org.apache.hadoop.yarn.state.StateMachineFactory;
 import org.apache.hadoop.yarn.util.Clock;
 import org.apache.hadoop.yarn.util.RackResolver;
 import org.apache.hadoop.yarn.util.Records;
+import org.apache.tez.common.TezUtilsInternal;
 import org.apache.tez.common.counters.DAGCounter;
 import org.apache.tez.common.counters.TezCounters;
 import org.apache.tez.dag.api.ProcessorDescriptor;
@@ -185,6 +186,11 @@ public class TaskAttemptImpl implements TaskAttempt,
 
   private final StateMachine<TaskAttemptStateInternal, TaskAttemptEventType, TaskAttemptEvent> stateMachine;
 
+  // TODO TEZ-2003 We may need some additional state management for STATUS_UPDATES, FAILED, KILLED coming in before
+  // TASK_STARTED_REMOTELY. In case of a PUSH it's more intuitive to send TASK_STARTED_REMOTELY after communicating
+  // with the listening service and getting a response, which in turn can trigger STATUS_UPDATES / FAILED / KILLED
+
+  // TA_KILLED handled the same as TA_KILL_REQUEST. Just a different name indicating a request / already killed.
   private static StateMachineFactory
   <TaskAttemptImpl, TaskAttemptStateInternal, TaskAttemptEventType, TaskAttemptEvent>
   stateMachineFactory
@@ -225,6 +231,10 @@ public class TaskAttemptImpl implements TaskAttempt,
           new TerminatedBeforeRunningTransition(KILLED_HELPER))
       .addTransition(TaskAttemptStateInternal.START_WAIT,
           TaskAttemptStateInternal.KILL_IN_PROGRESS,
+          TaskAttemptEventType.TA_KILLED,
+          new TerminatedBeforeRunningTransition(KILLED_HELPER))
+      .addTransition(TaskAttemptStateInternal.START_WAIT,
+          TaskAttemptStateInternal.KILL_IN_PROGRESS,
           TaskAttemptEventType.TA_NODE_FAILED,
           new NodeFailedBeforeRunningTransition())
       .addTransition(TaskAttemptStateInternal.START_WAIT,
@@ -265,6 +275,10 @@ public class TaskAttemptImpl implements TaskAttempt,
           new TerminatedWhileRunningTransition(KILLED_HELPER))
       .addTransition(TaskAttemptStateInternal.RUNNING,
           TaskAttemptStateInternal.KILL_IN_PROGRESS,
+          TaskAttemptEventType.TA_KILLED,
+          new TerminatedWhileRunningTransition(KILLED_HELPER))
+      .addTransition(TaskAttemptStateInternal.RUNNING,
+          TaskAttemptStateInternal.KILL_IN_PROGRESS,
           TaskAttemptEventType.TA_NODE_FAILED,
           new TerminatedWhileRunningTransition(KILLED_HELPER))
       .addTransition(TaskAttemptStateInternal.RUNNING,
@@ -303,6 +317,7 @@ public class TaskAttemptImpl implements TaskAttempt,
               TaskAttemptEventType.TA_DONE, TaskAttemptEventType.TA_FAILED,
               TaskAttemptEventType.TA_TIMED_OUT,
               TaskAttemptEventType.TA_KILL_REQUEST,
+              TaskAttemptEventType.TA_KILLED,
               TaskAttemptEventType.TA_NODE_FAILED,
               TaskAttemptEventType.TA_CONTAINER_TERMINATING,
               TaskAttemptEventType.TA_OUTPUT_FAILED))
@@ -324,6 +339,7 @@ public class TaskAttemptImpl implements TaskAttempt,
               TaskAttemptEventType.TA_DONE, TaskAttemptEventType.TA_FAILED,
               TaskAttemptEventType.TA_TIMED_OUT,
               TaskAttemptEventType.TA_KILL_REQUEST,
+              TaskAttemptEventType.TA_KILLED,
               TaskAttemptEventType.TA_NODE_FAILED,
               TaskAttemptEventType.TA_CONTAINER_TERMINATING,
               TaskAttemptEventType.TA_OUTPUT_FAILED))
@@ -342,6 +358,7 @@ public class TaskAttemptImpl implements TaskAttempt,
               TaskAttemptEventType.TA_DONE, TaskAttemptEventType.TA_FAILED,
               TaskAttemptEventType.TA_TIMED_OUT,
               TaskAttemptEventType.TA_KILL_REQUEST,
+              TaskAttemptEventType.TA_KILLED,
               TaskAttemptEventType.TA_NODE_FAILED,
               TaskAttemptEventType.TA_CONTAINER_TERMINATING,
               TaskAttemptEventType.TA_CONTAINER_TERMINATED,
@@ -361,6 +378,7 @@ public class TaskAttemptImpl implements TaskAttempt,
               TaskAttemptEventType.TA_DONE, TaskAttemptEventType.TA_FAILED,
               TaskAttemptEventType.TA_TIMED_OUT,
               TaskAttemptEventType.TA_KILL_REQUEST,
+              TaskAttemptEventType.TA_KILLED,
               TaskAttemptEventType.TA_NODE_FAILED,
               TaskAttemptEventType.TA_CONTAINER_TERMINATING,
               TaskAttemptEventType.TA_CONTAINER_TERMINATED,
@@ -383,6 +401,12 @@ public class TaskAttemptImpl implements TaskAttempt,
           TaskAttemptStateInternal.SUCCEEDED,
           EnumSet.of(TaskAttemptStateInternal.KILLED,
               TaskAttemptStateInternal.SUCCEEDED),
+          TaskAttemptEventType.TA_KILLED,
+          new TerminatedAfterSuccessTransition())
+      .addTransition(
+          TaskAttemptStateInternal.SUCCEEDED,
+          EnumSet.of(TaskAttemptStateInternal.KILLED,
+              TaskAttemptStateInternal.SUCCEEDED),
           TaskAttemptEventType.TA_NODE_FAILED,
           new TerminatedAfterSuccessTransition())
       .addTransition(
@@ -434,7 +458,6 @@ public class TaskAttemptImpl implements TaskAttempt,
     this.leafVertex = leafVertex;
   }
 
-
   @Override
   public TezTaskAttemptID getID() {
     return attemptId;
@@ -1030,6 +1053,7 @@ public class TaskAttemptImpl implements TaskAttempt,
 
       // Compute node/rack location request even if re-scheduled.
       Set<String> racks = new HashSet<String>();
+      // TODO Post TEZ-2003. Allow for a policy in the VMPlugin to define localicty for different attempts.
       TaskLocationHint locationHint = ta.getTaskLocationHint();
       if (locationHint != null) {
         if (locationHint.getRacks() != null) {
@@ -1104,6 +1128,8 @@ public class TaskAttemptImpl implements TaskAttempt,
 
     @Override
     public void transition(TaskAttemptImpl ta, TaskAttemptEvent event) {
+      // This transition should not be invoked directly, if a scheduler event has already been sent out.
+      // Sub-classes should be used if a scheduler request has been sent.
       ta.setFinishTime();
 
       if (event instanceof DiagnosableEvent) {
@@ -1218,7 +1244,8 @@ public class TaskAttemptImpl implements TaskAttempt,
       // Inform the scheduler
       if (sendSchedulerEvent()) {
         ta.sendEvent(new AMSchedulerEventTAEnded(ta, ta.containerId, helper
-            .getTaskAttemptState(), ta.getVertex().getTaskSchedulerIdentifier()));
+            .getTaskAttemptState(), TezUtilsInternal.toTaskAttemptEndReason(ta.terminationCause),
+            ta.getVertex().getTaskSchedulerIdentifier()));
       }
     }
   }
@@ -1300,7 +1327,7 @@ public class TaskAttemptImpl implements TaskAttempt,
 
       // Inform the Scheduler.
       ta.sendEvent(new AMSchedulerEventTAEnded(ta, ta.containerId,
-          TaskAttemptState.SUCCEEDED, ta.getVertex().getTaskSchedulerIdentifier()));
+          TaskAttemptState.SUCCEEDED, null, ta.getVertex().getTaskSchedulerIdentifier()));
 
       // Inform the task.
       ta.sendEvent(new TaskEventTAUpdate(ta.attemptId,

http://git-wip-us.apache.org/repos/asf/tez/blob/e1ab1914/tez-dag/src/main/java/org/apache/tez/dag/app/rm/AMSchedulerEventTAEnded.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/AMSchedulerEventTAEnded.java b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/AMSchedulerEventTAEnded.java
index 2ace642..a775948 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/AMSchedulerEventTAEnded.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/AMSchedulerEventTAEnded.java
@@ -18,6 +18,7 @@
 package org.apache.tez.dag.app.rm;
 
 import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.tez.dag.api.TaskAttemptEndReason;
 import org.apache.tez.dag.api.oldrecords.TaskAttemptState;
 import org.apache.tez.dag.app.dag.TaskAttempt;
 import org.apache.tez.dag.records.TezTaskAttemptID;
@@ -27,14 +28,16 @@ public class AMSchedulerEventTAEnded extends AMSchedulerEvent {
   private final TaskAttempt attempt;
   private final ContainerId containerId;
   private final TaskAttemptState state;
+  private final TaskAttemptEndReason taskAttemptEndReason;
   private final int schedulerId;
 
   public AMSchedulerEventTAEnded(TaskAttempt attempt, ContainerId containerId,
-      TaskAttemptState state, int schedulerId) {
+      TaskAttemptState state, TaskAttemptEndReason taskAttemptEndReason, int schedulerId) {
     super(AMSchedulerEventType.S_TA_ENDED);
     this.attempt = attempt;
     this.containerId = containerId;
     this.state = state;
+    this.taskAttemptEndReason = taskAttemptEndReason;
     this.schedulerId = schedulerId;
   }
 
@@ -57,4 +60,8 @@ public class AMSchedulerEventTAEnded extends AMSchedulerEvent {
   public int getSchedulerId() {
     return schedulerId;
   }
+
+  public TaskAttemptEndReason getTaskAttemptEndReason() {
+    return taskAttemptEndReason;
+  }
 }

http://git-wip-us.apache.org/repos/asf/tez/blob/e1ab1914/tez-dag/src/main/java/org/apache/tez/dag/app/rm/LocalTaskSchedulerService.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/LocalTaskSchedulerService.java b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/LocalTaskSchedulerService.java
index 72a074f..a234e07 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/LocalTaskSchedulerService.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/LocalTaskSchedulerService.java
@@ -40,6 +40,7 @@ import org.apache.hadoop.yarn.api.records.ContainerId;
 import org.apache.hadoop.yarn.api.records.NodeId;
 import org.apache.hadoop.yarn.api.records.Priority;
 import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.tez.dag.api.TaskAttemptEndReason;
 import org.apache.tez.dag.api.TezConfiguration;
 import org.apache.tez.dag.api.TezUncheckedException;
 import org.apache.tez.dag.app.AppContext;
@@ -149,7 +150,7 @@ public class LocalTaskSchedulerService extends TaskSchedulerService {
   }
   
   @Override
-  public boolean deallocateTask(Object task, boolean taskSucceeded) {
+  public boolean deallocateTask(Object task, boolean taskSucceeded, TaskAttemptEndReason endReason) {
     return taskRequestHandler.addDeallocateTaskRequest(task);
   }
 

http://git-wip-us.apache.org/repos/asf/tez/blob/e1ab1914/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerEventHandler.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerEventHandler.java b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerEventHandler.java
index 8e5fc71..9f09f68 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerEventHandler.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerEventHandler.java
@@ -289,7 +289,9 @@ public class TaskSchedulerEventHandler extends AbstractService
 
   private void handleTAUnsuccessfulEnd(AMSchedulerEventTAEnded event) {
     TaskAttempt attempt = event.getAttempt();
-    boolean wasContainerAllocated = taskSchedulers[event.getSchedulerId()].deallocateTask(attempt, false);
+    // Propagate state and failure cause (if any) when informing the scheduler about the de-allocation.
+    boolean wasContainerAllocated = taskSchedulers[event.getSchedulerId()]
+        .deallocateTask(attempt, false, event.getTaskAttemptEndReason());
     // use stored value of container id in case the scheduler has removed this
     // assignment because the task has been deallocated earlier.
     // retroactive case
@@ -311,6 +313,7 @@ public class TaskSchedulerEventHandler extends AbstractService
       sendEvent(new AMContainerEventStopRequest(attemptContainerId));
       // Inform the Node - the task has asked to be STOPPED / has already
       // stopped.
+      // AMNodeImpl blacklisting logic does not account for KILLED attempts.
       sendEvent(new AMNodeEventTaskAttemptEnded(appContext.getAllContainers().
           get(attemptContainerId).getContainer().getNodeId(), attemptContainerId,
           attempt.getID(), event.getState() == TaskAttemptState.FAILED));
@@ -332,7 +335,7 @@ public class TaskSchedulerEventHandler extends AbstractService
     }
 
     boolean wasContainerAllocated = taskSchedulers[event.getSchedulerId()].deallocateTask(attempt,
-        true);
+        true, null);
     if (!wasContainerAllocated) {
       LOG.error("De-allocated successful task: " + attempt.getID()
           + ", but TaskScheduler reported no container assigned to task");

http://git-wip-us.apache.org/repos/asf/tez/blob/e1ab1914/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerService.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerService.java b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerService.java
index 48d5455..07dfcd6 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerService.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerService.java
@@ -32,6 +32,7 @@ import org.apache.hadoop.yarn.api.records.NodeId;
 import org.apache.hadoop.yarn.api.records.NodeReport;
 import org.apache.hadoop.yarn.api.records.Priority;
 import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.tez.dag.api.TaskAttemptEndReason;
 
 public abstract class TaskSchedulerService extends AbstractService{
 
@@ -61,8 +62,9 @@ public abstract class TaskSchedulerService extends AbstractService{
   public abstract void allocateTask(Object task, Resource capability,
       ContainerId containerId, Priority priority, Object containerSignature,
       Object clientCookie);
-  
-  public abstract boolean deallocateTask(Object task, boolean taskSucceeded);
+
+  /** Plugin writers must ensure to de-allocate a container once it's done, so that it can be collected. */
+  public abstract boolean deallocateTask(Object task, boolean taskSucceeded, TaskAttemptEndReason endReason);
 
   public abstract Object deallocateContainer(ContainerId containerId);
 

http://git-wip-us.apache.org/repos/asf/tez/blob/e1ab1914/tez-dag/src/main/java/org/apache/tez/dag/app/rm/YarnTaskSchedulerService.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/YarnTaskSchedulerService.java b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/YarnTaskSchedulerService.java
index 44f5484..1fc9ac2 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/YarnTaskSchedulerService.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/YarnTaskSchedulerService.java
@@ -56,6 +56,7 @@ import org.apache.hadoop.yarn.client.api.async.AMRMClientAsync;
 import org.apache.hadoop.yarn.exceptions.YarnException;
 import org.apache.hadoop.yarn.util.RackResolver;
 import org.apache.hadoop.yarn.util.resource.Resources;
+import org.apache.tez.dag.api.TaskAttemptEndReason;
 import org.apache.tez.dag.api.TezConfiguration;
 import org.apache.tez.dag.api.TezUncheckedException;
 import org.apache.tez.dag.app.AppContext;
@@ -987,10 +988,13 @@ public class YarnTaskSchedulerService extends TaskSchedulerService
    *          the task to de-allocate.
    * @param taskSucceeded
    *          specify whether the task succeeded or failed.
+   * @param endReason
+   *          reason for the task ending
    * @return true if a container is assigned to this task.
    */
   @Override
-  public boolean deallocateTask(Object task, boolean taskSucceeded) {
+  public boolean deallocateTask(Object task, boolean taskSucceeded,
+                                TaskAttemptEndReason endReason) {
     Map<CookieContainerRequest, Container> assignedContainers = null;
 
     synchronized (this) {
@@ -1180,7 +1184,7 @@ public class YarnTaskSchedulerService extends TaskSchedulerService
             CookieContainerRequest request = entry.getValue();
             if (request.getPriority().equals(lowestPriNewContainer.getPriority())) {
               LOG.info("Resending request for task again: " + task);
-              deallocateTask(task, true);
+              deallocateTask(task, true, null);
               allocateTask(task, request.getCapability(), 
                   (request.getNodes() == null ? null : 
                     request.getNodes().toArray(new String[request.getNodes().size()])), 

http://git-wip-us.apache.org/repos/asf/tez/blob/e1ab1914/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskAttemptListenerImplTezDag.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskAttemptListenerImplTezDag.java b/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskAttemptListenerImplTezDag.java
index 0cf1959..076f9e0 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskAttemptListenerImplTezDag.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskAttemptListenerImplTezDag.java
@@ -76,6 +76,7 @@ import org.junit.Test;
 import org.mockito.ArgumentCaptor;
 
 @SuppressWarnings("unchecked")
+// TODO TEZ-2003 Rename to TestTezTaskCommunicator
 public class TestTaskAttemptListenerImplTezDag {
   private ApplicationId appId;
   private AppContext appContext;

http://git-wip-us.apache.org/repos/asf/tez/blob/e1ab1914/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskAttemptListenerImplTezDag2.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskAttemptListenerImplTezDag2.java b/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskAttemptListenerImplTezDag2.java
new file mode 100644
index 0000000..934543f
--- /dev/null
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskAttemptListenerImplTezDag2.java
@@ -0,0 +1,126 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.dag.app;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.mockito.Matchers.any;
+import static org.mockito.Mockito.doReturn;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.times;
+import static org.mockito.Mockito.verify;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.hadoop.security.Credentials;
+import org.apache.hadoop.yarn.api.records.ApplicationAccessType;
+import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
+import org.apache.hadoop.yarn.api.records.ApplicationId;
+import org.apache.hadoop.yarn.api.records.Container;
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.api.records.NodeId;
+import org.apache.hadoop.yarn.event.Event;
+import org.apache.hadoop.yarn.event.EventHandler;
+import org.apache.tez.dag.api.TaskAttemptEndReason;
+import org.apache.tez.dag.app.dag.DAG;
+import org.apache.tez.dag.app.dag.event.TaskAttemptEventAttemptFailed;
+import org.apache.tez.dag.app.dag.event.TaskAttemptEventAttemptKilled;
+import org.apache.tez.dag.app.rm.container.AMContainer;
+import org.apache.tez.dag.app.rm.container.AMContainerMap;
+import org.apache.tez.dag.app.rm.container.AMContainerTask;
+import org.apache.tez.dag.records.TaskAttemptTerminationCause;
+import org.apache.tez.dag.records.TezTaskAttemptID;
+import org.apache.tez.runtime.api.impl.TaskSpec;
+import org.junit.Test;
+import org.mockito.ArgumentCaptor;
+
+// TODO TEZ-2003. Rename to TestTaskAttemptListener | whatever TaskAttemptListener is renamed to.
+public class TestTaskAttemptListenerImplTezDag2 {
+
+  @Test(timeout = 5000)
+  public void testTaskAttemptFailedKilled() {
+    ApplicationId appId = ApplicationId.newInstance(1000, 1);
+    ApplicationAttemptId appAttemptId = ApplicationAttemptId.newInstance(appId, 1);
+    Credentials credentials = new Credentials();
+    AppContext appContext = mock(AppContext.class);
+    EventHandler eventHandler = mock(EventHandler.class);
+    DAG dag = mock(DAG.class);
+    AMContainerMap amContainerMap = mock(AMContainerMap.class);
+    Map<ApplicationAccessType, String> appAcls = new HashMap<ApplicationAccessType, String>();
+    doReturn(eventHandler).when(appContext).getEventHandler();
+    doReturn(dag).when(appContext).getCurrentDAG();
+    doReturn(appAttemptId).when(appContext).getApplicationAttemptId();
+    doReturn(credentials).when(appContext).getAppCredentials();
+    doReturn(appAcls).when(appContext).getApplicationACLs();
+    doReturn(amContainerMap).when(appContext).getAllContainers();
+    NodeId nodeId = NodeId.newInstance("localhost", 0);
+    AMContainer amContainer = mock(AMContainer.class);
+    Container container = mock(Container.class);
+    doReturn(nodeId).when(container).getNodeId();
+    doReturn(amContainer).when(amContainerMap).get(any(ContainerId.class));
+    doReturn(container).when(amContainer).getContainer();
+
+    TaskAttemptListenerImpTezDag taskAttemptListener =
+        new TaskAttemptListenerImpTezDag(appContext, mock(TaskHeartbeatHandler.class),
+            mock(ContainerHeartbeatHandler.class), null, null, false);
+
+    TaskSpec taskSpec1 = mock(TaskSpec.class);
+    TezTaskAttemptID taskAttemptId1 = mock(TezTaskAttemptID.class);
+    doReturn(taskAttemptId1).when(taskSpec1).getTaskAttemptID();
+    AMContainerTask amContainerTask1 = new AMContainerTask(taskSpec1, null, null, false, 10);
+
+    TaskSpec taskSpec2 = mock(TaskSpec.class);
+    TezTaskAttemptID taskAttemptId2 = mock(TezTaskAttemptID.class);
+    doReturn(taskAttemptId2).when(taskSpec2).getTaskAttemptID();
+    AMContainerTask amContainerTask2 = new AMContainerTask(taskSpec2, null, null, false, 10);
+
+    ContainerId containerId1 = createContainerId(appId, 1);
+    taskAttemptListener.registerRunningContainer(containerId1, 0);
+    taskAttemptListener.registerTaskAttempt(amContainerTask1, containerId1, 0);
+    ContainerId containerId2 = createContainerId(appId, 2);
+    taskAttemptListener.registerRunningContainer(containerId2, 0);
+    taskAttemptListener.registerTaskAttempt(amContainerTask2, containerId2, 0);
+
+
+    taskAttemptListener
+        .taskFailed(taskAttemptId1, TaskAttemptEndReason.COMMUNICATION_ERROR, "Diagnostics1");
+    taskAttemptListener
+        .taskKilled(taskAttemptId2, TaskAttemptEndReason.SERVICE_BUSY, "Diagnostics2");
+
+    ArgumentCaptor<Event> argumentCaptor = ArgumentCaptor.forClass(Event.class);
+    verify(eventHandler, times(2)).handle(argumentCaptor.capture());
+    assertTrue(argumentCaptor.getAllValues().get(0) instanceof TaskAttemptEventAttemptFailed);
+    assertTrue(argumentCaptor.getAllValues().get(1) instanceof TaskAttemptEventAttemptKilled);
+    TaskAttemptEventAttemptFailed failedEvent =
+        (TaskAttemptEventAttemptFailed) argumentCaptor.getAllValues().get(0);
+    TaskAttemptEventAttemptKilled killedEvent =
+        (TaskAttemptEventAttemptKilled) argumentCaptor.getAllValues().get(1);
+
+    assertEquals("Diagnostics1", failedEvent.getDiagnosticInfo());
+    assertEquals(TaskAttemptTerminationCause.COMMUNICATION_ERROR,
+        failedEvent.getTerminationCause());
+
+    assertEquals("Diagnostics2", killedEvent.getDiagnosticInfo());
+    assertEquals(TaskAttemptTerminationCause.SERVICE_BUSY, killedEvent.getTerminationCause());
+    // TODO TEZ-2003. Verify unregistration from the registered list
+  }
+
+  private ContainerId createContainerId(ApplicationId applicationId, int containerIdx) {
+    ApplicationAttemptId appAttemptId = ApplicationAttemptId.newInstance(applicationId, 1);
+    ContainerId containerId = ContainerId.newInstance(appAttemptId, containerIdx);
+    return containerId;
+  }
+}

http://git-wip-us.apache.org/repos/asf/tez/blob/e1ab1914/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestContainerReuse.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestContainerReuse.java b/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestContainerReuse.java
index c1169ef..d45346a 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestContainerReuse.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestContainerReuse.java
@@ -22,6 +22,7 @@ import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.assertEquals;
 import static org.mockito.Matchers.any;
 import static org.mockito.Matchers.eq;
+import static org.mockito.Matchers.isNull;
 import static org.mockito.Mockito.doReturn;
 import static org.mockito.Mockito.doAnswer;
 import static org.mockito.Mockito.mock;
@@ -57,6 +58,7 @@ import org.apache.tez.common.MockDNSToSwitchMapping;
 import org.apache.tez.dag.api.InputDescriptor;
 import org.apache.tez.dag.api.OutputDescriptor;
 import org.apache.tez.dag.api.ProcessorDescriptor;
+import org.apache.tez.dag.api.TaskAttemptEndReason;
 import org.apache.tez.dag.api.TezConfiguration;
 import org.apache.tez.dag.api.TaskLocationHint;
 import org.apache.tez.dag.api.oldrecords.TaskAttemptState;
@@ -92,6 +94,7 @@ import org.apache.tez.runtime.api.impl.OutputSpec;
 import org.apache.tez.runtime.api.impl.TaskSpec;
 import org.junit.BeforeClass;
 import org.junit.Test;
+import org.mockito.internal.matchers.Null;
 import org.mockito.invocation.InvocationOnMock;
 import org.mockito.stubbing.Answer;
 
@@ -223,9 +226,9 @@ public class TestContainerReuse {
 
     taskSchedulerEventHandler.handleEvent(
       new AMSchedulerEventTAEnded(
-        ta11, containerHost1.getId(), TaskAttemptState.SUCCEEDED, 0));
+        ta11, containerHost1.getId(), TaskAttemptState.SUCCEEDED, null, 0));
     drainableAppCallback.drain();
-    verify(taskScheduler).deallocateTask(eq(ta11), eq(true));
+    verify(taskScheduler).deallocateTask(eq(ta11), eq(true), eq((TaskAttemptEndReason)null));
     verify(taskSchedulerEventHandler, times(1)).taskAllocated(
       eq(ta31), any(Object.class), eq(containerHost1));
     verify(rmClient, times(0)).releaseAssignedContainer(
@@ -235,7 +238,7 @@ public class TestContainerReuse {
 
     taskSchedulerEventHandler.handleEvent(
       new AMSchedulerEventTAEnded(ta21, containerHost2.getId(),
-        TaskAttemptState.SUCCEEDED, 0));
+        TaskAttemptState.SUCCEEDED, null, 0));
 
     long currentTs = System.currentTimeMillis();
     Throwable exception = null;
@@ -356,9 +359,9 @@ public class TestContainerReuse {
 
     taskSchedulerEventHandler.handleEvent(
       new AMSchedulerEventTAEnded(ta21, containerHost2.getId(),
-        TaskAttemptState.SUCCEEDED, 0));
+        TaskAttemptState.SUCCEEDED, null, 0));
     drainableAppCallback.drain();
-    verify(taskScheduler).deallocateTask(eq(ta21), eq(true));
+    verify(taskScheduler).deallocateTask(eq(ta21), eq(true), eq((TaskAttemptEndReason)null));
     verify(taskSchedulerEventHandler, times(0)).taskAllocated(
       eq(ta31), any(Object.class), eq(containerHost2));
     verify(rmClient, times(1)).releaseAssignedContainer(
@@ -459,9 +462,9 @@ public class TestContainerReuse {
     verify(taskSchedulerEventHandler).taskAllocated(eq(ta11), any(Object.class), eq(container1));
 
     // Task assigned to container completed successfully. Container should be re-used.
-    taskSchedulerEventHandler.handleEvent(new AMSchedulerEventTAEnded(ta11, container1.getId(), TaskAttemptState.SUCCEEDED, 0));
+    taskSchedulerEventHandler.handleEvent(new AMSchedulerEventTAEnded(ta11, container1.getId(), TaskAttemptState.SUCCEEDED, null, 0));
     drainableAppCallback.drain();
-    verify(taskScheduler).deallocateTask(eq(ta11), eq(true));
+    verify(taskScheduler).deallocateTask(eq(ta11), eq(true), eq((TaskAttemptEndReason)null));
     verify(taskSchedulerEventHandler).taskAllocated(eq(ta12), any(Object.class), eq(container1));
     verify(rmClient, times(0)).releaseAssignedContainer(eq(container1.getId()));
     eventHandler.verifyNoInvocations(AMContainerEventStopRequest.class);
@@ -469,19 +472,19 @@ public class TestContainerReuse {
 
     // Task assigned to container completed successfully.
     // Verify reuse across hosts.
-    taskSchedulerEventHandler.handleEvent(new AMSchedulerEventTAEnded(ta12, container1.getId(), TaskAttemptState.SUCCEEDED, 0));
+    taskSchedulerEventHandler.handleEvent(new AMSchedulerEventTAEnded(ta12, container1.getId(), TaskAttemptState.SUCCEEDED, null, 0));
     drainableAppCallback.drain();
-    verify(taskScheduler).deallocateTask(eq(ta12), eq(true));
+    verify(taskScheduler).deallocateTask(eq(ta12), eq(true), eq((TaskAttemptEndReason)null));
     verify(taskSchedulerEventHandler).taskAllocated(eq(ta13), any(Object.class), eq(container1));
     verify(rmClient, times(0)).releaseAssignedContainer(eq(container1.getId()));
     eventHandler.verifyNoInvocations(AMContainerEventStopRequest.class);
     eventHandler.reset();
 
     // Verify no re-use if a previous task fails.
-    taskSchedulerEventHandler.handleEvent(new AMSchedulerEventTAEnded(ta13, container1.getId(), TaskAttemptState.FAILED, 0));
+    taskSchedulerEventHandler.handleEvent(new AMSchedulerEventTAEnded(ta13, container1.getId(), TaskAttemptState.FAILED, null, 0));
     drainableAppCallback.drain();
     verify(taskSchedulerEventHandler, times(0)).taskAllocated(eq(ta14), any(Object.class), eq(container1));
-    verify(taskScheduler).deallocateTask(eq(ta13), eq(false));
+    verify(taskScheduler).deallocateTask(eq(ta13), eq(false), eq((TaskAttemptEndReason)null));
     verify(rmClient).releaseAssignedContainer(eq(container1.getId()));
     eventHandler.verifyInvocation(AMContainerEventStopRequest.class);
     eventHandler.reset();
@@ -496,9 +499,9 @@ public class TestContainerReuse {
     verify(taskSchedulerEventHandler).taskAllocated(eq(ta14), any(Object.class), eq(container2));
 
     // Task assigned to container completed successfully. No pending requests. Container should be released.
-    taskSchedulerEventHandler.handleEvent(new AMSchedulerEventTAEnded(ta14, container2.getId(), TaskAttemptState.SUCCEEDED, 0));
+    taskSchedulerEventHandler.handleEvent(new AMSchedulerEventTAEnded(ta14, container2.getId(), TaskAttemptState.SUCCEEDED, null, 0));
     drainableAppCallback.drain();
-    verify(taskScheduler).deallocateTask(eq(ta14), eq(true));
+    verify(taskScheduler).deallocateTask(eq(ta14), eq(true), eq((TaskAttemptEndReason)null));
     verify(rmClient).releaseAssignedContainer(eq(container2.getId()));
     eventHandler.verifyInvocation(AMContainerEventStopRequest.class);
     eventHandler.reset();
@@ -607,9 +610,9 @@ public class TestContainerReuse {
 
     // First task had profiling on. This container can not be reused further.
     taskSchedulerEventHandler.handleEvent(
-        new AMSchedulerEventTAEnded(ta11, container1.getId(), TaskAttemptState.SUCCEEDED, 0));
+        new AMSchedulerEventTAEnded(ta11, container1.getId(), TaskAttemptState.SUCCEEDED, null, 0));
     drainableAppCallback.drain();
-    verify(taskScheduler).deallocateTask(eq(ta11), eq(true));
+    verify(taskScheduler).deallocateTask(eq(ta11), eq(true), eq((TaskAttemptEndReason)null));
     verify(taskSchedulerEventHandler, times(0)).taskAllocated(eq(ta12), any(Object.class),
         eq(container1));
     verify(rmClient, times(1)).releaseAssignedContainer(eq(container1.getId()));
@@ -653,9 +656,9 @@ public class TestContainerReuse {
 
     // Verify that the container can not be reused when profiling option is turned on
     // Even for 2 tasks having same profiling option can have container reusability.
-    taskSchedulerEventHandler.handleEvent(new AMSchedulerEventTAEnded(ta13, container2.getId(), TaskAttemptState.SUCCEEDED, 0));
+    taskSchedulerEventHandler.handleEvent(new AMSchedulerEventTAEnded(ta13, container2.getId(), TaskAttemptState.SUCCEEDED, null, 0));
     drainableAppCallback.drain();
-    verify(taskScheduler).deallocateTask(eq(ta13), eq(true));
+    verify(taskScheduler).deallocateTask(eq(ta13), eq(true), eq((TaskAttemptEndReason)null));
     verify(taskSchedulerEventHandler, times(0)).taskAllocated(eq(ta14), any(Object.class),
         eq(container2));
     verify(rmClient, times(1)).releaseAssignedContainer(eq(container2.getId()));
@@ -698,9 +701,9 @@ public class TestContainerReuse {
     verify(taskSchedulerEventHandler).taskAllocated(eq(ta15), any(Object.class), eq(container3));
 
     //Ensure task 6 (of vertex 1) is allocated to same container
-    taskSchedulerEventHandler.handleEvent(new AMSchedulerEventTAEnded(ta15, container3.getId(), TaskAttemptState.SUCCEEDED, 0));
+    taskSchedulerEventHandler.handleEvent(new AMSchedulerEventTAEnded(ta15, container3.getId(), TaskAttemptState.SUCCEEDED, null, 0));
     drainableAppCallback.drain();
-    verify(taskScheduler).deallocateTask(eq(ta15), eq(true));
+    verify(taskScheduler).deallocateTask(eq(ta15), eq(true), eq((TaskAttemptEndReason)null));
     verify(taskSchedulerEventHandler).taskAllocated(eq(ta16), any(Object.class), eq(container3));
     eventHandler.reset();
 
@@ -811,9 +814,9 @@ public class TestContainerReuse {
     // until delay expires.
     taskSchedulerEventHandler.handleEvent(
       new AMSchedulerEventTAEnded(ta11, container1.getId(),
-        TaskAttemptState.SUCCEEDED, 0));
+        TaskAttemptState.SUCCEEDED, null, 0));
     drainableAppCallback.drain();
-    verify(taskScheduler).deallocateTask(eq(ta11), eq(true));
+    verify(taskScheduler).deallocateTask(eq(ta11), eq(true), eq((TaskAttemptEndReason)null));
     verify(taskSchedulerEventHandler, times(0)).taskAllocated(
       eq(ta12), any(Object.class), eq(container1));
     verify(rmClient, times(0)).releaseAssignedContainer(eq(container1.getId()));
@@ -828,7 +831,7 @@ public class TestContainerReuse {
     // TA12 completed.
     taskSchedulerEventHandler.handleEvent(
       new AMSchedulerEventTAEnded(ta12, container1.getId(),
-        TaskAttemptState.SUCCEEDED, 0));
+        TaskAttemptState.SUCCEEDED, null, 0));
     drainableAppCallback.drain();
     LOG.info("Sleeping to ensure that the scheduling loop runs");
     Thread.sleep(3000l);
@@ -946,9 +949,9 @@ public class TestContainerReuse {
     // Container should  be assigned to task21.
     taskSchedulerEventHandler.handleEvent(
       new AMSchedulerEventTAEnded(ta11, container1.getId(),
-        TaskAttemptState.SUCCEEDED, 0));
+        TaskAttemptState.SUCCEEDED, null, 0));
     drainableAppCallback.drain();
-    verify(taskScheduler).deallocateTask(eq(ta11), eq(true));
+    verify(taskScheduler).deallocateTask(eq(ta11), eq(true), eq((TaskAttemptEndReason)null));
     verify(taskSchedulerEventHandler).taskAllocated(
       eq(ta21), any(Object.class), eq(container1));
     verify(rmClient, times(0)).releaseAssignedContainer(eq(container1.getId()));
@@ -956,7 +959,7 @@ public class TestContainerReuse {
     // Task 2 completes.
     taskSchedulerEventHandler.handleEvent(
       new AMSchedulerEventTAEnded(ta21, container1.getId(),
-        TaskAttemptState.SUCCEEDED, 0));
+        TaskAttemptState.SUCCEEDED, null, 0));
     verify(rmClient, times(0)).releaseAssignedContainer(eq(container1.getId()));
 
     LOG.info("Sleeping to ensure that the scheduling loop runs");
@@ -1065,9 +1068,9 @@ public class TestContainerReuse {
     assertEquals(1, assignEvent.getRemoteTaskLocalResources().size());
     
     // Task assigned to container completed successfully. Container should be re-used.
-    taskSchedulerEventHandler.handleEvent(new AMSchedulerEventTAEnded(ta111, container1.getId(), TaskAttemptState.SUCCEEDED, 0));
+    taskSchedulerEventHandler.handleEvent(new AMSchedulerEventTAEnded(ta111, container1.getId(), TaskAttemptState.SUCCEEDED, null, 0));
     drainableAppCallback.drain();
-    verify(taskScheduler).deallocateTask(eq(ta111), eq(true));
+    verify(taskScheduler).deallocateTask(eq(ta111), eq(true), eq((TaskAttemptEndReason)null));
     verify(taskSchedulerEventHandler).taskAllocated(eq(ta112), any(Object.class), eq(container1));
     verify(rmClient, times(0)).releaseAssignedContainer(eq(container1.getId()));
     eventHandler.verifyNoInvocations(AMContainerEventStopRequest.class);
@@ -1077,9 +1080,9 @@ public class TestContainerReuse {
 
     // Task assigned to container completed successfully.
     // Verify reuse across hosts.
-    taskSchedulerEventHandler.handleEvent(new AMSchedulerEventTAEnded(ta112, container1.getId(), TaskAttemptState.SUCCEEDED, 0));
+    taskSchedulerEventHandler.handleEvent(new AMSchedulerEventTAEnded(ta112, container1.getId(), TaskAttemptState.SUCCEEDED, null, 0));
     drainableAppCallback.drain();
-    verify(taskScheduler).deallocateTask(eq(ta112), eq(true));
+    verify(taskScheduler).deallocateTask(eq(ta112), eq(true), eq((TaskAttemptEndReason)null));
     verify(rmClient, times(0)).releaseAssignedContainer(eq(container1.getId()));
     eventHandler.verifyNoInvocations(AMContainerEventStopRequest.class);
     eventHandler.reset();
@@ -1118,9 +1121,9 @@ public class TestContainerReuse {
     assertEquals(2, assignEvent.getRemoteTaskLocalResources().size());
     eventHandler.reset();
 
-    taskSchedulerEventHandler.handleEvent(new AMSchedulerEventTAEnded(ta211, container1.getId(), TaskAttemptState.SUCCEEDED, 0));
+    taskSchedulerEventHandler.handleEvent(new AMSchedulerEventTAEnded(ta211, container1.getId(), TaskAttemptState.SUCCEEDED, null, 0));
     drainableAppCallback.drain();
-    verify(taskScheduler).deallocateTask(eq(ta211), eq(true));
+    verify(taskScheduler).deallocateTask(eq(ta211), eq(true), eq((TaskAttemptEndReason)null));
     verify(taskSchedulerEventHandler).taskAllocated(eq(ta212), any(Object.class), eq(container1));
     verify(rmClient, times(0)).releaseAssignedContainer(eq(container1.getId()));
     eventHandler.verifyNoInvocations(AMContainerEventStopRequest.class);

http://git-wip-us.apache.org/repos/asf/tez/blob/e1ab1914/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestLocalTaskSchedulerService.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestLocalTaskSchedulerService.java b/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestLocalTaskSchedulerService.java
index 25cf4b5..0a642bb 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestLocalTaskSchedulerService.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestLocalTaskSchedulerService.java
@@ -27,6 +27,7 @@ import org.apache.hadoop.yarn.api.records.ApplicationId;
 import org.apache.hadoop.yarn.api.records.Container;
 import org.apache.hadoop.yarn.api.records.Priority;
 import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.tez.dag.api.TaskAttemptEndReason;
 import org.apache.tez.dag.app.AppContext;
 import org.apache.tez.dag.app.dag.Task;
 import org.apache.tez.dag.app.rm.TaskSchedulerService.TaskSchedulerAppCallback;
@@ -94,7 +95,7 @@ public class TestLocalTaskSchedulerService {
 
     Task task = mock(Task.class);
     taskSchedulerService.allocateTask(task, Resource.newInstance(1024, 1), null, null, Priority.newInstance(1), null, null);
-    taskSchedulerService.deallocateTask(task, false);
+    taskSchedulerService.deallocateTask(task, false, null);
     // start the RequestHandler, DeallocateTaskRequest has higher priority, so will be processed first
     taskSchedulerService.startRequestHandlerThread();
 
@@ -126,7 +127,7 @@ public class TestLocalTaskSchedulerService {
 
     MockAsyncDelegateRequestHandler requestHandler = taskSchedulerService.getRequestHandler();
     requestHandler.drainRequest(1);
-    taskSchedulerService.deallocateTask(task, false);
+    taskSchedulerService.deallocateTask(task, false, null);
     requestHandler.drainRequest(2);
     assertEquals(1, requestHandler.deallocateCount);
     assertEquals(1, requestHandler.allocateCount);

http://git-wip-us.apache.org/repos/asf/tez/blob/e1ab1914/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestTaskScheduler.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestTaskScheduler.java b/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestTaskScheduler.java
index dabae67..807e772 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestTaskScheduler.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestTaskScheduler.java
@@ -172,7 +172,7 @@ public class TestTaskScheduler {
                            addContainerRequest((CookieContainerRequest) any());
 
     // returned from task requests before allocation happens
-    assertFalse(scheduler.deallocateTask(mockTask1, true));
+    assertFalse(scheduler.deallocateTask(mockTask1, true, null));
     verify(mockApp, times(0)).containerBeingReleased(any(ContainerId.class));
     verify(mockRMClient, times(1)).
                         removeContainerRequest((CookieContainerRequest) any());
@@ -180,7 +180,7 @@ public class TestTaskScheduler {
                                  releaseAssignedContainer((ContainerId) any());
 
     // deallocating unknown task
-    assertFalse(scheduler.deallocateTask(mockTask1, true));
+    assertFalse(scheduler.deallocateTask(mockTask1, true, null));
     verify(mockApp, times(0)).containerBeingReleased(any(ContainerId.class));
     verify(mockRMClient, times(1)).
                         removeContainerRequest((CookieContainerRequest) any());
@@ -325,7 +325,7 @@ public class TestTaskScheduler {
     verify(mockRMClient).releaseAssignedContainer(mockCId4);
 
     // deallocate allocated task
-    assertTrue(scheduler.deallocateTask(mockTask1, true));
+    assertTrue(scheduler.deallocateTask(mockTask1, true, null));
     drainableAppCallback.drain();
     verify(mockApp).containerBeingReleased(mockCId1);
     verify(mockRMClient).releaseAssignedContainer(mockCId1);
@@ -445,7 +445,7 @@ public class TestTaskScheduler {
     verify(mockApp, times(4)).taskAllocated(any(), any(), (Container) any());
     verify(mockApp).taskAllocated(mockTask4, mockCookie4, mockContainer6);
     // deallocate allocated task
-    assertTrue(scheduler.deallocateTask(mockTask4, true));
+    assertTrue(scheduler.deallocateTask(mockTask4, true, null));
     drainableAppCallback.drain();
     verify(mockApp).containerBeingReleased(mockCId6);
     verify(mockRMClient).releaseAssignedContainer(mockCId6);
@@ -475,7 +475,7 @@ public class TestTaskScheduler {
         removeContainerRequest((CookieContainerRequest) any());
     verify(mockRMClient, times(8)).addContainerRequest(
         (CookieContainerRequest) any());
-    assertFalse(scheduler.deallocateTask(mockTask1, true));
+    assertFalse(scheduler.deallocateTask(mockTask1, true, null));
 
     List<NodeReport> mockUpdatedNodes = mock(List.class);
     scheduler.onNodesUpdated(mockUpdatedNodes);
@@ -741,7 +741,7 @@ public class TestTaskScheduler {
     verify(mockRMClient).releaseAssignedContainer(mockCId4);
 
     // deallocate allocated task
-    assertTrue(scheduler.deallocateTask(mockTask1, true));
+    assertTrue(scheduler.deallocateTask(mockTask1, true, null));
     drainableAppCallback.drain();
     verify(mockApp).containerBeingReleased(mockCId1);
     verify(mockRMClient).releaseAssignedContainer(mockCId1);
@@ -871,7 +871,7 @@ public class TestTaskScheduler {
     verify(mockApp, times(4)).taskAllocated(any(), any(), (Container) any());
     verify(mockApp).taskAllocated(mockTask4, mockCookie4, mockContainer6);
     // deallocate allocated task
-    assertTrue(scheduler.deallocateTask(mockTask4, true));
+    assertTrue(scheduler.deallocateTask(mockTask4, true, null));
     drainableAppCallback.drain();
     verify(mockApp).containerBeingReleased(mockCId6);
     verify(mockRMClient).releaseAssignedContainer(mockCId6);
@@ -960,8 +960,8 @@ public class TestTaskScheduler {
     // container7 allocated to the task with affinity for it
     verify(mockApp).taskAllocated(mockTask6, mockCookie6, mockContainer7);
     // deallocate allocated task
-    assertTrue(scheduler.deallocateTask(mockTask5, true));
-    assertTrue(scheduler.deallocateTask(mockTask6, true));
+    assertTrue(scheduler.deallocateTask(mockTask5, true, null));
+    assertTrue(scheduler.deallocateTask(mockTask6, true, null));
     drainableAppCallback.drain();
     verify(mockApp).containerBeingReleased(mockCId7);
     verify(mockApp).containerBeingReleased(mockCId8);

http://git-wip-us.apache.org/repos/asf/tez/blob/e1ab1914/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/rm/TezTestServiceTaskSchedulerService.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/rm/TezTestServiceTaskSchedulerService.java b/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/rm/TezTestServiceTaskSchedulerService.java
index 5657f86..872d592 100644
--- a/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/rm/TezTestServiceTaskSchedulerService.java
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/rm/TezTestServiceTaskSchedulerService.java
@@ -38,6 +38,7 @@ import org.apache.hadoop.yarn.api.records.ContainerId;
 import org.apache.hadoop.yarn.api.records.NodeId;
 import org.apache.hadoop.yarn.api.records.Priority;
 import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.tez.dag.api.TaskAttemptEndReason;
 import org.apache.tez.dag.app.AppContext;
 import org.apache.tez.service.TezTestServiceConfConstants;
 
@@ -198,7 +199,7 @@ public class TezTestServiceTaskSchedulerService extends TaskSchedulerService {
   }
 
   @Override
-  public boolean deallocateTask(Object task, boolean taskSucceeded) {
+  public boolean deallocateTask(Object task, boolean taskSucceeded, TaskAttemptEndReason endReason) {
     ContainerId containerId = runningTasks.remove(task);
     if (containerId == null) {
       LOG.error("Could not determine ContainerId for task: " + task +

http://git-wip-us.apache.org/repos/asf/tez/blob/e1ab1914/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/taskcomm/TezTestServiceTaskCommunicatorImpl.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/taskcomm/TezTestServiceTaskCommunicatorImpl.java b/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/taskcomm/TezTestServiceTaskCommunicatorImpl.java
index a327caf..e3385a2 100644
--- a/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/taskcomm/TezTestServiceTaskCommunicatorImpl.java
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/taskcomm/TezTestServiceTaskCommunicatorImpl.java
@@ -19,16 +19,20 @@ import java.nio.ByteBuffer;
 import java.util.Map;
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.ConcurrentMap;
+import java.util.concurrent.RejectedExecutionException;
 
 import com.google.protobuf.ByteString;
+import com.google.protobuf.ServiceException;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.io.DataOutputBuffer;
+import org.apache.hadoop.ipc.RemoteException;
 import org.apache.hadoop.security.Credentials;
 import org.apache.hadoop.yarn.api.ApplicationConstants;
 import org.apache.hadoop.yarn.api.records.ContainerId;
 import org.apache.hadoop.yarn.api.records.LocalResource;
+import org.apache.tez.dag.api.TaskAttemptEndReason;
 import org.apache.tez.dag.api.TaskCommunicatorContext;
 import org.apache.tez.dag.app.TezTaskCommunicatorImpl;
 import org.apache.tez.dag.app.TezTestServiceCommunicator;
@@ -83,6 +87,7 @@ public class TezTestServiceTaskCommunicatorImpl extends TezTaskCommunicatorImpl
   @Override
   public void serviceStop() {
     super.serviceStop();
+    this.communicator.stop();
   }
 
 
@@ -123,13 +128,15 @@ public class TezTestServiceTaskCommunicatorImpl extends TezTaskCommunicatorImpl
       throw new RuntimeException("ContainerInfo not found for container: " + containerId +
           ", while trying to launch task: " + taskSpec.getTaskAttemptID());
     }
+    // Have to register this up front right now. Otherwise, it's possible for the task to start
+    // sending out status/DONE/KILLED/FAILED messages before TAImpl knows how to handle them.
+    getTaskCommunicatorContext()
+        .taskStartedRemotely(taskSpec.getTaskAttemptID(), containerId);
     communicator.submitWork(requestProto, host, port,
         new TezTestServiceCommunicator.ExecuteRequestCallback<SubmitWorkResponseProto>() {
           @Override
           public void setResponse(SubmitWorkResponseProto response) {
             LOG.info("Successfully launched task: " + taskSpec.getTaskAttemptID());
-            getTaskCommunicatorContext()
-                .taskStartedRemotely(taskSpec.getTaskAttemptID(), containerId);
           }
 
           @Override
@@ -137,6 +144,31 @@ public class TezTestServiceTaskCommunicatorImpl extends TezTaskCommunicatorImpl
             // TODO Handle this error. This is where an API on the context to indicate failure / rejection comes in.
             LOG.info("Failed to run task: " + taskSpec.getTaskAttemptID() + " on containerId: " +
                 containerId, t);
+            if (t instanceof ServiceException) {
+              ServiceException se = (ServiceException) t;
+              t = se.getCause();
+            }
+            if (t instanceof RemoteException) {
+              RemoteException re = (RemoteException)t;
+              String message = re.toString();
+              if (message.contains(RejectedExecutionException.class.getName())) {
+                getTaskCommunicatorContext().taskKilled(taskSpec.getTaskAttemptID(),
+                    TaskAttemptEndReason.SERVICE_BUSY, "Service Busy");
+              } else {
+                getTaskCommunicatorContext()
+                    .taskFailed(taskSpec.getTaskAttemptID(), TaskAttemptEndReason.OTHER,
+                        t.toString());
+              }
+            } else {
+              if (t instanceof IOException) {
+                getTaskCommunicatorContext().taskKilled(taskSpec.getTaskAttemptID(),
+                    TaskAttemptEndReason.COMMUNICATION_ERROR, "Communication Error");
+              } else {
+                getTaskCommunicatorContext()
+                    .taskFailed(taskSpec.getTaskAttemptID(), TaskAttemptEndReason.OTHER,
+                        t.getMessage());
+              }
+            }
           }
         });
   }

http://git-wip-us.apache.org/repos/asf/tez/blob/e1ab1914/tez-ext-service-tests/src/test/java/org/apache/tez/service/ContainerRunner.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/service/ContainerRunner.java b/tez-ext-service-tests/src/test/java/org/apache/tez/service/ContainerRunner.java
index 2bca4ed..28c2286 100644
--- a/tez-ext-service-tests/src/test/java/org/apache/tez/service/ContainerRunner.java
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/service/ContainerRunner.java
@@ -16,12 +16,13 @@ package org.apache.tez.service;
 
 import java.io.IOException;
 
+import org.apache.tez.dag.api.TezException;
 import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos;
 import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos.RunContainerRequestProto;
 import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos.SubmitWorkRequestProto;
 
 public interface ContainerRunner {
 
-  void queueContainer(RunContainerRequestProto request) throws IOException;
-  void submitWork(SubmitWorkRequestProto request) throws IOException;
+  void queueContainer(RunContainerRequestProto request) throws TezException;
+  void submitWork(SubmitWorkRequestProto request) throws TezException;
 }

http://git-wip-us.apache.org/repos/asf/tez/blob/e1ab1914/tez-ext-service-tests/src/test/java/org/apache/tez/service/MiniTezTestServiceCluster.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/service/MiniTezTestServiceCluster.java b/tez-ext-service-tests/src/test/java/org/apache/tez/service/MiniTezTestServiceCluster.java
index f47bd67..0ac0b33 100644
--- a/tez-ext-service-tests/src/test/java/org/apache/tez/service/MiniTezTestServiceCluster.java
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/service/MiniTezTestServiceCluster.java
@@ -133,7 +133,10 @@ public class MiniTezTestServiceCluster extends AbstractService {
 
   @Override
   public void serviceStop() {
-    tezTestService.stop();
+    if (tezTestService != null) {
+      tezTestService.stop();
+      tezTestService = null;
+    }
   }
 
   /**

http://git-wip-us.apache.org/repos/asf/tez/blob/e1ab1914/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/ContainerRunnerImpl.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/ContainerRunnerImpl.java b/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/ContainerRunnerImpl.java
index 25d6030..379d952 100644
--- a/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/ContainerRunnerImpl.java
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/ContainerRunnerImpl.java
@@ -25,6 +25,7 @@ import java.util.Map;
 import java.util.concurrent.Callable;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
+import java.util.concurrent.RejectedExecutionException;
 import java.util.concurrent.atomic.AtomicLong;
 import java.util.concurrent.atomic.AtomicReference;
 
@@ -58,6 +59,7 @@ import org.apache.tez.common.security.JobTokenIdentifier;
 import org.apache.tez.common.security.TokenCache;
 import org.apache.tez.dag.api.TezConfiguration;
 import org.apache.tez.dag.api.TezException;
+import org.apache.tez.runtime.api.impl.TaskSpec;
 import org.apache.tez.runtime.task.TaskReporter;
 import org.apache.tez.runtime.task.TezTaskRunner;
 import org.apache.tez.service.ContainerRunner;
@@ -68,14 +70,18 @@ import org.apache.tez.runtime.common.objectregistry.ObjectRegistryImpl;
 import org.apache.tez.runtime.task.TezChild;
 import org.apache.tez.runtime.task.TezChild.ContainerExecutionResult;
 import org.apache.tez.shufflehandler.ShuffleHandler;
+import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos;
 import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos.RunContainerRequestProto;
 import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos.SubmitWorkRequestProto;
+import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos.TaskSpecProto;
 import org.apache.tez.util.ProtoConverters;
 
 public class ContainerRunnerImpl extends AbstractService implements ContainerRunner {
 
   private static final Logger LOG = Logger.getLogger(ContainerRunnerImpl.class);
 
+  public static final String DAG_NAME_INSTRUMENTED_FAILURES = "InstrumentedFailures";
+
   private final ListeningExecutorService executorService;
   private final AtomicReference<InetSocketAddress> localAddress;
   private final String[] localDirsBase;
@@ -146,10 +152,10 @@ public class ContainerRunnerImpl extends AbstractService implements ContainerRun
    * Submit a container which is ready for running.
    * The regular pull mechanism will be used to fetch work from the AM
    * @param request
-   * @throws IOException
+   * @throws TezException
    */
   @Override
-  public void queueContainer(RunContainerRequestProto request) throws IOException {
+  public void queueContainer(RunContainerRequestProto request) throws TezException {
     LOG.info("Queuing container for execution: " + request);
 
     Map<String, String> env = new HashMap<String, String>();
@@ -162,7 +168,11 @@ public class ContainerRunnerImpl extends AbstractService implements ContainerRun
     for (int i = 0; i < localDirsBase.length; i++) {
       localDirs[i] = createAppSpecificLocalDir(localDirsBase[i], request.getApplicationIdString(),
           request.getUser());
-      localFs.mkdirs(new Path(localDirs[i]));
+      try {
+        localFs.mkdirs(new Path(localDirs[i]));
+      } catch (IOException e) {
+        throw new TezException(e);
+      }
     }
     LOG.info("DEBUG: Dirs are: " + Arrays.toString(localDirs));
 
@@ -175,7 +185,11 @@ public class ContainerRunnerImpl extends AbstractService implements ContainerRun
     DataInputBuffer dib = new DataInputBuffer();
     byte[] tokenBytes = request.getCredentialsBinary().toByteArray();
     dib.reset(tokenBytes, tokenBytes.length);
-    credentials.readTokenStorageStream(dib);
+    try {
+      credentials.readTokenStorageStream(dib);
+    } catch (IOException e) {
+      throw new TezException(e);
+    }
 
     Token<JobTokenIdentifier> jobToken = TokenCache.getSessionToken(credentials);
 
@@ -197,13 +211,14 @@ public class ContainerRunnerImpl extends AbstractService implements ContainerRun
    * This is intended for a task push from the AM
    *
    * @param request
-   * @throws IOException
+   * @throws org.apache.tez.dag.api.TezException
    */
   @Override
-  public void submitWork(SubmitWorkRequestProto request) throws
-      IOException {
+  public void submitWork(SubmitWorkRequestProto request) throws TezException {
     LOG.info("Queuing work for execution: " + request);
 
+    checkAndThrowExceptionForTests(request);
+
     Map<String, String> env = new HashMap<String, String>();
     env.putAll(localEnv);
     env.put(ApplicationConstants.Environment.USER.name(), request.getUser());
@@ -214,7 +229,11 @@ public class ContainerRunnerImpl extends AbstractService implements ContainerRun
     for (int i = 0; i < localDirsBase.length; i++) {
       localDirs[i] = createAppSpecificLocalDir(localDirsBase[i], request.getApplicationIdString(),
           request.getUser());
-      localFs.mkdirs(new Path(localDirs[i]));
+      try {
+        localFs.mkdirs(new Path(localDirs[i]));
+      } catch (IOException e) {
+        throw new TezException(e);
+      }
     }
     if (LOG.isDebugEnabled()) {
       LOG.debug("Dirs are: " + Arrays.toString(localDirs));
@@ -228,7 +247,11 @@ public class ContainerRunnerImpl extends AbstractService implements ContainerRun
     DataInputBuffer dib = new DataInputBuffer();
     byte[] tokenBytes = request.getCredentialsBinary().toByteArray();
     dib.reset(tokenBytes, tokenBytes.length);
-    credentials.readTokenStorageStream(dib);
+    try {
+      credentials.readTokenStorageStream(dib);
+    } catch (IOException e) {
+      throw new TezException(e);
+    }
 
     Token<JobTokenIdentifier> jobToken = TokenCache.getSessionToken(credentials);
 
@@ -509,4 +532,23 @@ public class ContainerRunnerImpl extends AbstractService implements ContainerRun
     }
   }
 
+
+  private void checkAndThrowExceptionForTests(SubmitWorkRequestProto request) throws TezException {
+    if (!request.getTaskSpec().getDagName().equals(DAG_NAME_INSTRUMENTED_FAILURES)) {
+      return;
+    }
+
+    TaskSpec taskSpec = ProtoConverters.getTaskSpecfromProto(request.getTaskSpec());
+    if (taskSpec.getTaskAttemptID().getTaskID().getId() == 0 &&
+        taskSpec.getTaskAttemptID().getId() == 0) {
+      LOG.info("Simulating Rejected work");
+      throw new RejectedExecutionException(
+          "Simulating Rejected work for taskAttemptId=" + taskSpec.getTaskAttemptID());
+    } else if (taskSpec.getTaskAttemptID().getTaskID().getId() == 1 &&
+        taskSpec.getTaskAttemptID().getId() == 0) {
+      LOG.info("Simulating Task Setup Failure during launch");
+      throw new TezException("Simulating Task Setup Failure during launch for taskAttemptId=" +
+          taskSpec.getTaskAttemptID());
+    }
+  }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tez/blob/e1ab1914/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/TezTestService.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/TezTestService.java b/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/TezTestService.java
index 012e352..855f1b0 100644
--- a/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/TezTestService.java
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/TezTestService.java
@@ -14,7 +14,6 @@
 
 package org.apache.tez.service.impl;
 
-import java.io.IOException;
 import java.net.InetSocketAddress;
 import java.util.Arrays;
 import java.util.concurrent.atomic.AtomicInteger;
@@ -25,6 +24,7 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.service.AbstractService;
 import org.apache.hadoop.util.StringUtils;
 import org.apache.log4j.Logger;
+import org.apache.tez.dag.api.TezException;
 import org.apache.tez.service.ContainerRunner;
 import org.apache.tez.shufflehandler.ShuffleHandler;
 import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos;
@@ -108,14 +108,14 @@ public class TezTestService extends AbstractService implements ContainerRunner {
 
 
   @Override
-  public void queueContainer(RunContainerRequestProto request) throws IOException {
+  public void queueContainer(RunContainerRequestProto request) throws TezException {
     numSubmissions.incrementAndGet();
     containerRunner.queueContainer(request);
   }
 
   @Override
   public void submitWork(TezTestServiceProtocolProtos.SubmitWorkRequestProto request) throws
-      IOException {
+      TezException {
     numSubmissions.incrementAndGet();
     containerRunner.submitWork(request);
   }

http://git-wip-us.apache.org/repos/asf/tez/blob/e1ab1914/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/TezTestServiceProtocolServerImpl.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/TezTestServiceProtocolServerImpl.java b/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/TezTestServiceProtocolServerImpl.java
index d7f8444..39d7156 100644
--- a/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/TezTestServiceProtocolServerImpl.java
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/TezTestServiceProtocolServerImpl.java
@@ -30,11 +30,13 @@ import org.apache.hadoop.ipc.ProtobufRpcEngine;
 import org.apache.hadoop.ipc.RPC;
 import org.apache.hadoop.net.NetUtils;
 import org.apache.hadoop.service.AbstractService;
+import org.apache.tez.dag.api.TezException;
 import org.apache.tez.service.ContainerRunner;
 import org.apache.tez.service.TezTestServiceProtocolBlockingPB;
 import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos;
 import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos.RunContainerRequestProto;
 import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos.RunContainerResponseProto;
+import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos.SubmitWorkRequestProto;
 import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos.SubmitWorkResponseProto;
 
 public class TezTestServiceProtocolServerImpl extends AbstractService
@@ -61,20 +63,20 @@ public class TezTestServiceProtocolServerImpl extends AbstractService
     LOG.info("Received request: " + request);
     try {
       containerRunner.queueContainer(request);
-    } catch (IOException e) {
+    } catch (TezException e) {
       throw new ServiceException(e);
     }
     return RunContainerResponseProto.getDefaultInstance();
   }
 
   @Override
-  public SubmitWorkResponseProto submitWork(RpcController controller, TezTestServiceProtocolProtos.SubmitWorkRequestProto request) throws
+  public SubmitWorkResponseProto submitWork(RpcController controller, SubmitWorkRequestProto request) throws
       ServiceException {
     LOG.info("Received submitWork request: " + request);
     try {
       containerRunner.submitWork(request);
-    } catch (IOException e) {
-      e.printStackTrace();
+    } catch (TezException e) {
+      throw new ServiceException(e);
     }
     return SubmitWorkResponseProto.getDefaultInstance();
   }

http://git-wip-us.apache.org/repos/asf/tez/blob/e1ab1914/tez-ext-service-tests/src/test/java/org/apache/tez/tests/TestExternalTezServices.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/tests/TestExternalTezServices.java b/tez-ext-service-tests/src/test/java/org/apache/tez/tests/TestExternalTezServices.java
index 0ec972b..b6a166d 100644
--- a/tez-ext-service-tests/src/test/java/org/apache/tez/tests/TestExternalTezServices.java
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/tests/TestExternalTezServices.java
@@ -27,16 +27,23 @@ import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.apache.tez.client.TezClient;
+import org.apache.tez.dag.api.DAG;
+import org.apache.tez.dag.api.ProcessorDescriptor;
 import org.apache.tez.dag.api.TezConfiguration;
 import org.apache.tez.dag.api.TezConstants;
 import org.apache.tez.dag.api.TezException;
+import org.apache.tez.dag.api.Vertex;
+import org.apache.tez.dag.api.client.DAGClient;
+import org.apache.tez.dag.api.client.DAGStatus;
 import org.apache.tez.dag.app.launcher.TezTestServiceNoOpContainerLauncher;
 import org.apache.tez.dag.app.rm.TezTestServiceTaskSchedulerService;
 import org.apache.tez.dag.app.taskcomm.TezTestServiceTaskCommunicatorImpl;
 import org.apache.tez.examples.HashJoinExample;
 import org.apache.tez.examples.JoinDataGen;
 import org.apache.tez.examples.JoinValidateConfigured;
+import org.apache.tez.runtime.library.processor.SleepProcessor;
 import org.apache.tez.service.MiniTezTestServiceCluster;
+import org.apache.tez.service.impl.ContainerRunnerImpl;
 import org.apache.tez.test.MiniTezCluster;
 import org.junit.AfterClass;
 import org.junit.BeforeClass;
@@ -283,6 +290,28 @@ public class TestExternalTezServices {
         PROPS_IN_AM, PROPS_REGULAR_CONTAINERS);
   }
 
+  @Test(timeout = 60000)
+  public void testErrorPropagation() throws TezException, InterruptedException, IOException {
+    runExceptionSimulation();
+  }
+
+
+
+  private void runExceptionSimulation() throws IOException, TezException, InterruptedException {
+    DAG dag = DAG.create(ContainerRunnerImpl.DAG_NAME_INSTRUMENTED_FAILURES);
+    Vertex v =Vertex.create("Vertex1", ProcessorDescriptor.create(SleepProcessor.class.getName()),
+        3);
+    for (Map.Entry<String, String> prop : PROPS_EXT_SERVICE_PUSH.entrySet()) {
+      v.setConf(prop.getKey(), prop.getValue());
+    }
+    dag.addVertex(v);
+    DAGClient dagClient = sharedTezClient.submitDAG(dag);
+    DAGStatus dagStatus = dagClient.waitForCompletion();
+    assertEquals(DAGStatus.State.SUCCEEDED, dagStatus.getState());
+    assertEquals(1, dagStatus.getDAGProgress().getFailedTaskAttemptCount());
+    assertEquals(1, dagStatus.getDAGProgress().getKilledTaskAttemptCount());
+
+  }
 
   private void runJoinValidate(String name, int extExpectedCount, Map<String, String> lhsProps,
                                Map<String, String> rhsProps,


[35/43] tez git commit: TEZ-2284. Separate TaskReporter into an interface. (sseth)

Posted by ss...@apache.org.
TEZ-2284. Separate TaskReporter into an interface. (sseth)


Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/2b05376a
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/2b05376a
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/2b05376a

Branch: refs/heads/TEZ-2003
Commit: 2b05376ad2b63b81d4ab09c6052509066341b47a
Parents: 2616479
Author: Siddharth Seth <ss...@apache.org>
Authored: Tue Apr 7 13:21:35 2015 -0700
Committer: Siddharth Seth <ss...@apache.org>
Committed: Fri May 8 14:41:31 2015 -0700

----------------------------------------------------------------------
 TEZ-2003-CHANGES.txt                            |  1 +
 .../internals/api/TaskReporterInterface.java    | 46 ++++++++++++++++++++
 .../apache/tez/runtime/task/TaskReporter.java   | 12 ++++-
 .../org/apache/tez/runtime/task/TezChild.java   |  3 +-
 .../apache/tez/runtime/task/TezTaskRunner.java  |  5 ++-
 5 files changed, 62 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tez/blob/2b05376a/TEZ-2003-CHANGES.txt
----------------------------------------------------------------------
diff --git a/TEZ-2003-CHANGES.txt b/TEZ-2003-CHANGES.txt
index 6a4399c..e2c428d 100644
--- a/TEZ-2003-CHANGES.txt
+++ b/TEZ-2003-CHANGES.txt
@@ -13,5 +13,6 @@ ALL CHANGES:
   TEZ-2187. Allow TaskCommunicators to report failed / killed attempts.
   TEZ-2241. Miscellaneous fixes after last reabse.
   TEZ-2283. Fixes after rebase 04/07.
+  TEZ-2284. Separate TaskReporter into an interface.
 
 INCOMPATIBLE CHANGES:

http://git-wip-us.apache.org/repos/asf/tez/blob/2b05376a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/internals/api/TaskReporterInterface.java
----------------------------------------------------------------------
diff --git a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/internals/api/TaskReporterInterface.java b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/internals/api/TaskReporterInterface.java
new file mode 100644
index 0000000..47a61ab
--- /dev/null
+++ b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/internals/api/TaskReporterInterface.java
@@ -0,0 +1,46 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.runtime.internals.api;
+
+import java.io.IOException;
+import java.util.Collection;
+
+import org.apache.tez.dag.api.TezException;
+import org.apache.tez.dag.records.TezTaskAttemptID;
+import org.apache.tez.runtime.RuntimeTask;
+import org.apache.tez.runtime.api.impl.EventMetaData;
+import org.apache.tez.runtime.api.impl.TezEvent;
+import org.apache.tez.runtime.task.ErrorReporter;
+
+public interface TaskReporterInterface {
+
+  // TODO TEZ-2003 Consolidate private API usage if making this public
+
+  void registerTask(RuntimeTask task, ErrorReporter errorReporter);
+
+  void unregisterTask(TezTaskAttemptID taskAttemptId);
+
+  boolean taskSucceeded(TezTaskAttemptID taskAttemptId) throws IOException, TezException;
+
+  boolean taskFailed(TezTaskAttemptID taskAttemptId, Throwable cause, String diagnostics, EventMetaData srcMeta) throws IOException,
+      TezException;
+
+  void addEvents(TezTaskAttemptID taskAttemptId, Collection<TezEvent> events);
+
+  boolean canCommit(TezTaskAttemptID taskAttemptId) throws IOException;
+
+  void shutdown();
+
+}

http://git-wip-us.apache.org/repos/asf/tez/blob/2b05376a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TaskReporter.java
----------------------------------------------------------------------
diff --git a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TaskReporter.java b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TaskReporter.java
index 8b9db16..b95e514 100644
--- a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TaskReporter.java
+++ b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TaskReporter.java
@@ -48,6 +48,7 @@ import org.apache.tez.runtime.api.impl.TezEvent;
 import org.apache.tez.runtime.api.impl.TezHeartbeatRequest;
 import org.apache.tez.runtime.api.impl.TezHeartbeatResponse;
 import org.apache.tez.runtime.api.impl.EventMetaData.EventProducerConsumerType;
+import org.apache.tez.runtime.internals.api.TaskReporterInterface;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -66,7 +67,7 @@ import com.google.common.util.concurrent.ThreadFactoryBuilder;
  * retrieve events specific to this task.
  * 
  */
-public class TaskReporter {
+public class TaskReporter implements TaskReporterInterface {
 
   private static final Logger LOG = LoggerFactory.getLogger(TaskReporter.class);
 
@@ -98,6 +99,7 @@ public class TaskReporter {
   /**
    * Register a task to be tracked. Heartbeats will be sent out for this task to fetch events, etc.
    */
+  @Override
   public synchronized void registerTask(RuntimeTask task,
       ErrorReporter errorReporter) {
     currentCallable = new HeartbeatCallable(task, umbilical, pollInterval, sendCounterInterval,
@@ -110,12 +112,14 @@ public class TaskReporter {
    * This method should always be invoked before setting up heartbeats for another task running in
    * the same container.
    */
+  @Override
   public synchronized void unregisterTask(TezTaskAttemptID taskAttemptID) {
     currentCallable.markComplete();
     currentCallable = null;
     // KKK Make sure the callable completes before proceeding
   }
-  
+
+  @Override
   public void shutdown() {
     heartbeatExecutor.shutdownNow();
   }
@@ -411,19 +415,23 @@ public class TaskReporter {
     }
   }
 
+  @Override
   public synchronized boolean taskSucceeded(TezTaskAttemptID taskAttemptID) throws IOException, TezException {
     return currentCallable.taskSucceeded(taskAttemptID);
   }
 
+  @Override
   public synchronized boolean taskFailed(TezTaskAttemptID taskAttemptID, Throwable t, String diagnostics,
       EventMetaData srcMeta) throws IOException, TezException {
     return currentCallable.taskFailed(taskAttemptID, t, diagnostics, srcMeta);
   }
 
+  @Override
   public synchronized void addEvents(TezTaskAttemptID taskAttemptID, Collection<TezEvent> events) {
     currentCallable.addEvents(taskAttemptID, events);
   }
 
+  @Override
   public boolean canCommit(TezTaskAttemptID taskAttemptID) throws IOException {
     return umbilical.canCommit(taskAttemptID);
   }

http://git-wip-us.apache.org/repos/asf/tez/blob/2b05376a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TezChild.java
----------------------------------------------------------------------
diff --git a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TezChild.java b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TezChild.java
index 7615f08..c4fd64c 100644
--- a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TezChild.java
+++ b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TezChild.java
@@ -66,6 +66,7 @@ import org.apache.tez.runtime.api.impl.ExecutionContextImpl;
 import org.apache.tez.runtime.api.impl.TaskSpec;
 import org.apache.tez.runtime.api.impl.TezUmbilical;
 import org.apache.tez.runtime.common.objectregistry.ObjectRegistryImpl;
+import org.apache.tez.runtime.internals.api.TaskReporterInterface;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -111,7 +112,7 @@ public class TezChild {
   private final boolean ownUmbilical;
 
   private final TezTaskUmbilicalProtocol umbilical;
-  private TaskReporter taskReporter;
+  private TaskReporterInterface taskReporter;
   private int taskCount = 0;
   private TezVertexID lastVertexID;
 

http://git-wip-us.apache.org/repos/asf/tez/blob/2b05376a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TezTaskRunner.java
----------------------------------------------------------------------
diff --git a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TezTaskRunner.java b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TezTaskRunner.java
index f54814b..33a7f4a 100644
--- a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TezTaskRunner.java
+++ b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TezTaskRunner.java
@@ -41,6 +41,7 @@ import org.apache.tez.runtime.api.impl.EventMetaData;
 import org.apache.tez.runtime.api.impl.TaskSpec;
 import org.apache.tez.runtime.api.impl.TezEvent;
 import org.apache.tez.runtime.api.impl.TezUmbilical;
+import org.apache.tez.runtime.internals.api.TaskReporterInterface;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -56,7 +57,7 @@ public class TezTaskRunner implements TezUmbilical, ErrorReporter {
   private final LogicalIOProcessorRuntimeTask task;
   private final UserGroupInformation ugi;
 
-  private final TaskReporter taskReporter;
+  private final TaskReporterInterface taskReporter;
   private final ListeningExecutorService executor;
   private volatile ListenableFuture<Void> taskFuture;
   private volatile Thread waitingThread;
@@ -70,7 +71,7 @@ public class TezTaskRunner implements TezUmbilical, ErrorReporter {
   public TezTaskRunner(Configuration tezConf, UserGroupInformation ugi, String[] localDirs,
       TaskSpec taskSpec, int appAttemptNumber,
       Map<String, ByteBuffer> serviceConsumerMetadata, Map<String, String> serviceProviderEnvMap,
-      Multimap<String, String> startedInputsMap, TaskReporter taskReporter,
+      Multimap<String, String> startedInputsMap, TaskReporterInterface taskReporter,
       ListeningExecutorService executor, ObjectRegistry objectRegistry, String pid,
       ExecutionContext executionContext, long memAvailable)
           throws IOException {


[09/43] tez git commit: TEZ-2404. Handle DataMovementEvent before its TaskAttemptCompletedEvent (zjffdu)

Posted by ss...@apache.org.
TEZ-2404. Handle DataMovementEvent before its TaskAttemptCompletedEvent (zjffdu)


Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/02870f0a
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/02870f0a
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/02870f0a

Branch: refs/heads/TEZ-2003
Commit: 02870f0ac1095d67a85a864860b0c4ce68a1db57
Parents: d5a0f39
Author: Jeff Zhang <zj...@apache.org>
Authored: Thu May 7 13:03:07 2015 +0800
Committer: Jeff Zhang <zj...@apache.org>
Committed: Thu May 7 13:03:07 2015 +0800

----------------------------------------------------------------------
 CHANGES.txt                                     |  1 +
 .../dag/app/TaskAttemptListenerImpTezDag.java   | 35 +++++---------------
 .../apache/tez/dag/app/dag/impl/VertexImpl.java |  8 +++++
 .../app/TestTaskAttemptListenerImplTezDag.java  | 21 +++++++-----
 4 files changed, 31 insertions(+), 34 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tez/blob/02870f0a/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 58648e4..7feefcc 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -20,6 +20,7 @@ INCOMPATIBLE CHANGES
     Default max limit increased. Should not affect existing users.
 
 ALL CHANGES:
+  TEZ-2404. Handle DataMovementEvent before its TaskAttemptCompletedEvent
   TEZ-2424. Bump up max counter group name length limit to account for per_io counters.
   TEZ-2417. Tez UI: Counters are blank in the Attempts page if all attempts failed
   TEZ-2366. Pig tez MiniTezCluster unit tests fail intermittently after TEZ-2333

http://git-wip-us.apache.org/repos/asf/tez/blob/02870f0a/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java b/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
index d96da83..b38081b 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
@@ -423,12 +423,17 @@ public class TaskAttemptListenerImpTezDag extends AbstractService implements
         }
 
         List<TezEvent> otherEvents = new ArrayList<TezEvent>();
+        // route TASK_STATUS_UPDATE_EVENT directly to TaskAttempt and route other events
+        // (DATA_MOVEMENT_EVENT, TASK_ATTEMPT_COMPLETED_EVENT, TASK_ATTEMPT_FAILED_EVENT)
+        // to VertexImpl to ensure the events ordering
+        //  1. DataMovementEvent is logged as RecoveryEvent before TaskAttemptFinishedEvent
+        //  2. TaskStatusEvent is handled before TaskAttemptFinishedEvent
         for (TezEvent tezEvent : ListUtils.emptyIfNull(inEvents)) {
           final EventType eventType = tezEvent.getEventType();
-          if (eventType == EventType.TASK_STATUS_UPDATE_EVENT ||
-              eventType == EventType.TASK_ATTEMPT_COMPLETED_EVENT) {
-            context.getEventHandler()
-                .handle(getTaskAttemptEventFromTezEvent(taskAttemptID, tezEvent));
+          if (eventType == EventType.TASK_STATUS_UPDATE_EVENT) {
+            TaskAttemptEvent taskAttemptEvent = new TaskAttemptEventStatusUpdate(taskAttemptID,
+                    (TaskStatusUpdateEvent) tezEvent.getEvent());
+            context.getEventHandler().handle(taskAttemptEvent);
           } else {
             otherEvents.add(tezEvent);
           }
@@ -453,28 +458,6 @@ public class TaskAttemptListenerImpTezDag extends AbstractService implements
     }
   }
 
-  private TaskAttemptEvent getTaskAttemptEventFromTezEvent(TezTaskAttemptID taskAttemptID,
-                                                           TezEvent tezEvent) {
-    final EventType eventType = tezEvent.getEventType();
-    TaskAttemptEvent taskAttemptEvent;
-    switch (eventType) {
-      case TASK_STATUS_UPDATE_EVENT:
-        {
-          taskAttemptEvent = new TaskAttemptEventStatusUpdate(taskAttemptID,
-              (TaskStatusUpdateEvent) tezEvent.getEvent());
-        }
-        break;
-      case TASK_ATTEMPT_COMPLETED_EVENT:
-        {
-          taskAttemptEvent = new TaskAttemptEvent(taskAttemptID, TaskAttemptEventType.TA_DONE);
-        }
-        break;
-      default:
-        throw new TezUncheckedException("unknown event type " + eventType);
-    }
-    return taskAttemptEvent;
-  }
-
   private Map<String, TezLocalResource> convertLocalResourceMap(Map<String, LocalResource> ylrs)
       throws IOException {
     Map<String, TezLocalResource> tlrs = Maps.newHashMap();

http://git-wip-us.apache.org/repos/asf/tez/blob/02870f0a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexImpl.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexImpl.java
index 9ed7441..5d61642 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexImpl.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexImpl.java
@@ -132,6 +132,7 @@ import org.apache.tez.dag.app.dag.event.VertexEventTaskCompleted;
 import org.apache.tez.dag.app.dag.event.VertexEventTaskReschedule;
 import org.apache.tez.dag.app.dag.event.VertexEventTermination;
 import org.apache.tez.dag.app.dag.event.VertexEventType;
+import org.apache.tez.dag.app.dag.event.TaskAttemptEvent;
 import org.apache.tez.dag.app.dag.impl.DAGImpl.VertexGroupInfo;
 import org.apache.tez.dag.app.dag.speculation.legacy.LegacySpeculator;
 import org.apache.tez.dag.history.DAGHistoryEvent;
@@ -4131,6 +4132,13 @@ public class VertexImpl implements org.apache.tez.dag.app.dag.Vertex, EventHandl
               );
         }
         break;
+      case TASK_ATTEMPT_COMPLETED_EVENT:
+        {
+          checkEventSourceMetadata(vertex, sourceMeta);
+          vertex.getEventHandler().handle(
+              new TaskAttemptEvent(sourceMeta.getTaskAttemptID(), TaskAttemptEventType.TA_DONE));
+        }
+        break;
       default:
         throw new TezUncheckedException("Unhandled tez event type: "
             + tezEvent.getEventType());

http://git-wip-us.apache.org/repos/asf/tez/blob/02870f0a/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskAttemptListenerImplTezDag.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskAttemptListenerImplTezDag.java b/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskAttemptListenerImplTezDag.java
index ec4f99a..f974f40 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskAttemptListenerImplTezDag.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskAttemptListenerImplTezDag.java
@@ -24,6 +24,7 @@ import static org.mockito.Mockito.times;
 import static org.mockito.Mockito.verify;
 
 import java.io.IOException;
+import java.nio.ByteBuffer;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.HashMap;
@@ -53,6 +54,7 @@ import org.apache.tez.dag.records.TezDAGID;
 import org.apache.tez.dag.records.TezTaskAttemptID;
 import org.apache.tez.dag.records.TezTaskID;
 import org.apache.tez.dag.records.TezVertexID;
+import org.apache.tez.runtime.api.events.DataMovementEvent;
 import org.apache.tez.runtime.api.events.InputInitializerEvent;
 import org.apache.tez.runtime.api.events.TaskAttemptCompletedEvent;
 import org.apache.tez.runtime.api.events.TaskStatusUpdateEvent;
@@ -179,8 +181,9 @@ public class TestTaskAttemptListenerImplTezDag {
   @Test (timeout = 5000)
   public void testTaskEventRouting() throws Exception {
     List<TezEvent> events =  Arrays.asList(
-      new TezEvent(InputInitializerEvent.create("test_vertex", "test_input", null), null),
-      new TezEvent(new TaskStatusUpdateEvent(null, 0.0f, null), null)
+      new TezEvent(new TaskStatusUpdateEvent(null, 0.0f, null), null),
+      new TezEvent(DataMovementEvent.create(0, ByteBuffer.wrap(new byte[0])), null),
+      new TezEvent(new TaskAttemptCompletedEvent(), null)
     );
 
     EventHandler eventHandler = generateHeartbeat(events);
@@ -193,13 +196,15 @@ public class TestTaskAttemptListenerImplTezDag {
     assertEquals("First event should be status update", TaskAttemptEventType.TA_STATUS_UPDATE,
         statusUpdateEvent.getType());
 
-
     final Event vertexEvent = argAllValues.get(1);
     final VertexEventRouteEvent vertexRouteEvent = (VertexEventRouteEvent)vertexEvent;
-    assertEquals("Other events should be routed to vertex", VertexEventType.V_ROUTE_EVENT,
+    assertEquals("First event should be routed to vertex", VertexEventType.V_ROUTE_EVENT,
         vertexEvent.getType());
-    assertEquals(EventType.ROOT_INPUT_INITIALIZER_EVENT,
+    assertEquals(EventType.DATA_MOVEMENT_EVENT,
         vertexRouteEvent.getEvents().get(0).getEventType());
+    assertEquals(EventType.TASK_ATTEMPT_COMPLETED_EVENT,
+        vertexRouteEvent.getEvents().get(1).getEventType());
+
   }
 
   @Test (timeout = 5000)
@@ -213,9 +218,9 @@ public class TestTaskAttemptListenerImplTezDag {
     verify(eventHandler, times(1)).handle(arg.capture());
     final List<Event> argAllValues = arg.getAllValues();
 
-    final Event statusUpdateEvent = argAllValues.get(0);
-    assertEquals("only event should be task done", TaskAttemptEventType.TA_DONE,
-        statusUpdateEvent.getType());
+    final Event event = argAllValues.get(0);
+    assertEquals("only event should be route event", VertexEventType.V_ROUTE_EVENT,
+        event.getType());
   }
 
   private EventHandler generateHeartbeat(List<TezEvent> events) throws IOException, TezException {


[05/43] tez git commit: Move TEZ-1752 into 0.8.0 release section.

Posted by ss...@apache.org.
Move TEZ-1752 into 0.8.0 release section.


Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/7476fae8
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/7476fae8
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/7476fae8

Branch: refs/heads/TEZ-2003
Commit: 7476fae834bafb7a36d17382a08a5385fee0e343
Parents: 16bbc58
Author: Siddharth Seth <ss...@apache.org>
Authored: Wed May 6 14:38:10 2015 -0700
Committer: Siddharth Seth <ss...@apache.org>
Committed: Wed May 6 14:38:10 2015 -0700

----------------------------------------------------------------------
 CHANGES.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tez/blob/7476fae8/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index d3aa8a9..fd45454 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -7,6 +7,7 @@ Release 0.8.0: Unreleased
 INCOMPATIBLE CHANGES
 
 ALL CHANGES:
+  TEZ-1752. Inputs / Outputs in the Runtime library should be interruptable.
 
 Release 0.7.0: Unreleased
 
@@ -20,7 +21,6 @@ ALL CHANGES:
   TEZ-2406. Tez UI: Display per-io counter columns in task and attempt pages under vertex
   TEZ-2384. Add warning message in the case of prewarn under non-session mode.
   TEZ-2415. PMC RDF needs to use asfext:pmc, not asfext:PMC.
-  TEZ-1752. Inputs / Outputs in the Runtime library should be interruptable.
   TEZ-2401. Tez UI: All-dag page has duration keep counting for KILLED dag.
   TEZ-2392. Have all readers throw an Exception on incorrect next() usage.
   TEZ-2408. TestTaskAttempt fails to compile against hadoop-2.4 and hadoop-2.2.


[10/43] tez git commit: TEZ-2416. Tez UI: Make tooltips display faster. (Sreenath Somarajapuram via pramachandran)

Posted by ss...@apache.org.
TEZ-2416. Tez UI: Make tooltips display faster. (Sreenath Somarajapuram via pramachandran)


Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/70cd396d
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/70cd396d
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/70cd396d

Branch: refs/heads/TEZ-2003
Commit: 70cd396d27188aa5e79280552199e7c81f7d2822
Parents: 02870f0
Author: Prakash Ramachandran <pr...@hortonworks.com>
Authored: Thu May 7 17:45:04 2015 +0530
Committer: Prakash Ramachandran <pr...@hortonworks.com>
Committed: Thu May 7 17:45:04 2015 +0530

----------------------------------------------------------------------
 CHANGES.txt                                 | 1 +
 tez-ui/src/main/webapp/app/index.html       | 1 -
 tez-ui/src/main/webapp/app/scripts/app.js   | 5 +++++
 tez-ui/src/main/webapp/app/styles/main.less | 9 ++++++++-
 4 files changed, 14 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tez/blob/70cd396d/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 7feefcc..c3d48b6 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -20,6 +20,7 @@ INCOMPATIBLE CHANGES
     Default max limit increased. Should not affect existing users.
 
 ALL CHANGES:
+  TEZ-2416. Tez UI: Make tooltips display faster.
   TEZ-2404. Handle DataMovementEvent before its TaskAttemptCompletedEvent
   TEZ-2424. Bump up max counter group name length limit to account for per_io counters.
   TEZ-2417. Tez UI: Counters are blank in the Attempts page if all attempts failed

http://git-wip-us.apache.org/repos/asf/tez/blob/70cd396d/tez-ui/src/main/webapp/app/index.html
----------------------------------------------------------------------
diff --git a/tez-ui/src/main/webapp/app/index.html b/tez-ui/src/main/webapp/app/index.html
index f87b2a6..35cfa20 100644
--- a/tez-ui/src/main/webapp/app/index.html
+++ b/tez-ui/src/main/webapp/app/index.html
@@ -39,7 +39,6 @@
     <script src="bower_components/jquery-ui/jquery-ui.js"></script>
     <script src="bower_components/bootstrap/js/dropdown.js"></script>
     <script src="bower_components/bootstrap/js/button.js"></script>
-    <script src="bower_components/bootstrap/js/tooltip.js"></script>
     <script src="bower_components/jquery-ui/ui/datepicker.js"></script>
     <script src="bower_components/moment/moment.js"></script>
     <script src="bower_components/handlebars/handlebars.js"></script>

http://git-wip-us.apache.org/repos/asf/tez/blob/70cd396d/tez-ui/src/main/webapp/app/scripts/app.js
----------------------------------------------------------------------
diff --git a/tez-ui/src/main/webapp/app/scripts/app.js b/tez-ui/src/main/webapp/app/scripts/app.js
index 37aa563..dd493d0 100644
--- a/tez-ui/src/main/webapp/app/scripts/app.js
+++ b/tez-ui/src/main/webapp/app/scripts/app.js
@@ -54,6 +54,11 @@ App.Configs = Em.Namespace.create();
 App.ready = function () {
   $.extend(App.env, App.Configs.envDefaults);
 
+  $(document).tooltip({
+    delay: 20,
+    tooltipClass: 'generic-tooltip'
+  });
+
   ["timelineBaseUrl", "RMWebUrl"].forEach(function(item) {
     if (!!App.env[item]) {
       App.env[item] = App.Helpers.misc.normalizePath(App.env[item]);

http://git-wip-us.apache.org/repos/asf/tez/blob/70cd396d/tez-ui/src/main/webapp/app/styles/main.less
----------------------------------------------------------------------
diff --git a/tez-ui/src/main/webapp/app/styles/main.less b/tez-ui/src/main/webapp/app/styles/main.less
index ad3f132..e89d4c3 100644
--- a/tez-ui/src/main/webapp/app/styles/main.less
+++ b/tez-ui/src/main/webapp/app/styles/main.less
@@ -897,4 +897,11 @@ body, html {
 
 .per-io {
   color: @text-green;
-}
\ No newline at end of file
+}
+
+.generic-tooltip {
+  padding: 3px 5px !important;
+  background: rgba(0,0,0,.8) !important;
+  color: white !important;
+  border: none !important;
+}


[02/43] tez git commit: TEZ-2366. Pig tez MiniTezCluster unit tests fail intermittently after TEZ-2333 (pramachandran)

Posted by ss...@apache.org.
TEZ-2366. Pig tez MiniTezCluster unit tests fail intermittently after TEZ-2333 (pramachandran)


Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/6a04fa48
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/6a04fa48
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/6a04fa48

Branch: refs/heads/TEZ-2003
Commit: 6a04fa48cb1113faf640115dcbba9b2270e756f3
Parents: 12ef073
Author: Prakash Ramachandran <pr...@hortonworks.com>
Authored: Wed May 6 19:11:06 2015 +0530
Committer: Prakash Ramachandran <pr...@hortonworks.com>
Committed: Wed May 6 19:11:06 2015 +0530

----------------------------------------------------------------------
 CHANGES.txt                                     |  1 +
 .../runtime/library/common/shuffle/Fetcher.java | 17 ++---
 .../common/shuffle/impl/ShuffleManager.java     | 12 +++-
 .../orderedgrouped/FetcherOrderedGrouped.java   | 67 ++++++++++--------
 .../common/shuffle/orderedgrouped/Shuffle.java  | 11 ++-
 .../library/common/shuffle/TestFetcher.java     | 49 +++++++++++--
 .../impl/TestShuffleInputEventHandlerImpl.java  | 14 +++-
 .../shuffle/orderedgrouped/TestFetcher.java     | 74 +++++++++++++++++++-
 8 files changed, 191 insertions(+), 54 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tez/blob/6a04fa48/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index f060a8c..91dd9c4 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -15,6 +15,7 @@ INCOMPATIBLE CHANGES
   TEZ-1993. Implement a pluggable InputSizeEstimator for grouping fairly
 
 ALL CHANGES:
+  TEZ-2366. Pig tez MiniTezCluster unit tests fail intermittently after TEZ-2333
   TEZ-2406. Tez UI: Display per-io counter columns in task and attempt pages under vertex
   TEZ-2384. Add warning message in the case of prewarn under non-session mode.
   TEZ-2415. PMC RDF needs to use asfext:pmc, not asfext:PMC.

http://git-wip-us.apache.org/repos/asf/tez/blob/6a04fa48/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/Fetcher.java
----------------------------------------------------------------------
diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/Fetcher.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/Fetcher.java
index 48fe0f2..61e0151 100644
--- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/Fetcher.java
+++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/Fetcher.java
@@ -33,7 +33,6 @@ import java.util.Iterator;
 import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
-import java.util.concurrent.Callable;
 import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.concurrent.atomic.AtomicInteger;
 
@@ -48,7 +47,6 @@ import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.RawLocalFileSystem;
 import org.apache.hadoop.io.compress.CompressionCodec;
 import org.apache.hadoop.util.DiskChecker.DiskErrorException;
-import org.apache.hadoop.yarn.api.ApplicationConstants;
 import org.apache.hadoop.yarn.api.records.ApplicationId;
 import org.apache.tez.common.CallableWithNdc;
 import org.apache.tez.common.security.JobTokenSecretManager;
@@ -75,6 +73,7 @@ public class Fetcher extends CallableWithNdc<FetchResult> {
 
   private static final AtomicInteger fetcherIdGen = new AtomicInteger(0);
   private final Configuration conf;
+  private final int shufflePort;
 
   // Configurable fields.
   private CompressionCodec codec;
@@ -132,7 +131,8 @@ public class Fetcher extends CallableWithNdc<FetchResult> {
       Path lockPath,
       boolean localDiskFetchEnabled,
       boolean sharedFetchEnabled,
-      String localHostname) {
+      String localHostname,
+      int shufflePort) {
     this.fetcherCallback = fetcherCallback;
     this.inputManager = inputManager;
     this.jobTokenSecretMgr = jobTokenSecretManager;
@@ -151,6 +151,7 @@ public class Fetcher extends CallableWithNdc<FetchResult> {
     this.localDirAllocator = localDirAllocator;
     this.lockPath = lockPath;
     this.localHostname = localHostname;
+    this.shufflePort = shufflePort;
 
     try {
       if (this.sharedFetchEnabled) {
@@ -186,7 +187,7 @@ public class Fetcher extends CallableWithNdc<FetchResult> {
 
     HostFetchResult hostFetchResult;
 
-    if (localDiskFetchEnabled && host.equals(localHostname)) {
+    if (localDiskFetchEnabled && host.equals(localHostname) && port == shufflePort) {
       hostFetchResult = setupLocalDiskFetch();
     } else if (multiplex) {
       hostFetchResult = doSharedFetch();
@@ -902,10 +903,10 @@ public class Fetcher extends CallableWithNdc<FetchResult> {
     public FetcherBuilder(FetcherCallback fetcherCallback,
         HttpConnectionParams params, FetchedInputAllocator inputManager,
         ApplicationId appId, JobTokenSecretManager jobTokenSecretMgr, String srcNameTrimmed,
-        Configuration conf, boolean localDiskFetchEnabled, String localHostname) {
+        Configuration conf, boolean localDiskFetchEnabled, String localHostname, int shufflePort) {
       this.fetcher = new Fetcher(fetcherCallback, params, inputManager, appId,
           jobTokenSecretMgr, srcNameTrimmed, conf, null, null, null, localDiskFetchEnabled,
-          false, localHostname);
+          false, localHostname, shufflePort);
     }
 
     public FetcherBuilder(FetcherCallback fetcherCallback,
@@ -914,10 +915,10 @@ public class Fetcher extends CallableWithNdc<FetchResult> {
         Configuration conf, RawLocalFileSystem localFs,
         LocalDirAllocator localDirAllocator, Path lockPath,
         boolean localDiskFetchEnabled, boolean sharedFetchEnabled,
-        String localHostname) {
+        String localHostname, int shufflePort) {
       this.fetcher = new Fetcher(fetcherCallback, params, inputManager, appId,
           jobTokenSecretMgr, srcNameTrimmed, conf, localFs, localDirAllocator,
-          lockPath, localDiskFetchEnabled, sharedFetchEnabled, localHostname);
+          lockPath, localDiskFetchEnabled, sharedFetchEnabled, localHostname, shufflePort);
     }
 
     public FetcherBuilder setHttpConnectionParameters(HttpConnectionParams httpParams) {

http://git-wip-us.apache.org/repos/asf/tez/blob/6a04fa48/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/impl/ShuffleManager.java
----------------------------------------------------------------------
diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/impl/ShuffleManager.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/impl/ShuffleManager.java
index d47e652..ac7caca 100644
--- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/impl/ShuffleManager.java
+++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/impl/ShuffleManager.java
@@ -21,6 +21,7 @@ package org.apache.tez.runtime.library.common.shuffle.impl;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.OutputStream;
+import java.nio.ByteBuffer;
 import java.text.DecimalFormat;
 import java.util.Arrays;
 import java.util.BitSet;
@@ -161,7 +162,8 @@ public class ShuffleManager implements FetcherCallback {
   private final LocalDirAllocator localDirAllocator;
   private final RawLocalFileSystem localFs;
   private final Path[] localDisks;
-  private final static String localhostName = NetUtils.getHostname();
+  private final String localhostName;
+  private final int shufflePort;
 
   private final TezCounter shufflePhaseTime;
   private final TezCounter firstEventReceived;
@@ -216,7 +218,7 @@ public class ShuffleManager implements FetcherCallback {
 
     int maxConfiguredFetchers = 
         conf.getInt(
-            TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_PARALLEL_COPIES, 
+            TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_PARALLEL_COPIES,
             TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_PARALLEL_COPIES_DEFAULT);
     
     this.numFetchers = Math.min(maxConfiguredFetchers, numInputs);
@@ -249,6 +251,10 @@ public class ShuffleManager implements FetcherCallback {
 
     this.localDisks = Iterables.toArray(
         localDirAllocator.getAllLocalPathsToRead(".", conf), Path.class);
+    this.localhostName = inputContext.getExecutionContext().getHostName();
+    final ByteBuffer shuffleMetaData =
+        inputContext.getServiceProviderMetaData(ShuffleUtils.SHUFFLE_HANDLER_SERVICE_ID);
+    this.shufflePort = ShuffleUtils.deserializeShuffleProviderMetaData(shuffleMetaData);
 
     Arrays.sort(this.localDisks);
 
@@ -390,7 +396,7 @@ public class ShuffleManager implements FetcherCallback {
       httpConnectionParams, inputManager, inputContext.getApplicationId(),
         jobTokenSecretMgr, srcNameTrimmed, conf, localFs, localDirAllocator,
         lockDisk, localDiskFetchEnabled, sharedFetchEnabled,
-        inputContext.getExecutionContext().getHostName());
+        localhostName, shufflePort);
 
     if (codec != null) {
       fetcherBuilder.setCompressionParameters(codec);

http://git-wip-us.apache.org/repos/asf/tez/blob/6a04fa48/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/FetcherOrderedGrouped.java
----------------------------------------------------------------------
diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/FetcherOrderedGrouped.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/FetcherOrderedGrouped.java
index fbaabff..60f1c98 100644
--- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/FetcherOrderedGrouped.java
+++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/FetcherOrderedGrouped.java
@@ -56,7 +56,7 @@ class FetcherOrderedGrouped extends Thread {
   private final Configuration conf;
   private final boolean localDiskFetchEnabled;
 
-  private static enum ShuffleErrors{IO_ERROR, WRONG_LENGTH, BAD_ID, WRONG_MAP,
+  private enum ShuffleErrors{IO_ERROR, WRONG_LENGTH, BAD_ID, WRONG_MAP,
                                     CONNECTION, WRONG_REDUCE}
   
   private final static String SHUFFLE_ERR_GRP_NAME = "Shuffle Errors";
@@ -72,7 +72,7 @@ class FetcherOrderedGrouped extends Thread {
   private final Shuffle shuffle;
   private final int id;
   private final String logIdentifier;
-  private final String localHostname;
+  private final String localShuffleHostPort;
   private static int nextId = 0;
   private int currentPartition = -1;
 
@@ -104,7 +104,8 @@ class FetcherOrderedGrouped extends Thread {
                                CompressionCodec codec,
                                InputContext inputContext, Configuration conf,
                                boolean localDiskFetchEnabled,
-                               String localHostname) throws IOException {
+                               String localHostname,
+                               int shufflePort) throws IOException {
     setDaemon(true);
     this.scheduler = scheduler;
     this.merger = merger;
@@ -134,7 +135,7 @@ class FetcherOrderedGrouped extends Thread {
       this.codec = null;
     }
     this.conf = conf;
-    this.localHostname = localHostname;
+    this.localShuffleHostPort = localHostname + ":" + String.valueOf(shufflePort);
 
     this.localDiskFetchEnabled = localDiskFetchEnabled;
 
@@ -144,37 +145,41 @@ class FetcherOrderedGrouped extends Thread {
     setDaemon(true);
   }  
 
-  public void run() {
+  @VisibleForTesting
+  protected void fetchNext() throws InterruptedException, IOException {
+    MapHost host = null;
     try {
-      while (!stopped && !Thread.currentThread().isInterrupted()) {
-        remaining = null; // Safety.
-        MapHost host = null;
-        try {
-          // If merge is on, block
-          merger.waitForInMemoryMerge();
+      // If merge is on, block
+      merger.waitForInMemoryMerge();
 
-          // In case usedMemory > memorylimit, wait until some memory is released
-          merger.waitForShuffleToMergeMemory();
+      // In case usedMemory > memorylimit, wait until some memory is released
+      merger.waitForShuffleToMergeMemory();
 
-          // Get a host to shuffle from
-          host = scheduler.getHost();
-          metrics.threadBusy();
+      // Get a host to shuffle from
+      host = scheduler.getHost();
+      metrics.threadBusy();
 
-          String hostPort = host.getHostIdentifier();
-          String hostname = hostPort.substring(0, hostPort.indexOf(":"));
-          if (localDiskFetchEnabled && hostname.equals(localHostname)) {
-            setupLocalDiskFetch(host);
-          } else {
-            // Shuffle
-            copyFromHost(host);
-          }
-        } finally {
-          cleanupCurrentConnection(false);
-          if (host != null) {
-            scheduler.freeHost(host);
-            metrics.threadFree();
-          }
-        }
+      String hostPort = host.getHostIdentifier();
+      if (localDiskFetchEnabled && hostPort.equals(localShuffleHostPort)) {
+        setupLocalDiskFetch(host);
+      } else {
+        // Shuffle
+        copyFromHost(host);
+      }
+    } finally {
+      cleanupCurrentConnection(false);
+      if (host != null) {
+        scheduler.freeHost(host);
+        metrics.threadFree();
+      }
+    }
+  }
+
+  public void run() {
+    try {
+      while (!stopped && !Thread.currentThread().isInterrupted()) {
+        remaining = null; // Safety.
+        fetchNext();
       }
     } catch (InterruptedException ie) {
       //TODO: might not be respected when fetcher is in progress / server is busy.  TEZ-711

http://git-wip-us.apache.org/repos/asf/tez/blob/6a04fa48/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/Shuffle.java
----------------------------------------------------------------------
diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/Shuffle.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/Shuffle.java
index 442f032..ee05378 100644
--- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/Shuffle.java
+++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/Shuffle.java
@@ -19,6 +19,7 @@ package org.apache.tez.runtime.library.common.shuffle.orderedgrouped;
 
 import java.io.IOException;
 import java.lang.reflect.UndeclaredThrowableException;
+import java.nio.ByteBuffer;
 import java.util.List;
 import java.util.concurrent.ExecutionException;
 import java.util.concurrent.ExecutorService;
@@ -95,6 +96,8 @@ public class Shuffle implements ExceptionReporter {
   private final int ifileReadAheadLength;
   private final int numFetchers;
   private final boolean localDiskFetchEnabled;
+  private final String localHostname;
+  private final int shufflePort;
   
   private AtomicReference<Throwable> throwable = new AtomicReference<Throwable>();
   private String throwingThreadName = null;
@@ -158,6 +161,11 @@ public class Shuffle implements ExceptionReporter {
     LocalDirAllocator localDirAllocator = 
         new LocalDirAllocator(TezRuntimeFrameworkConfigs.LOCAL_DIRS);
 
+    this.localHostname = inputContext.getExecutionContext().getHostName();
+    final ByteBuffer shuffleMetadata =
+        inputContext.getServiceProviderMetaData(ShuffleUtils.SHUFFLE_HANDLER_SERVICE_ID);
+    this.shufflePort = ShuffleUtils.deserializeShuffleProviderMetaData(shuffleMetadata);
+
     // TODO TEZ Get rid of Map / Reduce references.
     TezCounter shuffledInputsCounter = 
         inputContext.getCounters().findCounter(TaskCounter.NUM_SHUFFLED_INPUTS);
@@ -336,8 +344,7 @@ public class Shuffle implements ExceptionReporter {
               FetcherOrderedGrouped
                 fetcher = new FetcherOrderedGrouped(httpConnectionParams, scheduler, merger,
                 metrics, Shuffle.this, jobTokenSecretMgr, ifileReadAhead, ifileReadAheadLength,
-                codec, inputContext, conf, localDiskFetchEnabled,
-                inputContext.getExecutionContext().getHostName());
+                codec, inputContext, conf, localDiskFetchEnabled, localHostname, shufflePort);
               fetchers.add(fetcher);
               fetcher.start();
             }

http://git-wip-us.apache.org/repos/asf/tez/blob/6a04fa48/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/TestFetcher.java
----------------------------------------------------------------------
diff --git a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/TestFetcher.java b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/TestFetcher.java
index e6f0c4a..4ef187d 100644
--- a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/TestFetcher.java
+++ b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/TestFetcher.java
@@ -31,6 +31,7 @@ import static org.mockito.Mockito.spy;
 import static org.mockito.Mockito.verify;
 
 import java.io.IOException;
+import java.nio.ByteBuffer;
 import java.util.Arrays;
 import java.util.List;
 
@@ -38,8 +39,11 @@ import com.google.common.collect.Lists;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.LocalFileSystem;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.DataOutputBuffer;
 import org.apache.hadoop.yarn.api.records.ApplicationId;
 import org.apache.tez.dag.api.TezConfiguration;
+import org.apache.tez.runtime.api.ExecutionContext;
+import org.apache.tez.runtime.api.InputContext;
 import org.apache.tez.runtime.library.api.TezRuntimeConfiguration;
 import org.apache.tez.runtime.library.common.InputAttemptIdentifier;
 import org.apache.tez.runtime.library.common.sort.impl.TezIndexRecord;
@@ -52,19 +56,21 @@ import org.mockito.stubbing.Answer;
 public class TestFetcher {
   private static final String SHUFFLE_INPUT_FILE_PREFIX = "shuffle_input_file_";
   private static String HOST = "localhost";
-  private static int PORT = 0;
+  private static int PORT = 41;
 
   @Test(timeout = 3000)
   public void testLocalFetchModeSetting() throws Exception {
     TezConfiguration conf = new TezConfiguration();
-    conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_OPTIMIZE_LOCAL_FETCH, "true");
+    conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_OPTIMIZE_LOCAL_FETCH, true);
     InputAttemptIdentifier[] srcAttempts = {
         new InputAttemptIdentifier(0, 1, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_1")
     };
     FetcherCallback fetcherCallback = mock(FetcherCallback.class);
+    final boolean ENABLE_LOCAL_FETCH = true;
+    final boolean DISABLE_LOCAL_FETCH = false;
 
     Fetcher.FetcherBuilder builder = new Fetcher.FetcherBuilder(fetcherCallback, null, null,
-        ApplicationId.newInstance(0, 1), null, "fetcherTest", conf, true, HOST);
+        ApplicationId.newInstance(0, 1), null, "fetcherTest", conf, ENABLE_LOCAL_FETCH, HOST, PORT);
     builder.assignWork(HOST, PORT, 0, Arrays.asList(srcAttempts));
     Fetcher fetcher = spy(builder.build());
 
@@ -79,10 +85,41 @@ public class TestFetcher {
     verify(fetcher).setupLocalDiskFetch();
     verify(fetcher, never()).doHttpFetch();
 
+    // when enabled and hostname does not match use http fetch.
+    builder = new Fetcher.FetcherBuilder(fetcherCallback, null, null,
+        ApplicationId.newInstance(0, 1), null, "fetcherTest", conf, ENABLE_LOCAL_FETCH, HOST,
+        PORT);
+    builder.assignWork(HOST + "_OTHER", PORT, 0, Arrays.asList(srcAttempts));
+    fetcher = spy(builder.build());
+
+    doReturn(null).when(fetcher).setupLocalDiskFetch();
+    doReturn(hfr).when(fetcher).doHttpFetch();
+    doNothing().when(fetcher).shutdown();
+
+    fetcher.call();
+
+    verify(fetcher, never()).setupLocalDiskFetch();
+    verify(fetcher).doHttpFetch();
+
+    // when enabled and port does not match use http fetch.
+    builder = new Fetcher.FetcherBuilder(fetcherCallback, null, null,
+        ApplicationId.newInstance(0, 1), null, "fetcherTest", conf, ENABLE_LOCAL_FETCH, HOST, PORT);
+    builder.assignWork(HOST, PORT + 1, 0, Arrays.asList(srcAttempts));
+    fetcher = spy(builder.build());
+
+    doReturn(null).when(fetcher).setupLocalDiskFetch();
+    doReturn(hfr).when(fetcher).doHttpFetch();
+    doNothing().when(fetcher).shutdown();
+
+    fetcher.call();
+
+    verify(fetcher, never()).setupLocalDiskFetch();
+    verify(fetcher).doHttpFetch();
+
     // When disabled use http fetch
-    conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_OPTIMIZE_LOCAL_FETCH, "false");
+    conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_OPTIMIZE_LOCAL_FETCH, false);
     builder = new Fetcher.FetcherBuilder(fetcherCallback, null, null,
-        ApplicationId.newInstance(0, 1), null, "fetcherTest", conf, false, HOST);
+        ApplicationId.newInstance(0, 1), null, "fetcherTest", conf, DISABLE_LOCAL_FETCH, HOST, PORT);
     builder.assignWork(HOST, PORT, 0, Arrays.asList(srcAttempts));
     fetcher = spy(builder.build());
 
@@ -115,7 +152,7 @@ public class TestFetcher {
     int partition = 42;
     FetcherCallback callback = mock(FetcherCallback.class);
     Fetcher.FetcherBuilder builder = new Fetcher.FetcherBuilder(callback, null, null,
-        ApplicationId.newInstance(0, 1), null, "fetcherTest", conf, true, HOST);
+        ApplicationId.newInstance(0, 1), null, "fetcherTest", conf, true, HOST, PORT);
     builder.assignWork(HOST, PORT, partition, Arrays.asList(srcAttempts));
     Fetcher fetcher = spy(builder.build());
 

http://git-wip-us.apache.org/repos/asf/tez/blob/6a04fa48/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/impl/TestShuffleInputEventHandlerImpl.java
----------------------------------------------------------------------
diff --git a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/impl/TestShuffleInputEventHandlerImpl.java b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/impl/TestShuffleInputEventHandlerImpl.java
index 44122a2..c452898 100644
--- a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/impl/TestShuffleInputEventHandlerImpl.java
+++ b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/impl/TestShuffleInputEventHandlerImpl.java
@@ -49,11 +49,13 @@ import org.apache.tez.common.security.JobTokenIdentifier;
 import org.apache.tez.common.security.JobTokenSecretManager;
 import org.apache.tez.dag.api.TezConstants;
 import org.apache.tez.runtime.api.Event;
+import org.apache.tez.runtime.api.ExecutionContext;
 import org.apache.tez.runtime.api.InputContext;
 import org.apache.tez.runtime.api.events.DataMovementEvent;
 import org.apache.tez.runtime.library.common.InputAttemptIdentifier;
 import org.apache.tez.runtime.library.common.InputIdentifier;
 import org.apache.tez.runtime.library.common.shuffle.FetchedInputAllocator;
+import org.apache.tez.runtime.library.common.shuffle.ShuffleUtils;
 import org.apache.tez.runtime.library.shuffle.impl.ShuffleUserPayloads.DataMovementEventPayloadProto;
 import org.junit.Test;
 
@@ -156,10 +158,20 @@ public class TestShuffleInputEventHandlerImpl {
   }
 
 
-  private InputContext createInputContext() {
+  private InputContext createInputContext() throws IOException {
+    DataOutputBuffer port_dob = new DataOutputBuffer();
+    port_dob.writeInt(PORT);
+    final ByteBuffer shuffleMetaData = ByteBuffer.wrap(port_dob.getData(), 0, port_dob.getLength());
+
+    ExecutionContext executionContext = mock(ExecutionContext.class);
+    doReturn(HOST).when(executionContext).getHostName();
+
     InputContext inputContext = mock(InputContext.class);
     doReturn(new TezCounters()).when(inputContext).getCounters();
     doReturn("sourceVertex").when(inputContext).getSourceVertexName();
+    doReturn(shuffleMetaData).when(inputContext)
+        .getServiceProviderMetaData(ShuffleUtils.SHUFFLE_HANDLER_SERVICE_ID);
+    doReturn(executionContext).when(inputContext).getExecutionContext();
     return inputContext;
   }
 

http://git-wip-us.apache.org/repos/asf/tez/blob/6a04fa48/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/TestFetcher.java
----------------------------------------------------------------------
diff --git a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/TestFetcher.java b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/TestFetcher.java
index 2e826d8..c33905f 100644
--- a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/TestFetcher.java
+++ b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/TestFetcher.java
@@ -29,6 +29,7 @@ import static org.mockito.Mockito.doAnswer;
 import static org.mockito.Mockito.doNothing;
 import static org.mockito.Mockito.doReturn;
 import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.never;
 import static org.mockito.Mockito.spy;
 import static org.mockito.Mockito.times;
 import static org.mockito.Mockito.verify;
@@ -39,9 +40,11 @@ import java.io.DataInputStream;
 import java.io.IOException;
 import java.net.HttpURLConnection;
 import java.net.URL;
+import java.nio.ByteBuffer;
 import java.util.Arrays;
 import java.util.List;
 
+import org.apache.hadoop.io.DataOutputBuffer;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.hadoop.conf.Configuration;
@@ -68,11 +71,76 @@ public class TestFetcher {
 
   public static final String SHUFFLE_INPUT_FILE_PREFIX = "shuffle_input_file_";
   public static final String HOST = "localhost";
-  public static final int PORT = 0;
+  public static final int PORT = 65;
 
   static final Logger LOG = LoggerFactory.getLogger(TestFetcher.class);
 
   @Test(timeout = 5000)
+  public void testLocalFetchModeSetting1() throws Exception {
+    Configuration conf = new TezConfiguration();
+    ShuffleScheduler scheduler = mock(ShuffleScheduler.class);
+    MergeManager merger = mock(MergeManager.class);
+    ShuffleClientMetrics metrics = mock(ShuffleClientMetrics.class);
+    Shuffle shuffle = mock(Shuffle.class);
+
+    InputContext inputContext = mock(InputContext.class);
+    doReturn(new TezCounters()).when(inputContext).getCounters();
+    doReturn("src vertex").when(inputContext).getSourceVertexName();
+
+    final boolean ENABLE_LOCAL_FETCH = true;
+    final boolean DISABLE_LOCAL_FETCH = false;
+    MapHost mapHost = new MapHost(0, HOST + ":" + PORT, "baseurl");
+    FetcherOrderedGrouped
+        fetcher = new FetcherOrderedGrouped(null, scheduler, merger, metrics, shuffle, null,
+        false, 0, null, inputContext, conf, ENABLE_LOCAL_FETCH, HOST, PORT);
+
+    // when local mode is enabled and host and port matches use local fetch
+    FetcherOrderedGrouped spyFetcher = spy(fetcher);
+    doNothing().when(spyFetcher).setupLocalDiskFetch(mapHost);
+    doReturn(mapHost).when(scheduler).getHost();
+
+    spyFetcher.fetchNext();
+
+    verify(spyFetcher, times(1)).setupLocalDiskFetch(mapHost);
+    verify(spyFetcher, never()).copyFromHost(any(MapHost.class));
+
+    // if hostname does not match use http
+    spyFetcher = spy(fetcher);
+    mapHost = new MapHost(0, HOST + "_OTHER" + ":" + PORT, "baseurl");
+    doNothing().when(spyFetcher).setupLocalDiskFetch(mapHost);
+    doReturn(mapHost).when(scheduler).getHost();
+
+    spyFetcher.fetchNext();
+
+    verify(spyFetcher, never()).setupLocalDiskFetch(any(MapHost.class));
+    verify(spyFetcher, times(1)).copyFromHost(mapHost);
+
+    // if port does not match use http
+    spyFetcher = spy(fetcher);
+    mapHost = new MapHost(0, HOST + ":" + (PORT + 1), "baseurl");
+    doNothing().when(spyFetcher).setupLocalDiskFetch(mapHost);
+    doReturn(mapHost).when(scheduler).getHost();
+
+    spyFetcher.fetchNext();
+
+    verify(spyFetcher, never()).setupLocalDiskFetch(any(MapHost.class));
+    verify(spyFetcher, times(1)).copyFromHost(mapHost);
+
+    //if local fetch is not enabled
+    mapHost = new MapHost(0, HOST + ":" + PORT, "baseurl");
+    fetcher = new FetcherOrderedGrouped(null, scheduler, merger, metrics, shuffle, null,
+        false, 0, null, inputContext, conf, DISABLE_LOCAL_FETCH, HOST, PORT);
+    spyFetcher = spy(fetcher);
+    doNothing().when(spyFetcher).setupLocalDiskFetch(mapHost);
+    doReturn(mapHost).when(scheduler).getHost();
+
+    spyFetcher.fetchNext();
+
+    verify(spyFetcher, never()).setupLocalDiskFetch(any(MapHost.class));
+    verify(spyFetcher, times(1)).copyFromHost(mapHost);
+  }
+
+  @Test(timeout = 5000)
   public void testSetupLocalDiskFetch() throws Exception {
     Configuration conf = new TezConfiguration();
     ShuffleScheduler scheduler = mock(ShuffleScheduler.class);
@@ -85,7 +153,7 @@ public class TestFetcher {
 
     FetcherOrderedGrouped
         fetcher = new FetcherOrderedGrouped(null, scheduler, merger, metrics, shuffle, null,
-        false, 0, null, inputContext, conf, true, HOST);
+        false, 0, null, inputContext, conf, true, HOST, PORT);
     FetcherOrderedGrouped spyFetcher = spy(fetcher);
 
     MapHost host = new MapHost(1, HOST + ":" + PORT,
@@ -228,7 +296,7 @@ public class TestFetcher {
         ShuffleUtils.constructHttpShuffleConnectionParams(conf);
     FetcherOrderedGrouped mockFetcher =
         new FetcherOrderedGrouped(httpConnectionParams, scheduler, merger, metrics, shuffle, null,
-            false, 0, null, inputContext, conf, false, HOST);
+            false, 0, null, inputContext, conf, false, HOST, PORT);
     final FetcherOrderedGrouped fetcher = spy(mockFetcher);
 
     final MapHost host = new MapHost(1, HOST + ":" + PORT,


[18/43] tez git commit: TEZ-2019. Temporarily allow the scheduler and launcher to be specified via configuration. (sseth)

Posted by ss...@apache.org.
TEZ-2019. Temporarily allow the scheduler and launcher to be specified
via configuration. (sseth)


Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/44bea934
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/44bea934
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/44bea934

Branch: refs/heads/TEZ-2003
Commit: 44bea934be1441323ed764692ec9287f06fdc2c2
Parents: ce69aa1
Author: Siddharth Seth <ss...@apache.org>
Authored: Fri Jan 30 16:02:32 2015 -0800
Committer: Siddharth Seth <ss...@apache.org>
Committed: Fri May 8 14:36:09 2015 -0700

----------------------------------------------------------------------
 TEZ-2003-CHANGES.txt                            |  4 +++
 .../apache/tez/dag/api/TezConfiguration.java    |  6 ++++
 .../org/apache/tez/dag/app/DAGAppMaster.java    | 30 ++++++++++++++++-
 .../dag/app/rm/TaskSchedulerEventHandler.java   | 34 ++++++++++++++++++--
 .../org/apache/tez/runtime/task/TezChild.java   |  3 +-
 5 files changed, 73 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tez/blob/44bea934/TEZ-2003-CHANGES.txt
----------------------------------------------------------------------
diff --git a/TEZ-2003-CHANGES.txt b/TEZ-2003-CHANGES.txt
new file mode 100644
index 0000000..1822fcb
--- /dev/null
+++ b/TEZ-2003-CHANGES.txt
@@ -0,0 +1,4 @@
+ALL CHANGES:
+  TEZ-2019. Temporarily allow the scheduler and launcher to be specified via configuration.
+
+INCOMPATIBLE CHANGES:

http://git-wip-us.apache.org/repos/asf/tez/blob/44bea934/tez-api/src/main/java/org/apache/tez/dag/api/TezConfiguration.java
----------------------------------------------------------------------
diff --git a/tez-api/src/main/java/org/apache/tez/dag/api/TezConfiguration.java b/tez-api/src/main/java/org/apache/tez/dag/api/TezConfiguration.java
index e4170e9..01e724e 100644
--- a/tez-api/src/main/java/org/apache/tez/dag/api/TezConfiguration.java
+++ b/tez-api/src/main/java/org/apache/tez/dag/api/TezConfiguration.java
@@ -1168,6 +1168,12 @@ public class TezConfiguration extends Configuration {
       + "tez-ui.webservice.enable";
   public static final boolean TEZ_AM_WEBSERVICE_ENABLE_DEFAULT = true;
 
+  @ConfigurationScope(Scope.VERTEX)
+  public static final String TEZ_AM_CONTAINER_LAUNCHER_CLASS = TEZ_AM_PREFIX + "container-launcher.class";
+  @ConfigurationScope(Scope.VERTEX)
+  public static final String TEZ_AM_TASK_SCHEDULER_CLASS = TEZ_AM_PREFIX + "task-scheduler.class";
+
+
   // TODO only validate property here, value can also be validated if necessary
   public static void validateProperty(String property, Scope usedScope) {
     Scope validScope = PropertyScope.get(property);

http://git-wip-us.apache.org/repos/asf/tez/blob/44bea934/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java b/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java
index 3e3d6f0..73ee56e 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java
@@ -25,6 +25,8 @@ import java.io.FileInputStream;
 import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.io.PrintWriter;
+import java.lang.reflect.Constructor;
+import java.lang.reflect.InvocationTargetException;
 import java.net.URI;
 import java.net.URISyntaxException;
 import java.net.URL;
@@ -93,6 +95,7 @@ import org.apache.hadoop.yarn.util.SystemClock;
 import org.apache.tez.common.AsyncDispatcher;
 import org.apache.tez.common.AsyncDispatcherConcurrent;
 import org.apache.tez.common.GcTimeUpdater;
+import org.apache.tez.common.ReflectionUtils;
 import org.apache.tez.common.TezCommonUtils;
 import org.apache.tez.common.TezConverterUtils;
 import org.apache.tez.common.TezUtilsInternal;
@@ -1034,9 +1037,34 @@ public class DAGAppMaster extends AbstractService {
   protected ContainerLauncher
       createContainerLauncher(final AppContext context) throws UnknownHostException {
     if(isLocal){
+      LOG.info("Creating LocalContainerLauncher");
       return new LocalContainerLauncher(context, taskAttemptListener, workingDirectory);
     } else {
-      return new ContainerLauncherImpl(context);
+      // TODO: Temporary reflection with specific parameters until a clean interface is defined.
+      String containerLauncherClassName = getConfig().get(TezConfiguration.TEZ_AM_CONTAINER_LAUNCHER_CLASS);
+      if (containerLauncherClassName == null) {
+        LOG.info("Creating Default Container Launcher");
+        return new ContainerLauncherImpl(context);
+      } else {
+        LOG.info("Creating container launcher : " + containerLauncherClassName);
+        Class<? extends ContainerLauncher> containerLauncherClazz = (Class<? extends ContainerLauncher>) ReflectionUtils.getClazz(
+            containerLauncherClassName);
+        try {
+          Constructor<? extends ContainerLauncher> ctor = containerLauncherClazz
+              .getConstructor(AppContext.class, Configuration.class, TaskAttemptListener.class);
+          ctor.setAccessible(true);
+          ContainerLauncher instance = ctor.newInstance(context, getConfig(), taskAttemptListener);
+          return instance;
+        } catch (NoSuchMethodException e) {
+          throw new TezUncheckedException(e);
+        } catch (InvocationTargetException e) {
+          throw new TezUncheckedException(e);
+        } catch (InstantiationException e) {
+          throw new TezUncheckedException(e);
+        } catch (IllegalAccessException e) {
+          throw new TezUncheckedException(e);
+        }
+      }
     }
   }
 

http://git-wip-us.apache.org/repos/asf/tez/blob/44bea934/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerEventHandler.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerEventHandler.java b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerEventHandler.java
index 19db660..62f82db 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerEventHandler.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerEventHandler.java
@@ -18,6 +18,8 @@
 
 package org.apache.tez.dag.app.rm;
 
+import java.lang.reflect.Constructor;
+import java.lang.reflect.InvocationTargetException;
 import java.net.InetSocketAddress;
 import java.nio.ByteBuffer;
 import java.util.List;
@@ -42,6 +44,7 @@ import org.apache.hadoop.yarn.api.records.Priority;
 import org.apache.hadoop.yarn.api.records.Resource;
 import org.apache.hadoop.yarn.event.Event;
 import org.apache.hadoop.yarn.event.EventHandler;
+import org.apache.tez.common.ReflectionUtils;
 import org.apache.tez.dag.api.TezConfiguration;
 import org.apache.tez.dag.api.TezUncheckedException;
 import org.apache.tez.dag.api.TaskLocationHint;
@@ -329,12 +332,39 @@ public class TaskSchedulerEventHandler extends AbstractService
     boolean isLocal = getConfig().getBoolean(TezConfiguration.TEZ_LOCAL_MODE,
         TezConfiguration.TEZ_LOCAL_MODE_DEFAULT);
     if (isLocal) {
+      LOG.info("Using TaskScheduler: LocalTaskSchedulerService");
       return new LocalTaskSchedulerService(this, this.containerSignatureMatcher,
           host, port, trackingUrl, appContext);
     }
     else {
-      return new YarnTaskSchedulerService(this, this.containerSignatureMatcher,
-          host, port, trackingUrl, appContext);
+      String schedulerClassName = getConfig().get(TezConfiguration.TEZ_AM_TASK_SCHEDULER_CLASS);
+      if (schedulerClassName == null) {
+        LOG.info("Using TaskScheduler: YarnTaskSchedulerService");
+        return new YarnTaskSchedulerService(this, this.containerSignatureMatcher,
+            host, port, trackingUrl, appContext);
+      } else {
+        LOG.info("Using custom TaskScheduler: " + schedulerClassName);
+        // TODO Temporary reflection with specific parameters. Remove once there is a clean interface.
+        Class<? extends TaskSchedulerService> taskSchedulerClazz =
+            (Class<? extends TaskSchedulerService>) ReflectionUtils.getClazz(schedulerClassName);
+        try {
+          Constructor<? extends TaskSchedulerService> ctor = taskSchedulerClazz
+              .getConstructor(TaskSchedulerAppCallback.class, AppContext.class, String.class,
+                  Integer.class, String.class, Configuration.class);
+          ctor.setAccessible(true);
+          TaskSchedulerService taskSchedulerService =
+              ctor.newInstance(this, appContext, host, port, trackingUrl, getConfig());
+          return taskSchedulerService;
+        } catch (NoSuchMethodException e) {
+          throw new TezUncheckedException(e);
+        } catch (InvocationTargetException e) {
+          throw new TezUncheckedException(e);
+        } catch (InstantiationException e) {
+          throw new TezUncheckedException(e);
+        } catch (IllegalAccessException e) {
+          throw new TezUncheckedException(e);
+        }
+      }
     }
   }
   

http://git-wip-us.apache.org/repos/asf/tez/blob/44bea934/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TezChild.java
----------------------------------------------------------------------
diff --git a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TezChild.java b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TezChild.java
index bfec349..fd55992 100644
--- a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TezChild.java
+++ b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TezChild.java
@@ -355,7 +355,8 @@ public class TezChild {
       }
       if (!isLocal) {
         RPC.stopProxy(umbilical);
-        LogManager.shutdown();
+        // TODO Temporary change. Revert. Ideally, move this over to the main method in TezChild if possible.
+//        LogManager.shutdown();
       }
     }
   }


[13/43] tez git commit: TEZ-776. Reduce AM mem usage caused by storing TezEvents (bikas)

Posted by ss...@apache.org.
http://git-wip-us.apache.org/repos/asf/tez/blob/05f77fe2/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexImpl.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexImpl.java
index 5d61642..a16ee0a 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexImpl.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexImpl.java
@@ -91,6 +91,7 @@ import org.apache.tez.dag.api.records.DAGProtos.RootInputLeafOutputProto;
 import org.apache.tez.dag.api.records.DAGProtos.VertexPlan;
 import org.apache.tez.dag.app.AppContext;
 import org.apache.tez.dag.app.ContainerContext;
+import org.apache.tez.dag.app.TaskAttemptEventInfo;
 import org.apache.tez.dag.app.TaskAttemptListener;
 import org.apache.tez.dag.app.TaskHeartbeatHandler;
 import org.apache.tez.dag.app.dag.DAG;
@@ -134,6 +135,7 @@ import org.apache.tez.dag.app.dag.event.VertexEventTermination;
 import org.apache.tez.dag.app.dag.event.VertexEventType;
 import org.apache.tez.dag.app.dag.event.TaskAttemptEvent;
 import org.apache.tez.dag.app.dag.impl.DAGImpl.VertexGroupInfo;
+import org.apache.tez.dag.app.dag.impl.Edge.PendingEventRouteMetadata;
 import org.apache.tez.dag.app.dag.speculation.legacy.LegacySpeculator;
 import org.apache.tez.dag.history.DAGHistoryEvent;
 import org.apache.tez.dag.history.HistoryEvent;
@@ -202,7 +204,6 @@ public class VertexImpl implements org.apache.tez.dag.app.dag.Vertex, EventHandl
   //final fields
   private final Clock clock;
 
-
   private final Lock readLock;
   private final Lock writeLock;
   private final TaskAttemptListener taskAttemptListener;
@@ -225,6 +226,9 @@ public class VertexImpl implements org.apache.tez.dag.app.dag.Vertex, EventHandl
   private Configuration vertexConf;
   
   private final boolean isSpeculationEnabled;
+  
+  @VisibleForTesting
+  public boolean useOnDemandRouting = true;
 
   //fields initialized in init
 
@@ -726,9 +730,18 @@ public class VertexImpl implements org.apache.tez.dag.app.dag.Vertex, EventHandl
   private Set<String> inputsWithInitializers;
   private int numInitializedInputs;
   private boolean startSignalPending = false;
-  private boolean tasksNotYetScheduled = true;
   // We may always store task events in the vertex for scalability
   List<TezEvent> pendingTaskEvents = Lists.newLinkedList();
+  private boolean tasksNotYetScheduled = true;
+  // must be a random access structure
+  
+  private final List<EventInfo> onDemandRouteEvents = Lists.newArrayListWithCapacity(1000);
+  private final ReadWriteLock onDemandRouteEventsReadWriteLock = new ReentrantReadWriteLock();
+  private final Lock onDemandRouteEventsReadLock = onDemandRouteEventsReadWriteLock.readLock();
+  private final Lock onDemandRouteEventsWriteLock = onDemandRouteEventsReadWriteLock.writeLock();
+  
+  private static final List<TezEvent> EMPTY_TASK_ATTEMPT_TEZ_EVENTS =
+      new ArrayList(0);
   List<TezEvent> pendingRouteEvents = new LinkedList<TezEvent>();
   List<TezTaskAttemptID> pendingReportedSrcCompletions = Lists.newLinkedList();
 
@@ -771,6 +784,17 @@ public class VertexImpl implements org.apache.tez.dag.app.dag.Vertex, EventHandl
   private VertexStats vertexStats = null;
 
   private final TaskSpecificLaunchCmdOption taskSpecificLaunchCmdOpts;
+  
+  static class EventInfo {
+    final TezEvent tezEvent;
+    final Edge eventEdge;
+    final int eventTaskIndex;
+    EventInfo(TezEvent tezEvent, Edge eventEdge, int eventTaskIndex) {
+      this.tezEvent = tezEvent;
+      this.eventEdge = eventEdge;
+      this.eventTaskIndex = eventTaskIndex;
+    }
+  }
 
   private VertexStatisticsImpl finalStatistics;
 
@@ -1175,6 +1199,11 @@ public class VertexImpl implements org.apache.tez.dag.app.dag.Vertex, EventHandl
     }
   }
 
+  @VisibleForTesting
+  List<EventInfo> getOnDemandRouteEvents() {
+    return onDemandRouteEvents;
+  }
+  
   private void computeProgress() {
     this.readLock.lock();
     try {
@@ -1388,24 +1417,51 @@ public class VertexImpl implements org.apache.tez.dag.app.dag.Vertex, EventHandl
     }
   }
   
-  @Override
-  public void scheduleTasks(List<TaskWithLocationHint> tasksToSchedule) {
-    writeLock.lock();
-    try {
+  void setupEdgeRouting() throws AMUserCodeException {
+    for (Edge e : sourceVertices.values()) {
+      boolean edgeDoingOnDemand = e.routingToBegin();
+      if (useOnDemandRouting && !edgeDoingOnDemand) {
+        useOnDemandRouting = false;
+        LOG.info("Not using ondemand routing because of edge between " + e.getSourceVertexName()
+            + " and " + getLogIdentifier());
+      }
+    }
+  }
+  
+  private void unsetTasksNotYetScheduled() throws AMUserCodeException {
+    if (tasksNotYetScheduled) {
+      setupEdgeRouting();
       tasksNotYetScheduled = false;
+      // only now can we be sure of the edge manager type. so until now
+      // we will accumulate pending tasks in case legacy routing gets used.
+      // this is only needed to support mixed mode routing. Else for
+      // on demand routing events can be directly added to taskEvents when 
+      // they arrive in handleRoutedEvents instead of first caching them in 
+      // pendingTaskEvents. When legacy routing is removed then pendingTaskEvents
+      // can be removed.
       if (!pendingTaskEvents.isEmpty()) {
         LOG.info("Routing pending task events for vertex: " + logIdentifier);
         try {
-          handleRoutedTezEvents(this, pendingTaskEvents, false, true);
+          handleRoutedTezEvents(pendingTaskEvents, false, true);
         } catch (AMUserCodeException e) {
-          String msg = "Exception in " + e.getSource() +", vertex=" + logIdentifier;
+          String msg = "Exception in " + e.getSource() + ", vertex=" + logIdentifier;
           LOG.error(msg, e);
-          addDiagnostic(msg + ", " + e.getMessage() + ", " + ExceptionUtils.getStackTrace(e.getCause()));
-          eventHandler.handle(new VertexEventTermination(vertexId, VertexTerminationCause.AM_USERCODE_FAILURE));
+          addDiagnostic(msg + ", " + e.getMessage() + ", "
+              + ExceptionUtils.getStackTrace(e.getCause()));
+          eventHandler.handle(new VertexEventTermination(vertexId,
+              VertexTerminationCause.AM_USERCODE_FAILURE));
           return;
         }
         pendingTaskEvents.clear();
       }
+    }
+  }
+  
+  @Override
+  public void scheduleTasks(List<TaskWithLocationHint> tasksToSchedule) {
+    writeLock.lock();
+    try {
+      unsetTasksNotYetScheduled();
       for (TaskWithLocationHint task : tasksToSchedule) {
         if (numTasks <= task.getTaskIndex().intValue()) {
           throw new TezUncheckedException(
@@ -1422,6 +1478,13 @@ public class VertexImpl implements org.apache.tez.dag.app.dag.Vertex, EventHandl
             TezTaskID.getInstance(vertexId, task.getTaskIndex().intValue()),
             TaskEventType.T_SCHEDULE));
       }
+    } catch (AMUserCodeException e) {
+      String msg = "Exception in " + e.getSource() + ", vertex=" + getLogIdentifier();
+      LOG.error(msg, e);
+      // send event to fail the vertex
+      eventHandler.handle(new VertexEventManagerUserCodeError(getVertexId(), e));
+      // throw an unchecked exception to stop the vertex manager that invoked this.
+      throw new TezUncheckedException(e);
     } finally {
       writeLock.unlock();
     }
@@ -2497,7 +2560,6 @@ public class VertexImpl implements org.apache.tez.dag.app.dag.Vertex, EventHandl
           }
           break;
         case RUNNING:
-          vertex.tasksNotYetScheduled = false;
           try {
             vertex.initializeCommitters();
           } catch (Exception e) {
@@ -2530,6 +2592,7 @@ public class VertexImpl implements org.apache.tez.dag.app.dag.Vertex, EventHandl
             }
             try {
               vertex.recoveryCodeSimulatingStart();
+              vertex.unsetTasksNotYetScheduled();
               endState = VertexState.RUNNING;
             } catch (AMUserCodeException e) {
               String msg = "Exception in " + e.getSource() + ", vertex:" + vertex.getLogIdentifier();
@@ -2560,7 +2623,6 @@ public class VertexImpl implements org.apache.tez.dag.app.dag.Vertex, EventHandl
                 VertexTerminationCause.COMMIT_FAILURE, msg);
             endState = VertexState.FAILED;
           } else {
-            vertex.tasksNotYetScheduled = false;
             // recover tasks
             if (vertex.tasks != null && vertex.numTasks != 0) {
               TaskState taskState = TaskState.KILLED;
@@ -2578,6 +2640,7 @@ public class VertexImpl implements org.apache.tez.dag.app.dag.Vertex, EventHandl
               }
               try {
                 vertex.recoveryCodeSimulatingStart();
+                vertex.unsetTasksNotYetScheduled();
                 endState = VertexState.RUNNING;
               } catch (AMUserCodeException e) {
                 String msg = "Exception in " + e.getSource() +", vertex:" + vertex.getLogIdentifier();
@@ -2901,7 +2964,6 @@ public class VertexImpl implements org.apache.tez.dag.app.dag.Vertex, EventHandl
           endState = VertexState.INITED;
           break;
         case RUNNING:
-          vertex.tasksNotYetScheduled = false;
           // if commit in progress and desired state is not a succeeded one,
           // move to failed
           if (vertex.recoveryCommitInProgress) {
@@ -2946,6 +3008,7 @@ public class VertexImpl implements org.apache.tez.dag.app.dag.Vertex, EventHandl
             }
             try {
               vertex.recoveryCodeSimulatingStart();
+              vertex.unsetTasksNotYetScheduled();
               endState = VertexState.RUNNING;
             } catch (AMUserCodeException e) {
               String msg = "Exception in " + e.getSource() + ", vertex=" + vertex.getLogIdentifier();
@@ -2962,7 +3025,6 @@ public class VertexImpl implements org.apache.tez.dag.app.dag.Vertex, EventHandl
         case SUCCEEDED:
         case FAILED:
         case KILLED:
-          vertex.tasksNotYetScheduled = false;
           // recover tasks
           assert vertex.tasks.size() == vertex.numTasks;
           if (vertex.tasks != null  && vertex.numTasks != 0) {
@@ -2982,6 +3044,7 @@ public class VertexImpl implements org.apache.tez.dag.app.dag.Vertex, EventHandl
             // Wait for all tasks to recover and report back
             try {
               vertex.recoveryCodeSimulatingStart();
+              vertex.unsetTasksNotYetScheduled();
               endState = VertexState.RUNNING;
             } catch (AMUserCodeException e) {
               String msg = "Exception in " + e.getSource() + ", vertex:" + vertex.getLogIdentifier();
@@ -3025,7 +3088,7 @@ public class VertexImpl implements org.apache.tez.dag.app.dag.Vertex, EventHandl
         vertex.recoveredEvents.clear();
         if (!vertex.pendingRouteEvents.isEmpty()) {
           try {
-            handleRoutedTezEvents(vertex, vertex.pendingRouteEvents, false, true);
+            vertex.handleRoutedTezEvents(vertex.pendingRouteEvents, false, true);
             vertex.pendingRouteEvents.clear();
           } catch (AMUserCodeException e) {
             String msg = "Exception in " + e.getSource() + ", vertex=" + vertex.getLogIdentifier();
@@ -3284,7 +3347,7 @@ public class VertexImpl implements org.apache.tez.dag.app.dag.Vertex, EventHandl
       List<TezEvent> inputInfoEvents = iEvent.getEvents();
       try {
         if (inputInfoEvents != null && !inputInfoEvents.isEmpty()) {
-          VertexImpl.handleRoutedTezEvents(vertex, inputInfoEvents, false, false);
+          vertex.handleRoutedTezEvents(inputInfoEvents, false, false);
         }
       } catch (AMUserCodeException e) {
         String msg = "Exception in " + e.getSource() + ", vertex:" + vertex.getLogIdentifier();
@@ -3941,7 +4004,7 @@ public class VertexImpl implements org.apache.tez.dag.app.dag.Vertex, EventHandl
       boolean recovered = rEvent.isRecovered();
       List<TezEvent> tezEvents = rEvent.getEvents();
       try {
-        VertexImpl.handleRoutedTezEvents(vertex, tezEvents, recovered, false);
+        vertex.handleRoutedTezEvents(tezEvents, recovered, false);
       } catch (AMUserCodeException e) {
         String msg = "Exception in " + e.getSource() + ", vertex=" + vertex.getLogIdentifier();
         LOG.error(msg, e);
@@ -3959,16 +4022,105 @@ public class VertexImpl implements org.apache.tez.dag.app.dag.Vertex, EventHandl
       return vertex.getState();
     }
   }
+  
+  @Override
+  public TaskAttemptEventInfo getTaskAttemptTezEvents(TezTaskAttemptID attemptID,
+      int fromEventId, int maxEvents) {
+    if (!useOnDemandRouting) {
+      List<TezEvent> events = getTask(attemptID.getTaskID()).getTaskAttemptTezEvents(attemptID, fromEventId, maxEvents);
+      return new TaskAttemptEventInfo(fromEventId + events.size(), events);
+    }
 
-  private static void handleRoutedTezEvents(VertexImpl vertex, List<TezEvent> tezEvents, boolean recovered, boolean isPendingEvents) throws AMUserCodeException {
-    if (vertex.getAppContext().isRecoveryEnabled()
+    onDemandRouteEventsReadLock.lock();
+    try {
+      List<TezEvent> events = EMPTY_TASK_ATTEMPT_TEZ_EVENTS;
+      int nextFromEventId = fromEventId;
+      int currEventCount = onDemandRouteEvents.size();
+      try {
+        if (currEventCount > fromEventId) {
+          events = Lists.newArrayListWithCapacity(maxEvents);
+          int taskIndex = attemptID.getTaskID().getId();
+          Preconditions.checkState(taskIndex < tasks.size(), "Invalid task index for TA: " + attemptID
+              + " vertex: " + getLogIdentifier());
+          boolean isFirstEvent = true;
+          for (nextFromEventId = fromEventId; nextFromEventId < currEventCount; ++nextFromEventId) {
+            boolean earlyExit = false;
+            if (events.size() == maxEvents) {
+              break;
+            }
+            EventInfo eventInfo = onDemandRouteEvents.get(nextFromEventId);
+            TezEvent tezEvent = eventInfo.tezEvent;
+            switch(tezEvent.getEventType()) {
+            case INPUT_FAILED_EVENT:
+            case DATA_MOVEMENT_EVENT:
+            case COMPOSITE_DATA_MOVEMENT_EVENT:
+              {
+                int srcTaskIndex = eventInfo.eventTaskIndex;
+                Edge srcEdge = eventInfo.eventEdge;
+                PendingEventRouteMetadata pendingRoute = null;
+                if (isFirstEvent) {
+                  // do this precondition check only for the first event
+                  isFirstEvent = false;
+                  pendingRoute = srcEdge.removePendingEvents(attemptID);
+                  if (pendingRoute != null) {
+                    Preconditions.checkState(tezEvent == pendingRoute.getTezEvent()); // same object
+                  }
+                }
+                if (!srcEdge.maybeAddTezEventForDestinationTask(tezEvent, attemptID, srcTaskIndex,
+                    events, maxEvents, pendingRoute)) {
+                  // not enough space left for this iteration events.
+                  // Exit and start from here next time
+                  earlyExit = true;
+                }
+              }
+              break;
+            case ROOT_INPUT_DATA_INFORMATION_EVENT:
+              {
+                InputDataInformationEvent riEvent = (InputDataInformationEvent) tezEvent.getEvent();
+                if (riEvent.getTargetIndex() == taskIndex) {
+                  events.add(tezEvent);
+                }
+              }
+              break;
+            default:
+              throw new TezUncheckedException("Unexpected event type for task: "
+                  + tezEvent.getEventType());
+            }
+            if (earlyExit) {
+              break;
+            }
+          }
+        }
+      } catch (AMUserCodeException e) {
+        String msg = "Exception in " + e.getSource() + ", vertex=" + getLogIdentifier();
+        LOG.error(msg, e);
+        eventHandler.handle(new VertexEventManagerUserCodeError(getVertexId(), e));
+        nextFromEventId = fromEventId;
+        events.clear();
+      }
+  
+      if (events.size() > 0) {
+        StringBuilder builder = new StringBuilder();
+        builder.append("Sending ").append(attemptID).append(" numEvents: ").append(events.size())
+        .append(" from: ").append(fromEventId).append(" to: ").append(nextFromEventId)
+        .append(" out of ").append(currEventCount).append(" events in vertex: ").append(getLogIdentifier());
+        LOG.info(builder.toString());
+      }
+      return new TaskAttemptEventInfo(nextFromEventId, events);
+    } finally {
+      onDemandRouteEventsReadLock.unlock();
+    }
+  }
+
+  private void handleRoutedTezEvents(List<TezEvent> tezEvents, boolean recovered, boolean isPendingEvents) throws AMUserCodeException {
+    if (getAppContext().isRecoveryEnabled()
         && !recovered
         && !isPendingEvents
         && !tezEvents.isEmpty()) {
       List<TezEvent> recoveryEvents =
           Lists.newArrayList();
       for (TezEvent tezEvent : tezEvents) {
-        if (!isEventFromVertex(vertex, tezEvent.getSourceInfo())) {
+        if (!isEventFromVertex(this, tezEvent.getSourceInfo())) {
           continue;
         }
         if  (tezEvent.getEventType().equals(EventType.COMPOSITE_DATA_MOVEMENT_EVENT)
@@ -3980,15 +4132,15 @@ public class VertexImpl implements org.apache.tez.dag.app.dag.Vertex, EventHandl
       }
       if (!recoveryEvents.isEmpty()) {
         VertexRecoverableEventsGeneratedEvent historyEvent =
-            new VertexRecoverableEventsGeneratedEvent(vertex.vertexId,
+            new VertexRecoverableEventsGeneratedEvent(vertexId,
                 recoveryEvents);
-        vertex.appContext.getHistoryHandler().handle(
-            new DAGHistoryEvent(vertex.getDAGId(), historyEvent));
+        appContext.getHistoryHandler().handle(
+            new DAGHistoryEvent(getDAGId(), historyEvent));
       }
     }
     for(TezEvent tezEvent : tezEvents) {
       if (LOG.isDebugEnabled()) {
-        LOG.debug("Vertex: " + vertex.getName() + " routing event: "
+        LOG.debug("Vertex: " + getLogIdentifier() + " routing event: "
             + tezEvent.getEventType()
             + " Recovered:" + recovered);
       }
@@ -3998,7 +4150,7 @@ public class VertexImpl implements org.apache.tez.dag.app.dag.Vertex, EventHandl
       case DATA_MOVEMENT_EVENT:
       case COMPOSITE_DATA_MOVEMENT_EVENT:
         {
-          if (isEventFromVertex(vertex, sourceMeta)) {
+          if (isEventFromVertex(this, sourceMeta)) {
             // event from this vertex. send to destination vertex
             TezTaskAttemptID srcTaId = sourceMeta.getTaskAttemptID();
             if (tezEvent.getEventType() == EventType.DATA_MOVEMENT_EVENT) {
@@ -4008,56 +4160,86 @@ public class VertexImpl implements org.apache.tez.dag.app.dag.Vertex, EventHandl
             } else {
               ((InputFailedEvent) tezEvent.getEvent()).setVersion(srcTaId.getId());
             }
-            Vertex destVertex = vertex.getDAG().getVertex(sourceMeta.getEdgeVertexName());
-            Edge destEdge = vertex.targetVertices.get(destVertex);
+            Vertex destVertex = getDAG().getVertex(sourceMeta.getEdgeVertexName());
+            Edge destEdge = targetVertices.get(destVertex);
             if (destEdge == null) {
               throw new TezUncheckedException("Bad destination vertex: " +
                   sourceMeta.getEdgeVertexName() + " for event vertex: " +
-                  vertex.getLogIdentifier());
+                  getLogIdentifier());
             }
-            vertex.eventHandler.handle(new VertexEventRouteEvent(destVertex
+            eventHandler.handle(new VertexEventRouteEvent(destVertex
                 .getVertexId(), Collections.singletonList(tezEvent)));
           } else {
-            // event not from this vertex. must have come from source vertex.
-            // send to tasks
-            if (vertex.tasksNotYetScheduled) {
-              vertex.pendingTaskEvents.add(tezEvent);
+            if (tasksNotYetScheduled) {
+              // this is only needed to support mixed mode routing. Else for
+              // on demand routing events can be directly added to taskEvents
+              // when legacy routing is removed then pending task events can be
+              // removed.
+              pendingTaskEvents.add(tezEvent);
             } else {
-              Edge srcEdge = vertex.sourceVertices.get(vertex.getDAG().getVertex(
-                  sourceMeta.getTaskVertexName()));
-              if (srcEdge == null) {
-                throw new TezUncheckedException("Bad source vertex: " +
-                    sourceMeta.getTaskVertexName() + " for destination vertex: " +
-                    vertex.getLogIdentifier());
+              // event not from this vertex. must have come from source vertex.
+              if (useOnDemandRouting) {
+                int srcTaskIndex = sourceMeta.getTaskAttemptID().getTaskID().getId();
+                Vertex edgeVertex = getDAG().getVertex(sourceMeta.getTaskVertexName());
+                Edge srcEdge = sourceVertices.get(edgeVertex);
+                if (srcEdge == null) {
+                  throw new TezUncheckedException("Bad source vertex: " +
+                      sourceMeta.getTaskVertexName() + " for destination vertex: " +
+                      getLogIdentifier());
+                }
+                onDemandRouteEventsWriteLock.lock();
+                try {
+                  onDemandRouteEvents.add(new EventInfo(tezEvent, srcEdge, srcTaskIndex));
+                } finally {
+                  onDemandRouteEventsWriteLock.unlock();
+                }
+              } else {
+                // send to tasks            
+                Edge srcEdge = sourceVertices.get(getDAG().getVertex(
+                    sourceMeta.getTaskVertexName()));
+                if (srcEdge == null) {
+                  throw new TezUncheckedException("Bad source vertex: "
+                      + sourceMeta.getTaskVertexName() + " for destination vertex: "
+                      + getLogIdentifier());
+                }
+                srcEdge.sendTezEventToDestinationTasks(tezEvent);
               }
-              srcEdge.sendTezEventToDestinationTasks(tezEvent);
             }
           }
         }
         break;
       case ROOT_INPUT_DATA_INFORMATION_EVENT:
-        if (vertex.tasksNotYetScheduled) {
-          vertex.pendingTaskEvents.add(tezEvent);
+      {   
+        checkEventSourceMetadata(this, sourceMeta);
+        if (tasksNotYetScheduled) {
+          // this is only needed to support mixed mode routing. Else for
+          // on demand routing events can be directly added to taskEvents
+          // when legacy routing is removed then pending task events can be
+          // removed.
+          pendingTaskEvents.add(tezEvent);          
         } else {
-          checkEventSourceMetadata(vertex, sourceMeta);
-          InputDataInformationEvent riEvent = (InputDataInformationEvent) tezEvent
-              .getEvent();
-          Task targetTask = vertex.getTask(riEvent.getTargetIndex());
-          targetTask.registerTezEvent(tezEvent);
+          if (useOnDemandRouting) {
+            onDemandRouteEvents.add(new EventInfo(tezEvent, null, -1));
+          } else {
+            InputDataInformationEvent riEvent = (InputDataInformationEvent) tezEvent.getEvent();
+            Task targetTask = getTask(riEvent.getTargetIndex());
+            targetTask.registerTezEvent(tezEvent);
+          }
         }
+      }
         break;
       case VERTEX_MANAGER_EVENT:
       {
         // VM events on task success only can be changed as part of TEZ-1532
         VertexManagerEvent vmEvent = (VertexManagerEvent) tezEvent.getEvent();
-        Vertex target = vertex.getDAG().getVertex(vmEvent.getTargetVertexName());
+        Vertex target = getDAG().getVertex(vmEvent.getTargetVertexName());
         Preconditions.checkArgument(target != null,
             "Event sent to unkown vertex: " + vmEvent.getTargetVertexName());
-        if (target == vertex) {
-          vertex.vertexManager.onVertexManagerEventReceived(vmEvent);
+        if (target == this) {
+          vertexManager.onVertexManagerEventReceived(vmEvent);
         } else {
-          checkEventSourceMetadata(vertex, sourceMeta);
-          vertex.eventHandler.handle(new VertexEventRouteEvent(target
+          checkEventSourceMetadata(this, sourceMeta);
+          eventHandler.handle(new VertexEventRouteEvent(target
               .getVertexId(), Collections.singletonList(tezEvent)));
         }
       }
@@ -4065,45 +4247,46 @@ public class VertexImpl implements org.apache.tez.dag.app.dag.Vertex, EventHandl
       case ROOT_INPUT_INITIALIZER_EVENT:
       {
         InputInitializerEvent riEvent = (InputInitializerEvent) tezEvent.getEvent();
-        Vertex target = vertex.getDAG().getVertex(riEvent.getTargetVertexName());
+        Vertex target = getDAG().getVertex(riEvent.getTargetVertexName());
         Preconditions.checkArgument(target != null,
             "Event sent to unknown vertex: " + riEvent.getTargetVertexName());
         riEvent.setSourceVertexName(tezEvent.getSourceInfo().getTaskVertexName());
-        if (target == vertex) {
-          if (vertex.rootInputDescriptors == null ||
-              !vertex.rootInputDescriptors.containsKey(riEvent.getTargetInputName())) {
+        if (target == this) {
+          if (rootInputDescriptors == null ||
+              !rootInputDescriptors.containsKey(riEvent.getTargetInputName())) {
             throw new TezUncheckedException(
                 "InputInitializerEvent targeted at unknown initializer on vertex " +
-                    vertex.logIdentifier + ", Event=" + riEvent);
+                    logIdentifier + ", Event=" + riEvent);
           }
-          if (vertex.getState() == VertexState.NEW) {
-            vertex.pendingInitializerEvents.add(tezEvent);
-          } else  if (vertex.getState() == VertexState.INITIALIZING) {
-            vertex.rootInputInitializerManager.handleInitializerEvents(Collections.singletonList(tezEvent));
+          if (getState() == VertexState.NEW) {
+            pendingInitializerEvents.add(tezEvent);
+          } else  if (getState() == VertexState.INITIALIZING) {
+            rootInputInitializerManager.handleInitializerEvents(Collections.singletonList(tezEvent));
           } else {
             // Currently, INITED and subsequent states means Initializer complete / failure
             if (LOG.isDebugEnabled()) {
-              LOG.debug("Dropping event" + tezEvent + " since state is not INITIALIZING in " + vertex.getLogIdentifier() + ", state=" + vertex.getState());
+              LOG.debug("Dropping event" + tezEvent + " since state is not INITIALIZING in "
+                  + getLogIdentifier() + ", state=" + getState());
             }
           }
         } else {
-          checkEventSourceMetadata(vertex, sourceMeta);
-          vertex.eventHandler.handle(new VertexEventRouteEvent(target.getVertexId(),
+          checkEventSourceMetadata(this, sourceMeta);
+          eventHandler.handle(new VertexEventRouteEvent(target.getVertexId(),
               Collections.singletonList(tezEvent)));
         }
       }
         break;
       case INPUT_READ_ERROR_EVENT:
         {
-          checkEventSourceMetadata(vertex, sourceMeta);
-          Edge srcEdge = vertex.sourceVertices.get(vertex.getDAG().getVertex(
+          checkEventSourceMetadata(this, sourceMeta);
+          Edge srcEdge = sourceVertices.get(this.getDAG().getVertex(
               sourceMeta.getEdgeVertexName()));
           srcEdge.sendTezEventToSourceTasks(tezEvent);
         }
         break;
       case TASK_ATTEMPT_FAILED_EVENT:
         {
-          checkEventSourceMetadata(vertex, sourceMeta);
+          checkEventSourceMetadata(this, sourceMeta);
           TaskAttemptTerminationCause errCause = null;
           switch (sourceMeta.getEventGenerator()) {
           case INPUT:
@@ -4124,7 +4307,7 @@ public class VertexImpl implements org.apache.tez.dag.app.dag.Vertex, EventHandl
           }
           TaskAttemptFailedEvent taskFailedEvent =
               (TaskAttemptFailedEvent) tezEvent.getEvent();
-          vertex.getEventHandler().handle(
+          getEventHandler().handle(
               new TaskAttemptEventAttemptFailed(sourceMeta.getTaskAttemptID(),
                   TaskAttemptEventType.TA_FAILED,
                   "Error: " + taskFailedEvent.getDiagnostics(), 
@@ -4134,8 +4317,8 @@ public class VertexImpl implements org.apache.tez.dag.app.dag.Vertex, EventHandl
         break;
       case TASK_ATTEMPT_COMPLETED_EVENT:
         {
-          checkEventSourceMetadata(vertex, sourceMeta);
-          vertex.getEventHandler().handle(
+          checkEventSourceMetadata(this, sourceMeta);
+          getEventHandler().handle(
               new TaskAttemptEvent(sourceMeta.getTaskAttemptID(), TaskAttemptEventType.TA_DONE));
         }
         break;

http://git-wip-us.apache.org/repos/asf/tez/blob/05f77fe2/tez-dag/src/test/java/org/apache/tez/dag/app/MockDAGAppMaster.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/MockDAGAppMaster.java b/tez-dag/src/test/java/org/apache/tez/dag/app/MockDAGAppMaster.java
index 18286b5..5cd487c 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/MockDAGAppMaster.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/MockDAGAppMaster.java
@@ -92,8 +92,8 @@ public class MockDAGAppMaster extends DAGAppMaster {
   MockContainerLauncher containerLauncher;
   boolean initFailFlag;
   boolean startFailFlag;
-  boolean sendDMEvents;
   boolean recoveryFatalError = false;
+  EventsDelegate eventsDelegate;
   CountersDelegate countersDelegate;
   StatisticsDelegate statsDelegate;
   long launcherSleepTime = 1;
@@ -112,6 +112,10 @@ public class MockDAGAppMaster extends DAGAppMaster {
   public static interface CountersDelegate {
     public TezCounters getCounters(TaskSpec taskSpec);
   }
+  
+  public static interface EventsDelegate {
+    public void getEvents(TaskSpec taskSpec, List<TezEvent> events);
+  }
 
   // mock container launcher does not launch real tasks.
   // Upon, launch of a container is simulates the container asking for tasks
@@ -334,7 +338,7 @@ public class MockDAGAppMaster extends DAGAppMaster {
       if (response.shouldDie()) {
         cData.remove();
       } else {
-        cData.nextFromEventId += response.getEvents().size();
+        cData.nextFromEventId += response.getNextFromEventId();
         if (!response.getEvents().isEmpty()) {
           long stopTime = System.nanoTime();
           long stopCpuTime = threadMxBean.getCurrentThreadCpuTime();
@@ -400,19 +404,8 @@ public class MockDAGAppMaster extends DAGAppMaster {
                 updatesToMake != null && cData.numUpdates < updatesToMake) {
               List<TezEvent> events = Lists.newArrayListWithCapacity(
                                       cData.taskSpec.getOutputs().size() + 1);
-              if (sendDMEvents) {
-                for (OutputSpec output : cData.taskSpec.getOutputs()) {
-                  if (output.getPhysicalEdgeCount() == 1) {
-                    events.add(new TezEvent(DataMovementEvent.create(0, 0, 0, null), new EventMetaData(
-                        EventProducerConsumerType.OUTPUT, cData.vName, output
-                            .getDestinationVertexName(), cData.taId)));
-                  } else {
-                    events.add(new TezEvent(CompositeDataMovementEvent.create(0,
-                        output.getPhysicalEdgeCount(), null), new EventMetaData(
-                        EventProducerConsumerType.OUTPUT, cData.vName, output
-                            .getDestinationVertexName(), cData.taId)));
-                  }
-                }
+              if (cData.numUpdates == 0 && eventsDelegate != null) {
+                eventsDelegate.getEvents(cData.taskSpec, events);
               }
               TezCounters counters = null;
               if (countersDelegate != null) {
@@ -428,7 +421,7 @@ public class MockDAGAppMaster extends DAGAppMaster {
               events.add(new TezEvent(new TaskStatusUpdateEvent(counters, progress, stats), new EventMetaData(
                   EventProducerConsumerType.SYSTEM, cData.vName, "", cData.taId)));
               TezHeartbeatRequest request = new TezHeartbeatRequest(cData.numUpdates, events,
-                  cData.cIdStr, cData.taId, cData.nextFromEventId, 10000);
+                  cData.cIdStr, cData.taId, cData.nextFromEventId, 50000);
               doHeartbeat(request, cData);
             } else if (version != null && cData.taId.getId() <= version.intValue()) {
               preemptContainer(cData);

http://git-wip-us.apache.org/repos/asf/tez/blob/05f77fe2/tez-dag/src/test/java/org/apache/tez/dag/app/TestMemoryWithEvents.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/TestMemoryWithEvents.java b/tez-dag/src/test/java/org/apache/tez/dag/app/TestMemoryWithEvents.java
new file mode 100644
index 0000000..c277b38
--- /dev/null
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/TestMemoryWithEvents.java
@@ -0,0 +1,219 @@
+/**
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements.  See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership.  The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+package org.apache.tez.dag.app;
+
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.log4j.Level;
+import org.apache.log4j.Logger;
+import org.apache.tez.dag.api.DAG;
+import org.apache.tez.dag.api.DataSourceDescriptor;
+import org.apache.tez.dag.api.Edge;
+import org.apache.tez.dag.api.EdgeProperty;
+import org.apache.tez.dag.api.InputDescriptor;
+import org.apache.tez.dag.api.InputInitializerDescriptor;
+import org.apache.tez.dag.api.OutputDescriptor;
+import org.apache.tez.dag.api.ProcessorDescriptor;
+import org.apache.tez.dag.api.TezConfiguration;
+import org.apache.tez.dag.api.Vertex;
+import org.apache.tez.dag.api.EdgeProperty.DataMovementType;
+import org.apache.tez.dag.api.EdgeProperty.DataSourceType;
+import org.apache.tez.dag.api.EdgeProperty.SchedulingType;
+import org.apache.tez.dag.api.client.DAGClient;
+import org.apache.tez.dag.api.client.DAGStatus;
+import org.apache.tez.dag.app.MockDAGAppMaster.MockContainerLauncher;
+import org.apache.tez.runtime.api.Event;
+import org.apache.tez.runtime.api.InputInitializer;
+import org.apache.tez.runtime.api.InputInitializerContext;
+import org.apache.tez.runtime.api.events.InputDataInformationEvent;
+import org.apache.tez.runtime.api.events.InputInitializerEvent;
+import org.junit.Assert;
+import org.junit.Ignore;
+import org.junit.Test;
+
+import com.google.common.collect.Lists;
+
+// The objective of these tests is to make sure the large job simulations pass 
+// within the memory limits set by the junit tests (1GB)
+// For large jobs please increase memory limits to account for memory used by the 
+// simulation code itself
+public class TestMemoryWithEvents {
+  static Configuration defaultConf;
+  static FileSystem localFs;
+  
+  static {
+    try {
+      defaultConf = new Configuration(false);
+      defaultConf.set("fs.defaultFS", "file:///");
+      defaultConf.setBoolean(TezConfiguration.TEZ_LOCAL_MODE, true);
+      localFs = FileSystem.getLocal(defaultConf);
+      String stagingDir = "target" + Path.SEPARATOR + TestMemoryWithEvents.class.getName() + "-tmpDir";
+      defaultConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, stagingDir);
+      Logger.getRootLogger().setLevel(Level.WARN);
+    } catch (IOException e) {
+      throw new RuntimeException("init failure", e);
+    }
+  }
+
+  final int numThreads = 30;
+  final int numTasks = 10000;
+
+  private void checkMemory(String name, MockDAGAppMaster mockApp) {
+    long mb = 1024*1024;
+    long microsPerMs = 1000;
+
+    //Getting the runtime reference from system
+    Runtime runtime = Runtime.getRuntime();
+
+    System.out.println("##### Heap utilization statistics [MB] for " + name);
+
+    runtime.gc();
+    
+    //Print used memory
+    System.out.println("##### Used Memory:"
+        + (runtime.totalMemory() - runtime.freeMemory()) / mb);
+
+    //Print free memory
+    System.out.println("##### Free Memory:"
+        + runtime.freeMemory() / mb);
+     
+    //Print total available memory
+    System.out.println("##### Total Memory:" + runtime.totalMemory() / mb);
+
+    //Print Maximum available memory
+    System.out.println("##### Max Memory:" + runtime.maxMemory() / mb);
+    
+    //Print Maximum heartbeat time
+    long numHeartbeats = mockApp.numHearbeats.get();
+    if (numHeartbeats == 0) {
+      numHeartbeats = 1;
+    }
+    System.out.println("##### Heartbeat (ms) :" 
+        + " latency avg: " + ((mockApp.heartbeatTime.get() / numHeartbeats) / microsPerMs) 
+        + " cpu total: " + (mockApp.heartbeatCpu.get() / microsPerMs)
+        + " cpu avg: " + ((mockApp.heartbeatCpu.get() / numHeartbeats) / microsPerMs)
+        + " numHeartbeats: " + mockApp.numHearbeats.get());
+  }
+  
+  private void testMemory(DAG dag, boolean sendDMEvents) throws Exception {
+    TezConfiguration tezconf = new TezConfiguration(defaultConf);
+
+    MockTezClient tezClient = new MockTezClient("testMockAM", tezconf, true, null, null, null,
+        null, false, false, numThreads, 1000);
+    tezClient.start();
+    
+    MockDAGAppMaster mockApp = tezClient.getLocalClient().getMockApp();
+    MockContainerLauncher mockLauncher = mockApp.getContainerLauncher();
+    mockLauncher.startScheduling(false);
+    mockApp.eventsDelegate = new TestMockDAGAppMaster.TestEventsDelegate();
+    mockApp.doSleep = false;
+    DAGClient dagClient = tezClient.submitDAG(dag);
+    mockLauncher.waitTillContainersLaunched();
+    mockLauncher.startScheduling(true);
+    DAGStatus status = dagClient.waitForCompletion();
+    Assert.assertEquals(DAGStatus.State.SUCCEEDED, status.getState());
+    checkMemory(dag.getName(), mockApp);
+    
+    tezClient.stop();
+  }
+  
+  public static class SimulationInitializer extends InputInitializer {
+    public SimulationInitializer(InputInitializerContext initializerContext) {
+      super(initializerContext);
+    }
+
+    @Override
+    public List<Event> initialize() throws Exception {
+      int numTasks = getContext().getNumTasks();
+      List<Event> events = Lists.newArrayListWithCapacity(numTasks);
+      for (int i=0; i<numTasks; ++i) {
+        events.add(InputDataInformationEvent.createWithSerializedPayload(i, null));
+      }
+      return events;
+    }
+
+    @Override
+    public void handleInputInitializerEvent(List<InputInitializerEvent> events) throws Exception {
+    }
+  }
+
+  @Ignore
+  @Test (timeout = 600000)
+  public void testMemoryRootInputEvents() throws Exception {
+    DAG dag = DAG.create("testMemoryRootInputEvents");
+    Vertex vA = Vertex.create("A", ProcessorDescriptor.create("Proc.class"), numTasks);
+    Vertex vB = Vertex.create("B", ProcessorDescriptor.create("Proc.class"), numTasks);
+    vA.addDataSource(
+        "Input",
+        DataSourceDescriptor.create(InputDescriptor.create("In"),
+            InputInitializerDescriptor.create(SimulationInitializer.class.getName()), null));
+    dag.addVertex(vA).addVertex(vB);
+    testMemory(dag, false);
+  }
+  
+  @Ignore
+  @Test (timeout = 600000)
+  public void testMemoryOneToOne() throws Exception {
+    DAG dag = DAG.create("testMemoryOneToOne");
+    Vertex vA = Vertex.create("A", ProcessorDescriptor.create("Proc.class"), numTasks);
+    Vertex vB = Vertex.create("B", ProcessorDescriptor.create("Proc.class"), numTasks);
+    dag.addVertex(vA)
+        .addVertex(vB)
+        .addEdge(
+            Edge.create(vA, vB, EdgeProperty.create(DataMovementType.ONE_TO_ONE,
+                DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL,
+                OutputDescriptor.create("Out"), InputDescriptor.create("In"))));
+    testMemory(dag, true);
+  }
+
+  @Ignore
+  @Test (timeout = 600000)
+  public void testMemoryBroadcast() throws Exception {
+    DAG dag = DAG.create("testMemoryBroadcast");
+    Vertex vA = Vertex.create("A", ProcessorDescriptor.create("Proc.class"), numTasks);
+    Vertex vB = Vertex.create("B", ProcessorDescriptor.create("Proc.class"), numTasks);
+    dag.addVertex(vA)
+        .addVertex(vB)
+        .addEdge(
+            Edge.create(vA, vB, EdgeProperty.create(DataMovementType.BROADCAST,
+                DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL,
+                OutputDescriptor.create("Out"), InputDescriptor.create("In"))));
+    testMemory(dag, true);
+  }
+  
+  @Ignore
+  @Test (timeout = 600000)
+  public void testMemoryScatterGather() throws Exception {
+    DAG dag = DAG.create("testMemoryScatterGather");
+    Vertex vA = Vertex.create("A", ProcessorDescriptor.create("Proc.class"), numTasks);
+    Vertex vB = Vertex.create("B", ProcessorDescriptor.create("Proc.class"), numTasks);
+    dag.addVertex(vA)
+        .addVertex(vB)
+        .addEdge(
+            Edge.create(vA, vB, EdgeProperty.create(DataMovementType.SCATTER_GATHER,
+                DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL,
+                OutputDescriptor.create("Out"), InputDescriptor.create("In"))));
+    testMemory(dag, true);
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/tez/blob/05f77fe2/tez-dag/src/test/java/org/apache/tez/dag/app/TestMockDAGAppMaster.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/TestMockDAGAppMaster.java b/tez-dag/src/test/java/org/apache/tez/dag/app/TestMockDAGAppMaster.java
index 87ffead..1e7faf9 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/TestMockDAGAppMaster.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/TestMockDAGAppMaster.java
@@ -23,6 +23,7 @@ import java.io.DataOutput;
 import java.io.DataOutputStream;
 import java.io.IOException;
 import java.nio.ByteBuffer;
+import java.util.Collections;
 import java.util.List;
 import java.util.Locale;
 import java.util.Map;
@@ -49,6 +50,9 @@ import org.apache.tez.common.counters.TezCounters;
 import org.apache.tez.dag.api.DAG;
 import org.apache.tez.dag.api.DataSourceDescriptor;
 import org.apache.tez.dag.api.Edge;
+import org.apache.tez.dag.api.EdgeManagerPlugin;
+import org.apache.tez.dag.api.EdgeManagerPluginContext;
+import org.apache.tez.dag.api.EdgeManagerPluginDescriptor;
 import org.apache.tez.dag.api.EdgeProperty;
 import org.apache.tez.dag.api.EdgeProperty.DataSourceType;
 import org.apache.tez.dag.api.EdgeProperty.SchedulingType;
@@ -69,6 +73,7 @@ import org.apache.tez.dag.api.client.VertexStatus;
 import org.apache.tez.dag.api.client.VertexStatus.State;
 import org.apache.tez.dag.api.oldrecords.TaskAttemptState;
 import org.apache.tez.dag.app.MockDAGAppMaster.CountersDelegate;
+import org.apache.tez.dag.app.MockDAGAppMaster.EventsDelegate;
 import org.apache.tez.dag.app.MockDAGAppMaster.MockContainerLauncher;
 import org.apache.tez.dag.app.MockDAGAppMaster.MockContainerLauncher.ContainerData;
 import org.apache.tez.dag.app.MockDAGAppMaster.StatisticsDelegate;
@@ -84,13 +89,17 @@ import org.apache.tez.dag.records.TezVertexID;
 import org.apache.tez.runtime.api.VertexStatistics;
 import org.apache.tez.runtime.api.OutputCommitter;
 import org.apache.tez.runtime.api.OutputCommitterContext;
+import org.apache.tez.runtime.api.events.CompositeDataMovementEvent;
 import org.apache.tez.runtime.api.events.DataMovementEvent;
+import org.apache.tez.runtime.api.events.InputReadErrorEvent;
+import org.apache.tez.runtime.api.impl.EventMetaData;
 import org.apache.tez.runtime.api.impl.IOStatistics;
 import org.apache.tez.runtime.api.impl.InputSpec;
 import org.apache.tez.runtime.api.impl.OutputSpec;
 import org.apache.tez.runtime.api.impl.TaskSpec;
 import org.apache.tez.runtime.api.impl.TaskStatistics;
 import org.apache.tez.runtime.api.impl.TezEvent;
+import org.apache.tez.runtime.api.impl.EventMetaData.EventProducerConsumerType;
 import org.junit.Assert;
 import org.junit.Ignore;
 import org.junit.Test;
@@ -101,8 +110,7 @@ import com.google.common.primitives.Ints;
 public class TestMockDAGAppMaster {
   private static final Log LOG = LogFactory.getLog(TestMockDAGAppMaster.class);
   static Configuration defaultConf;
-  static FileSystem localFs;
-  
+  static FileSystem localFs;  
   static {
     try {
       defaultConf = new Configuration(false);
@@ -116,6 +124,24 @@ public class TestMockDAGAppMaster {
     }
   }
   
+  static class TestEventsDelegate implements EventsDelegate {
+    @Override
+    public void getEvents(TaskSpec taskSpec, List<TezEvent> events) {
+      for (OutputSpec output : taskSpec.getOutputs()) {
+        if (output.getPhysicalEdgeCount() == 1) {
+          events.add(new TezEvent(DataMovementEvent.create(0, 0, 0, null), new EventMetaData(
+              EventProducerConsumerType.OUTPUT, taskSpec.getVertexName(), output
+                  .getDestinationVertexName(), taskSpec.getTaskAttemptID())));
+        } else {
+          events.add(new TezEvent(CompositeDataMovementEvent.create(0,
+              output.getPhysicalEdgeCount(), null), new EventMetaData(
+              EventProducerConsumerType.OUTPUT, taskSpec.getVertexName(), output
+                  .getDestinationVertexName(), taskSpec.getTaskAttemptID())));
+        }
+      }
+    }    
+  }
+  
   @Test (timeout = 5000)
   public void testLocalResourceSetup() throws Exception {
     TezConfiguration tezconf = new TezConfiguration(defaultConf);
@@ -196,7 +222,7 @@ public class TestMockDAGAppMaster {
     MockDAGAppMaster mockApp = tezClient.getLocalClient().getMockApp();
     MockContainerLauncher mockLauncher = mockApp.getContainerLauncher();
     mockLauncher.startScheduling(false);
-    mockApp.sendDMEvents = true;
+    mockApp.eventsDelegate = new TestEventsDelegate();
     DAG dag = DAG.create("testBasicEvents");
     Vertex vA = Vertex.create("A", ProcessorDescriptor.create("Proc.class"), 2);
     Vertex vB = Vertex.create("B", ProcessorDescriptor.create("Proc.class"), 2);
@@ -227,7 +253,8 @@ public class TestMockDAGAppMaster {
     Assert.assertEquals(DAGStatus.State.SUCCEEDED, dagClient.getDAGStatus(null).getState());
     VertexImpl vImpl = (VertexImpl) dagImpl.getVertex(vB.getName());
     TaskImpl tImpl = (TaskImpl) vImpl.getTask(1);
-    List<TezEvent> tEvents = tImpl.getTaskEvents();
+    TezTaskAttemptID taId = TezTaskAttemptID.getInstance(tImpl.getTaskId(), 1);
+    List<TezEvent> tEvents = vImpl.getTaskAttemptTezEvents(taId, 0, 1000).getEvents();
     Assert.assertEquals(2, tEvents.size()); // 2 from vA
     Assert.assertEquals(vA.getName(), tEvents.get(0).getDestinationInfo().getEdgeVertexName());
     Assert.assertEquals(0, ((DataMovementEvent)tEvents.get(0).getEvent()).getSourceIndex());
@@ -240,7 +267,8 @@ public class TestMockDAGAppMaster {
         (targetIndex1 == 0 && targetIndex2 == 1) || (targetIndex1 == 1 && targetIndex2 == 0));
     vImpl = (VertexImpl) dagImpl.getVertex(vC.getName());
     tImpl = (TaskImpl) vImpl.getTask(1);
-    tEvents = tImpl.getTaskEvents();
+    taId = TezTaskAttemptID.getInstance(tImpl.getTaskId(), 1);
+    tEvents = vImpl.getTaskAttemptTezEvents(taId, 0, 1000).getEvents();
     Assert.assertEquals(2, tEvents.size()); // 2 from vA
     Assert.assertEquals(vA.getName(), tEvents.get(0).getDestinationInfo().getEdgeVertexName());
     Assert.assertEquals(1, ((DataMovementEvent)tEvents.get(0).getEvent()).getSourceIndex());
@@ -253,7 +281,8 @@ public class TestMockDAGAppMaster {
         (targetIndex1 == 0 && targetIndex2 == 1) || (targetIndex1 == 1 && targetIndex2 == 0));
     vImpl = (VertexImpl) dagImpl.getVertex(vD.getName());
     tImpl = (TaskImpl) vImpl.getTask(1);
-    tEvents = tImpl.getTaskEvents();
+    taId = TezTaskAttemptID.getInstance(tImpl.getTaskId(), 1);
+    tEvents = vImpl.getTaskAttemptTezEvents(taId, 0, 1000).getEvents();
     Assert.assertEquals(1, tEvents.size()); // 1 from vA
     Assert.assertEquals(vA.getName(), tEvents.get(0).getDestinationInfo().getEdgeVertexName());
     Assert.assertEquals(0, ((DataMovementEvent)tEvents.get(0).getEvent()).getTargetIndex());
@@ -261,6 +290,125 @@ public class TestMockDAGAppMaster {
 
     tezClient.stop();
   }
+  
+  public static class LegacyEdgeTestEdgeManager extends EdgeManagerPlugin {
+    List<Integer> destinationInputIndices = 
+        Collections.unmodifiableList(Collections.singletonList(0));
+    public LegacyEdgeTestEdgeManager(EdgeManagerPluginContext context) {
+      super(context);
+    }
+
+    @Override
+    public void initialize() throws Exception {
+    }
+
+    @Override
+    public int getNumDestinationTaskPhysicalInputs(int destinationTaskIndex) throws Exception {
+      return 1;
+    }
+
+    @Override
+    public int getNumSourceTaskPhysicalOutputs(int sourceTaskIndex) throws Exception {
+      return 1;
+    }
+
+    @Override
+    public void routeDataMovementEventToDestination(DataMovementEvent event,
+        int sourceTaskIndex, int sourceOutputIndex, 
+        Map<Integer, List<Integer>> destinationTaskAndInputIndices) {
+      destinationTaskAndInputIndices.put(sourceTaskIndex, destinationInputIndices);
+    }
+
+    @Override
+    public void routeInputSourceTaskFailedEventToDestination(int sourceTaskIndex,
+        Map<Integer, List<Integer>> destinationTaskAndInputIndices) {
+      destinationTaskAndInputIndices.put(sourceTaskIndex, destinationInputIndices);
+    }
+
+    @Override
+    public int routeInputErrorEventToSource(InputReadErrorEvent event,
+        int destinationTaskIndex, int destinationFailedInputIndex) {
+      return destinationTaskIndex;
+    }
+    
+    @Override
+    public int getNumDestinationConsumerTasks(int sourceTaskIndex) {
+      return 1;
+    }
+  }
+  
+  @Test (timeout = 100000)
+  public void testMixedEdgeRouting() throws Exception {
+   TezConfiguration tezconf = new TezConfiguration(defaultConf);
+    
+    MockTezClient tezClient = new MockTezClient("testMockAM", tezconf, true, null, null, null, null);
+    tezClient.start();
+    
+    MockDAGAppMaster mockApp = tezClient.getLocalClient().getMockApp();
+    MockContainerLauncher mockLauncher = mockApp.getContainerLauncher();
+    mockLauncher.startScheduling(false);
+    mockApp.eventsDelegate = new TestEventsDelegate();
+    DAG dag = DAG.create("testMixedEdgeRouting");
+    Vertex vA = Vertex.create("A", ProcessorDescriptor.create("Proc.class"), 1);
+    Vertex vB = Vertex.create("B", ProcessorDescriptor.create("Proc.class"), 1);
+    Vertex vC = Vertex.create("C", ProcessorDescriptor.create("Proc.class"), 1);
+    Vertex vD = Vertex.create("D", ProcessorDescriptor.create("Proc.class"), 1);
+    Vertex vE = Vertex.create("E", ProcessorDescriptor.create("Proc.class"), 1);
+    dag.addVertex(vA)
+        .addVertex(vB)
+        .addVertex(vC)
+        .addVertex(vD)
+        .addVertex(vE)
+        .addEdge(
+            Edge.create(vA, vC, EdgeProperty.create(DataMovementType.SCATTER_GATHER,
+                DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL,
+                OutputDescriptor.create("Out"), InputDescriptor.create("In"))))
+        .addEdge(
+            Edge.create(vB, vC, EdgeProperty.create(DataMovementType.SCATTER_GATHER,
+                DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL,
+                OutputDescriptor.create("Out"), InputDescriptor.create("In"))))
+        .addEdge(
+            Edge.create(vA, vD, EdgeProperty.create(DataMovementType.SCATTER_GATHER,
+                DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL,
+                OutputDescriptor.create("Out"), InputDescriptor.create("In"))))
+        .addEdge(
+            Edge.create(vB, vD, EdgeProperty.create(
+                EdgeManagerPluginDescriptor.create(LegacyEdgeTestEdgeManager.class.getName()),
+                DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL,
+                OutputDescriptor.create("Out"), InputDescriptor.create("In"))))
+        .addEdge(
+            Edge.create(vB, vE, EdgeProperty.create(
+              EdgeManagerPluginDescriptor.create(LegacyEdgeTestEdgeManager.class.getName()),
+              DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL,
+              OutputDescriptor.create("Out"), InputDescriptor.create("In"))));
+
+    DAGClient dagClient = tezClient.submitDAG(dag);
+    mockLauncher.waitTillContainersLaunched();
+    DAGImpl dagImpl = (DAGImpl) mockApp.getContext().getCurrentDAG();
+    mockLauncher.startScheduling(true);
+    dagClient.waitForCompletion();
+    Assert.assertEquals(DAGStatus.State.SUCCEEDED, dagClient.getDAGStatus(null).getState());
+    // vC uses on demand routing and its task does not provide events
+    VertexImpl vImpl = (VertexImpl) dagImpl.getVertex(vC.getName());
+    Assert.assertEquals(true, vImpl.useOnDemandRouting);
+    TaskImpl tImpl = (TaskImpl) vImpl.getTask(0);
+    TezTaskAttemptID taId = TezTaskAttemptID.getInstance(tImpl.getTaskId(), 0);
+    Assert.assertEquals(0, tImpl.getTaskAttemptTezEvents(taId, 0, 1000).size());
+    // vD is mixed more and does not use on demand routing and its task provides events
+    vImpl = (VertexImpl) dagImpl.getVertex(vD.getName());
+    Assert.assertEquals(false, vImpl.useOnDemandRouting);
+    tImpl = (TaskImpl) vImpl.getTask(0);
+    taId = TezTaskAttemptID.getInstance(tImpl.getTaskId(), 0);
+    Assert.assertEquals(2, tImpl.getTaskAttemptTezEvents(taId, 0, 1000).size());
+    // vE has single legacy edge and does not use on demand routing and its task provides events
+    vImpl = (VertexImpl) dagImpl.getVertex(vD.getName());
+    Assert.assertEquals(false, vImpl.useOnDemandRouting);
+    tImpl = (TaskImpl) vImpl.getTask(0);
+    taId = TezTaskAttemptID.getInstance(tImpl.getTaskId(), 0);
+    Assert.assertEquals(2, tImpl.getTaskAttemptTezEvents(taId, 0, 1000).size());
+
+    tezClient.stop();
+  }
 
   @Test (timeout = 10000)
   public void testBasicCounters() throws Exception {

http://git-wip-us.apache.org/repos/asf/tez/blob/05f77fe2/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskAttemptListenerImplTezDag.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskAttemptListenerImplTezDag.java b/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskAttemptListenerImplTezDag.java
index f974f40..db8eff1 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskAttemptListenerImplTezDag.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskAttemptListenerImplTezDag.java
@@ -42,7 +42,6 @@ import org.apache.tez.common.ContainerTask;
 import org.apache.tez.common.security.JobTokenSecretManager;
 import org.apache.tez.dag.api.TezException;
 import org.apache.tez.dag.app.dag.DAG;
-import org.apache.tez.dag.app.dag.Task;
 import org.apache.tez.dag.app.dag.Vertex;
 import org.apache.tez.dag.app.dag.event.TaskAttemptEventType;
 import org.apache.tez.dag.app.dag.event.VertexEventRouteEvent;
@@ -62,6 +61,7 @@ import org.apache.tez.runtime.api.impl.EventType;
 import org.apache.tez.runtime.api.impl.TaskSpec;
 import org.apache.tez.runtime.api.impl.TezEvent;
 import org.apache.tez.runtime.api.impl.TezHeartbeatRequest;
+import org.apache.tez.runtime.api.impl.TezHeartbeatResponse;
 import org.junit.Before;
 import org.junit.Test;
 import org.mockito.ArgumentCaptor;
@@ -186,7 +186,7 @@ public class TestTaskAttemptListenerImplTezDag {
       new TezEvent(new TaskAttemptCompletedEvent(), null)
     );
 
-    EventHandler eventHandler = generateHeartbeat(events);
+    generateHeartbeat(events, 0, 1, 0, new ArrayList<TezEvent>());
 
     ArgumentCaptor<Event> arg = ArgumentCaptor.forClass(Event.class);
     verify(eventHandler, times(2)).handle(arg.capture());
@@ -212,7 +212,7 @@ public class TestTaskAttemptListenerImplTezDag {
     List<TezEvent> events = Arrays.asList(
       new TezEvent(new TaskAttemptCompletedEvent(), null)
     );
-    final EventHandler eventHandler = generateHeartbeat(events);
+    generateHeartbeat(events, 0, 1, 0, new ArrayList<TezEvent>());
 
     ArgumentCaptor<Event> arg = ArgumentCaptor.forClass(Event.class);
     verify(eventHandler, times(1)).handle(arg.capture());
@@ -222,18 +222,29 @@ public class TestTaskAttemptListenerImplTezDag {
     assertEquals("only event should be route event", VertexEventType.V_ROUTE_EVENT,
         event.getType());
   }
+  
+  @Test (timeout = 5000)
+  public void testTaskHeartbeatResponse() throws Exception {
+    List<TezEvent> events = new ArrayList<TezEvent>();
+    List<TezEvent> eventsToSend = new ArrayList<TezEvent>();
+    TezHeartbeatResponse response = generateHeartbeat(events, 0, 1, 2, eventsToSend);
+    
+    assertEquals(2, response.getNextFromEventId());
+    assertEquals(1, response.getLastRequestId());
+    assertEquals(eventsToSend, response.getEvents());
+  }
 
-  private EventHandler generateHeartbeat(List<TezEvent> events) throws IOException, TezException {
+  private TezHeartbeatResponse generateHeartbeat(List<TezEvent> events,
+      int fromEventId, int maxEvents, int nextFromEventId,
+      List<TezEvent> sendEvents) throws IOException, TezException {
     ContainerId containerId = createContainerId(appId, 1);
     long requestId = 0;
     Vertex vertex = mock(Vertex.class);
-    Task task = mock(Task.class);
 
     doReturn(vertex).when(dag).getVertex(vertexID);
     doReturn("test_vertex").when(vertex).getName();
-    doReturn(task).when(vertex).getTask(taskID);
-
-    doReturn(new ArrayList<TezEvent>()).when(task).getTaskAttemptTezEvents(taskAttemptID, 0, 1);
+    TaskAttemptEventInfo eventInfo = new TaskAttemptEventInfo(nextFromEventId, sendEvents);
+    doReturn(eventInfo).when(vertex).getTaskAttemptTezEvents(taskAttemptID, fromEventId, maxEvents);
 
     taskAttemptListener.registerRunningContainer(containerId);
     taskAttemptListener.registerTaskAttempt(amContainerTask, containerId);
@@ -243,10 +254,10 @@ public class TestTaskAttemptListenerImplTezDag {
     doReturn(taskAttemptID).when(request).getCurrentTaskAttemptID();
     doReturn(++requestId).when(request).getRequestId();
     doReturn(events).when(request).getEvents();
+    doReturn(maxEvents).when(request).getMaxEvents();
+    doReturn(fromEventId).when(request).getStartIndex();
 
-    taskAttemptListener.heartbeat(request);
-
-    return eventHandler;
+    return taskAttemptListener.heartbeat(request);
   }
 
 

http://git-wip-us.apache.org/repos/asf/tez/blob/05f77fe2/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestDAGImpl.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestDAGImpl.java b/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestDAGImpl.java
index ba40146..d2aa2d0 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestDAGImpl.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestDAGImpl.java
@@ -51,9 +51,9 @@ import org.apache.hadoop.yarn.util.Clock;
 import org.apache.hadoop.yarn.util.SystemClock;
 import org.apache.tez.common.MockDNSToSwitchMapping;
 import org.apache.tez.dag.api.DataSinkDescriptor;
-import org.apache.tez.dag.api.EdgeManagerPlugin;
 import org.apache.tez.dag.api.EdgeManagerPluginContext;
 import org.apache.tez.dag.api.EdgeManagerPluginDescriptor;
+import org.apache.tez.dag.api.EdgeManagerPluginOnDemand;
 import org.apache.tez.dag.api.GroupInputEdge;
 import org.apache.tez.dag.api.DAG;
 import org.apache.tez.dag.api.EdgeProperty;
@@ -1028,6 +1028,8 @@ public class TestDAGImpl {
         new EventMetaData(EventProducerConsumerType.INPUT, "vertex1", "vertex2", ta1.getID()));
     dispatcher.getEventHandler().handle(new VertexEventRouteEvent(v2.getVertexId(), Lists.newArrayList(tezEvent)));
     dispatcher.await();
+    v2.getTaskAttemptTezEvents(ta1.getID(), 0, 1000);
+    dispatcher.await();
 
     Assert.assertEquals(VertexState.FAILED, v2.getState());
     Assert.assertEquals(VertexState.KILLED, v1.getState());
@@ -1037,7 +1039,40 @@ public class TestDAGImpl {
   
   @SuppressWarnings("unchecked")
   @Test(timeout = 5000)
-  public void testEdgeManager_RouteInputSourceTaskFailedEventToDestination() {
+  public void testEdgeManager_RouteDataMovementEventToDestinationWithLegacyRouting() {
+    // Remove after legacy routing is removed
+    setupDAGWithCustomEdge(ExceptionLocation.RouteDataMovementEventToDestination);
+    dispatcher.getEventHandler().handle(
+        new DAGEvent(dagWithCustomEdge.getID(), DAGEventType.DAG_INIT));
+    dispatcher.getEventHandler().handle(new DAGEventStartDag(dagWithCustomEdge.getID(), 
+        null));
+    dispatcher.await();
+    Assert.assertEquals(DAGState.RUNNING, dagWithCustomEdge.getState());
+
+    VertexImpl v1 = (VertexImpl)dagWithCustomEdge.getVertex("vertex1");
+    VertexImpl v2 = (VertexImpl)dagWithCustomEdge.getVertex("vertex2");
+    v1.useOnDemandRouting = false;
+    v2.useOnDemandRouting = false;
+    dispatcher.await();
+    Task t1= v2.getTask(0);
+    TaskAttemptImpl ta1= (TaskAttemptImpl)t1.getAttempt(TezTaskAttemptID.getInstance(t1.getTaskId(), 0));
+
+    DataMovementEvent daEvent = DataMovementEvent.create(ByteBuffer.wrap(new byte[0]));
+    TezEvent tezEvent = new TezEvent(daEvent, 
+        new EventMetaData(EventProducerConsumerType.INPUT, "vertex1", "vertex2", ta1.getID()));
+    dispatcher.getEventHandler().handle(new VertexEventRouteEvent(v2.getVertexId(), Lists.newArrayList(tezEvent)));
+    dispatcher.await();
+
+    Assert.assertEquals(VertexState.FAILED, v2.getState());
+    Assert.assertEquals(VertexState.KILLED, v1.getState());
+    String diag = StringUtils.join(v2.getDiagnostics(), ",");
+    Assert.assertTrue(diag.contains(ExceptionLocation.RouteDataMovementEventToDestination.name()));
+  }
+
+  @SuppressWarnings("unchecked")
+  @Test(timeout = 5000)
+  public void testEdgeManager_RouteInputSourceTaskFailedEventToDestinationLegacyRouting() {
+    // Remove after legacy routing is removed
     setupDAGWithCustomEdge(ExceptionLocation.RouteInputSourceTaskFailedEventToDestination);
     dispatcher.getEventHandler().handle(
         new DAGEvent(dagWithCustomEdge.getID(), DAGEventType.DAG_INIT));
@@ -1048,6 +1083,8 @@ public class TestDAGImpl {
 
     VertexImpl v1 = (VertexImpl)dagWithCustomEdge.getVertex("vertex1");
     VertexImpl v2 = (VertexImpl)dagWithCustomEdge.getVertex("vertex2");
+    v1.useOnDemandRouting = false;
+    v2.useOnDemandRouting = false;
     dispatcher.await();
 
     Task t1= v2.getTask(0);
@@ -1057,13 +1094,15 @@ public class TestDAGImpl {
         new EventMetaData(EventProducerConsumerType.INPUT,"vertex1", "vertex2", ta1.getID()));
     dispatcher.getEventHandler().handle(new VertexEventRouteEvent(v2.getVertexId(), Lists.newArrayList(tezEvent)));
     dispatcher.await();
-    // 
+    v2.getTaskAttemptTezEvents(ta1.getID(), 0, 1000);
+    dispatcher.await();
     Assert.assertEquals(VertexState.FAILED, v2.getState());
+    
     Assert.assertEquals(VertexState.KILLED, v1.getState());
     String diag = StringUtils.join(v2.getDiagnostics(), ",");
     Assert.assertTrue(diag.contains(ExceptionLocation.RouteInputSourceTaskFailedEventToDestination.name()));
   }
-  
+
   @SuppressWarnings("unchecked")
   @Test(timeout = 5000)
   public void testEdgeManager_GetNumDestinationConsumerTasks() {
@@ -1773,7 +1812,7 @@ public class TestDAGImpl {
         TezConfiguration.TEZ_AM_TASK_MAX_FAILED_ATTEMPTS_DEFAULT));
   }
 
-  public static class CustomizedEdgeManager extends EdgeManagerPlugin {
+  public static class CustomizedEdgeManager extends EdgeManagerPluginOnDemand {
 
     public static enum ExceptionLocation {
       Initialize,
@@ -1861,6 +1900,47 @@ public class TestDAGImpl {
       }
       return 0;
     }
+
+    @Override
+    public int routeInputErrorEventToSource(int destinationTaskIndex,
+        int destinationFailedInputIndex) throws Exception {
+      if (exLocation == ExceptionLocation.RouteInputErrorEventToSource) {
+        throw new Exception(exLocation.name());
+      }
+      return 0;
+    }
+
+    @Override
+    public EventRouteMetadata routeDataMovementEventToDestination(int sourceTaskIndex,
+        int sourceOutputIndex, int destinationTaskIndex) throws Exception {
+      if (exLocation == ExceptionLocation.RouteDataMovementEventToDestination) {
+        throw new Exception(exLocation.name());
+      }
+      return null;
+    }
+
+    @Override
+    public EventRouteMetadata routeCompositeDataMovementEventToDestination(
+        int sourceTaskIndex, int destinationTaskIndex)
+        throws Exception {
+      if (exLocation == ExceptionLocation.RouteDataMovementEventToDestination) {
+        throw new Exception(exLocation.name());
+      }
+      return null;
+    }
+
+    @Override
+    public EventRouteMetadata routeInputSourceTaskFailedEventToDestination(
+        int sourceTaskIndex, int destinationTaskIndex) throws Exception {
+      if (exLocation == ExceptionLocation.RouteInputSourceTaskFailedEventToDestination) {
+        throw new Exception(exLocation.name());
+      }
+      return null;
+    }
+
+    @Override
+    public void prepareForRouting() throws Exception {
+    }
   }
 
 

http://git-wip-us.apache.org/repos/asf/tez/blob/05f77fe2/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestVertexImpl.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestVertexImpl.java b/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestVertexImpl.java
index 99ec6cf..a8eaca1 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestVertexImpl.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestVertexImpl.java
@@ -73,6 +73,8 @@ import org.apache.tez.dag.api.EdgeManagerPluginDescriptor;
 import org.apache.tez.dag.api.EdgeProperty;
 import org.apache.tez.dag.api.EdgeProperty.DataSourceType;
 import org.apache.tez.dag.api.EdgeProperty.SchedulingType;
+import org.apache.tez.dag.api.EdgeManagerPluginOnDemand;
+import org.apache.tez.dag.api.EdgeManagerPluginOnDemand.EventRouteMetadata;
 import org.apache.tez.dag.api.InputDescriptor;
 import org.apache.tez.dag.api.InputInitializerDescriptor;
 import org.apache.tez.dag.api.OutputDescriptor;
@@ -107,6 +109,7 @@ import org.apache.tez.dag.api.records.DAGProtos.TezEntityDescriptorProto;
 import org.apache.tez.dag.api.records.DAGProtos.VertexPlan;
 import org.apache.tez.dag.app.AppContext;
 import org.apache.tez.dag.app.ContainerHeartbeatHandler;
+import org.apache.tez.dag.app.TaskAttemptEventInfo;
 import org.apache.tez.dag.app.TaskAttemptListener;
 import org.apache.tez.dag.app.TaskHeartbeatHandler;
 import org.apache.tez.dag.app.dag.DAG;
@@ -148,7 +151,6 @@ import org.apache.tez.dag.app.rm.container.ContainerContextMatcher;
 import org.apache.tez.dag.history.DAGHistoryEvent;
 import org.apache.tez.dag.history.HistoryEventHandler;
 import org.apache.tez.dag.history.HistoryEventType;
-import org.apache.tez.dag.history.events.VertexRecoverableEventsGeneratedEvent;
 import org.apache.tez.dag.library.vertexmanager.InputReadyVertexManager;
 import org.apache.tez.dag.library.vertexmanager.ShuffleVertexManager;
 import org.apache.tez.dag.records.TaskAttemptTerminationCause;
@@ -186,8 +188,6 @@ import org.junit.BeforeClass;
 import org.junit.Ignore;
 import org.junit.Test;
 import org.mockito.ArgumentCaptor;
-import org.mockito.ArgumentMatcher;
-import org.mockito.Matchers;
 import org.mockito.Mockito;
 import org.mockito.internal.util.collections.Sets;
 
@@ -2462,6 +2462,101 @@ public class TestVertexImpl {
     VertexImpl v = vertices.get("vertex2");
     startVertex(v);
   }
+  
+  @Test (timeout = 5000)
+  public void testVertexGetTAAttempts() throws Exception {
+    initAllVertices(VertexState.INITED);
+    VertexImpl v1 = vertices.get("vertex1");
+    startVertex(v1);
+    VertexImpl v2 = vertices.get("vertex2");
+    startVertex(v2);
+    VertexImpl v3 = vertices.get("vertex3");
+    VertexImpl v4 = vertices.get("vertex4");
+    
+    Assert.assertEquals(VertexState.RUNNING, v4.getState());
+    Assert.assertEquals(1, v4.sourceVertices.size());
+    Edge e = v4.sourceVertices.get(v3);
+    TezTaskAttemptID v3TaId = TezTaskAttemptID.getInstance(
+        TezTaskID.getInstance(v3.getVertexId(), 0), 0);
+    TezTaskAttemptID v4TaId = TezTaskAttemptID.getInstance(
+        TezTaskID.getInstance(v4.getVertexId(), 0), 0);
+    
+    for (int i=0; i<5; ++i) {
+      v4.handle(new VertexEventRouteEvent(v4.getVertexId(), Collections.singletonList(
+          new TezEvent(DataMovementEvent.create(0, null), 
+              new EventMetaData(EventProducerConsumerType.OUTPUT, v3.getName(), v3.getName(), v3TaId)))));
+    }
+    dispatcher.await();
+    // verify all events have been put in pending.
+    // this is not necessary after legacy routing has been removed
+    Assert.assertEquals(5, v4.pendingTaskEvents.size());
+    v4.scheduleTasks(new LinkedList<VertexManagerPluginContext.TaskWithLocationHint>());
+    // verify all events have been moved to taskEvents
+    Assert.assertEquals(5, v4.getOnDemandRouteEvents().size());
+    for (int i=5; i<11; ++i) {
+      v4.handle(new VertexEventRouteEvent(v4.getVertexId(), Collections.singletonList(
+          new TezEvent(DataMovementEvent.create(0, null), 
+              new EventMetaData(EventProducerConsumerType.OUTPUT, v3.getName(), v3.getName(), v3TaId)))));
+    }
+    dispatcher.await();
+    // verify all events have been are in taskEvents
+    Assert.assertEquals(11, v4.getOnDemandRouteEvents().size());
+    
+    TaskAttemptEventInfo eventInfo;
+    EdgeManagerPluginOnDemand mockPlugin = mock(EdgeManagerPluginOnDemand.class);
+    EventRouteMetadata mockRoute = EventRouteMetadata.create(1, new int[]{0});
+    e.edgeManager = mockPlugin;
+    // source task id will not match. all events will return null
+    when(mockPlugin.routeDataMovementEventToDestination(1, 0, 0)).thenReturn(mockRoute);
+    eventInfo = v4.getTaskAttemptTezEvents(v4TaId, 0, 1);
+    Assert.assertEquals(11, eventInfo.getNextFromEventId()); // all events traversed
+    Assert.assertEquals(0, eventInfo.getEvents().size()); // no events
+    
+    int fromEventId = 0;
+    // source task id will match. all events will be returned
+    // max events is respected.
+    when(
+        mockPlugin.routeDataMovementEventToDestination(anyInt(),
+            anyInt(), anyInt())).thenReturn(mockRoute);
+    for (int i=0; i<11; ++i) {
+      eventInfo = v4.getTaskAttemptTezEvents(v4TaId, fromEventId, 1);
+      fromEventId = eventInfo.getNextFromEventId();
+      Assert.assertEquals((i+1), fromEventId);
+      Assert.assertEquals(1, eventInfo.getEvents().size());
+    }
+    eventInfo = v4.getTaskAttemptTezEvents(v4TaId, fromEventId, 1);
+    Assert.assertEquals(11, eventInfo.getNextFromEventId()); // all events traversed
+    Assert.assertEquals(0, eventInfo.getEvents().size()); // no events
+    
+    // change max events to larger value. max events does not evenly divide total events
+    fromEventId = 0;
+    for (int i=1; i<=2; ++i) {
+      eventInfo = v4.getTaskAttemptTezEvents(v4TaId, fromEventId, 5);
+      fromEventId = eventInfo.getNextFromEventId();
+      Assert.assertEquals((i*5), fromEventId);
+      Assert.assertEquals(5, eventInfo.getEvents().size());
+    }
+    eventInfo = v4.getTaskAttemptTezEvents(v4TaId, fromEventId, 5);
+    Assert.assertEquals(11, eventInfo.getNextFromEventId()); // all events traversed
+    Assert.assertEquals(1, eventInfo.getEvents().size()); // remainder events
+    
+    // return more events that dont evenly fit in max size
+    mockRoute = EventRouteMetadata.create(2, new int[]{0, 0});    
+    when(
+        mockPlugin.routeDataMovementEventToDestination(anyInt(),
+            anyInt(), anyInt())).thenReturn(mockRoute);
+    fromEventId = 0;
+    int lastFromEventId = 0;
+    for (int i=1; i<=4; ++i) {
+      eventInfo = v4.getTaskAttemptTezEvents(v4TaId, fromEventId, 5);
+      fromEventId = eventInfo.getNextFromEventId();
+      Assert.assertEquals((i%2 > 0 ? (lastFromEventId+=2) : (lastFromEventId+=3)), fromEventId);     
+      Assert.assertEquals(5, eventInfo.getEvents().size());
+    }
+    eventInfo = v4.getTaskAttemptTezEvents(v4TaId, fromEventId, 5);
+    Assert.assertEquals(11, eventInfo.getNextFromEventId()); // all events traversed
+    Assert.assertEquals(2, eventInfo.getEvents().size()); // remainder events
+  }
 
   @Test(timeout = 5000)
   public void testVertexReconfigurePlannedAfterInit() throws Exception {
@@ -2632,11 +2727,15 @@ public class TestVertexImpl {
   }
   
   @Test(timeout = 5000)
-  public void testVertexPendingTaskEvents() {
+  public void testVertexPendingTaskEventsLegacyRouting() {
+    // Remove after bulk routing API is removed
     initAllVertices(VertexState.INITED);
     VertexImpl v3 = vertices.get("vertex3");
     VertexImpl v2 = vertices.get("vertex2");
     VertexImpl v1 = vertices.get("vertex1");
+    v1.useOnDemandRouting = false;
+    v2.useOnDemandRouting = false;
+    v3.useOnDemandRouting = false;
     
     startVertex(v1);
     
@@ -4595,8 +4694,6 @@ public class TestVertexImpl {
     dispatcher.await();
     Assert.assertEquals(VertexState.INITED, v1.getState());
     Assert.assertEquals(5, v1.getTotalTasks());
-    // task events get buffered
-    Assert.assertEquals(5, v1.pendingTaskEvents.size());
     Assert.assertEquals(RootInputVertexManager.class.getName(), v1
         .getVertexManager().getPlugin().getClass().getName());
     for (int i=0; i < v1Hints.size(); ++i) {
@@ -4609,6 +4706,16 @@ public class TestVertexImpl {
       Assert.assertEquals(1, inputSpecs.get(0).getPhysicalEdgeCount());
     }
     
+    // fake scheduling start to trigger edge routing to begin
+    v1.scheduleTasks(new LinkedList<VertexManagerPluginContext.TaskWithLocationHint>());
+    // check all tasks get their events
+    for (int i=0; i<v1.getTotalTasks(); ++i) {
+      Assert.assertEquals(
+          1,
+          v1.getTaskAttemptTezEvents(TezTaskAttemptID.getInstance(v1.getTask(i).getTaskId(), 0),
+              0, 100).getEvents().size());
+    }
+    
     VertexImplWithControlledInitializerManager v2 = (VertexImplWithControlledInitializerManager) vertices.get("vertex2");
     Assert.assertEquals(VertexState.INITIALIZING, v2.getState());
     
@@ -4627,7 +4734,6 @@ public class TestVertexImpl {
     dispatcher.getEventHandler().handle(
         new VertexEventRouteEvent(v2.getVertexId(), events));
     dispatcher.await();
-    Assert.assertEquals(1, v2.pendingTaskEvents.size());
     
     RootInputInitializerManagerControlled initializerManager2 = v2.getRootInputInitializerManager();
     List<TaskLocationHint> v2Hints = createTaskLocationHints(10);
@@ -4635,14 +4741,99 @@ public class TestVertexImpl {
     dispatcher.await();
     Assert.assertEquals(VertexState.INITED, v2.getState());
     Assert.assertEquals(10, v2.getTotalTasks());
+    Assert.assertEquals(RootInputVertexManager.class.getName(), v2
+        .getVertexManager().getPlugin().getClass().getName());
+    for (int i=0; i < v2Hints.size(); ++i) {
+      Assert.assertEquals(v2Hints.get(i), v2.getTaskLocationHints()[i]);
+    }
+    Assert.assertEquals(true, initializerManager2.hasShutDown);
+    
+    // fake scheduling start to trigger edge routing to begin
+    v2.scheduleTasks(new LinkedList<VertexManagerPluginContext.TaskWithLocationHint>());
+    // check all tasks get their events
+    for (int i=0; i<v2.getTotalTasks(); ++i) {
+      Assert.assertEquals(
+          ((i==0) ? 2 : 1),
+          v2.getTaskAttemptTezEvents(TezTaskAttemptID.getInstance(v2.getTask(i).getTaskId(), 0),
+              0, 100).getEvents().size());
+    }
+    for (int i = 0; i < 10; i++) {
+      List<InputSpec> inputSpecs = v1.getInputSpecList(i);
+      Assert.assertEquals(1, inputSpecs.size());
+      Assert.assertEquals(1, inputSpecs.get(0).getPhysicalEdgeCount());
+    }
+  }
+  
+  @Test(timeout = 5000)
+  public void testVertexWithInitializerSuccessLegacyRouting() throws Exception {
+    // Remove after legacy routing is removed
+    useCustomInitializer = true;
+    setupPreDagCreation();
+    dagPlan = createDAGPlanWithInputInitializer("TestInputInitializer");
+    setupPostDagCreation();
+
+    VertexImplWithControlledInitializerManager v1 = (VertexImplWithControlledInitializerManager) vertices
+        .get("vertex1");
+    v1.useOnDemandRouting = false;
+    dispatcher.getEventHandler().handle(
+        new VertexEvent(v1.getVertexId(), VertexEventType.V_INIT));
+    dispatcher.await();
+    Assert.assertEquals(VertexState.INITIALIZING, v1.getState());
+    RootInputInitializerManagerControlled initializerManager1 = v1.getRootInputInitializerManager();
+    List<TaskLocationHint> v1Hints = createTaskLocationHints(5);
+    initializerManager1.completeInputInitialization(0, 5, v1Hints);
+    dispatcher.await();
+    Assert.assertEquals(VertexState.INITED, v1.getState());
+    Assert.assertEquals(5, v1.getTotalTasks());
+    Assert.assertEquals(RootInputVertexManager.class.getName(), v1
+        .getVertexManager().getPlugin().getClass().getName());
+    for (int i=0; i < v1Hints.size(); ++i) {
+      Assert.assertEquals(v1Hints.get(i), v1.getTaskLocationHints()[i]);
+    }
+    Assert.assertEquals(true, initializerManager1.hasShutDown);
+    for (int i = 0; i < 5; i++) {
+      List<InputSpec> inputSpecs = v1.getInputSpecList(i);
+      Assert.assertEquals(1, inputSpecs.size());
+      Assert.assertEquals(1, inputSpecs.get(0).getPhysicalEdgeCount());
+    }
     // task events get buffered
-    Assert.assertEquals(11, v2.pendingTaskEvents.size());
+    Assert.assertEquals(5, v1.pendingTaskEvents.size());
+    
+    VertexImplWithControlledInitializerManager v2 = (VertexImplWithControlledInitializerManager) vertices.get("vertex2");
+    Assert.assertEquals(VertexState.INITIALIZING, v2.getState());
+    v2.useOnDemandRouting = false;
+
+    // non-task events don't get buffered
+    List<TezEvent> events = Lists.newLinkedList();
+    TezTaskID t0_v1 = TezTaskID.getInstance(v1.getVertexId(), 0);
+    TezTaskAttemptID ta0_t0_v1 = TezTaskAttemptID.getInstance(t0_v1, 0);
+    events.add(new TezEvent(
+        VertexManagerEvent.create("vertex2", ByteBuffer.wrap(new byte[0])), new EventMetaData(
+            EventProducerConsumerType.PROCESSOR, "vertex1", "vertex2",
+            ta0_t0_v1)));
+    events.add(new TezEvent(InputDataInformationEvent.createWithSerializedPayload(0,
+        ByteBuffer.wrap(new byte[0])),
+        new EventMetaData(EventProducerConsumerType.INPUT, "vertex2",
+            "NULL_VERTEX", null)));
+    dispatcher.getEventHandler().handle(
+        new VertexEventRouteEvent(v2.getVertexId(), events));
+    dispatcher.await();
+    Assert.assertEquals(1, v2.pendingTaskEvents.size());
+    
+    RootInputInitializerManagerControlled initializerManager2 = v2.getRootInputInitializerManager();
+    List<TaskLocationHint> v2Hints = createTaskLocationHints(10);
+    initializerManager2.completeInputInitialization(0, 10, v2Hints);
+    dispatcher.await();
+    Assert.assertEquals(VertexState.INITED, v2.getState());
+    Assert.assertEquals(10, v2.getTotalTasks());
     Assert.assertEquals(RootInputVertexManager.class.getName(), v2
         .getVertexManager().getPlugin().getClass().getName());
     for (int i=0; i < v2Hints.size(); ++i) {
       Assert.assertEquals(v2Hints.get(i), v2.getTaskLocationHints()[i]);
     }
     Assert.assertEquals(true, initializerManager2.hasShutDown);
+    // task events get buffered
+    Assert.assertEquals(11, v2.pendingTaskEvents.size());
     for (int i = 0; i < 10; i++) {
       List<InputSpec> inputSpecs = v1.getInputSpecList(i);
       Assert.assertEquals(1, inputSpecs.size());
@@ -4650,6 +4841,7 @@ public class TestVertexImpl {
     }
   }
 
+  
   @Test(timeout = 5000)
   public void testVertexWithInputDistributor() throws Exception {
     useCustomInitializer = true;

http://git-wip-us.apache.org/repos/asf/tez/blob/05f77fe2/tez-dag/src/test/java/org/apache/tez/test/EdgeManagerForTest.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/test/java/org/apache/tez/test/EdgeManagerForTest.java b/tez-dag/src/test/java/org/apache/tez/test/EdgeManagerForTest.java
index 09f9a20..9cb914f 100644
--- a/tez-dag/src/test/java/org/apache/tez/test/EdgeManagerForTest.java
+++ b/tez-dag/src/test/java/org/apache/tez/test/EdgeManagerForTest.java
@@ -21,13 +21,13 @@ package org.apache.tez.test;
 import java.util.List;
 import java.util.Map;
 
-import org.apache.tez.dag.api.EdgeManagerPlugin;
 import org.apache.tez.dag.api.EdgeManagerPluginContext;
+import org.apache.tez.dag.api.EdgeManagerPluginOnDemand;
 import org.apache.tez.dag.api.UserPayload;
 import org.apache.tez.runtime.api.events.DataMovementEvent;
 import org.apache.tez.runtime.api.events.InputReadErrorEvent;
 
-public class EdgeManagerForTest extends EdgeManagerPlugin {
+public class EdgeManagerForTest extends EdgeManagerPluginOnDemand {
 
   private UserPayload userPayload;
 
@@ -78,6 +78,35 @@ public class EdgeManagerForTest extends EdgeManagerPlugin {
   public void routeInputSourceTaskFailedEventToDestination(int sourceTaskIndex,
       Map<Integer, List<Integer>> destinationTaskAndInputIndices) { 
   }
+
+  @Override
+  public EventRouteMetadata routeDataMovementEventToDestination(int sourceTaskIndex,
+      int sourceOutputIndex, int destinationTaskIndex) throws Exception {
+    return null;
+  }
+
+  @Override
+  public EventRouteMetadata routeCompositeDataMovementEventToDestination(
+      int sourceTaskIndex, int destinationTaskIndex)
+      throws Exception {
+    return null;
+  }
+
+  @Override
+  public EventRouteMetadata routeInputSourceTaskFailedEventToDestination(
+      int sourceTaskIndex, int destinationTaskIndex) throws Exception {
+    return null;
+  }
+
+  @Override
+  public void prepareForRouting() throws Exception {
+  }
+
+  @Override
+  public int routeInputErrorEventToSource(int destinationTaskIndex, int destinationFailedInputIndex)
+      throws Exception {
+    return 0;
+  }
   
   // End of overridden methods
 

http://git-wip-us.apache.org/repos/asf/tez/blob/05f77fe2/tez-runtime-internals/src/main/java/org/apache/tez/runtime/RuntimeTask.java
----------------------------------------------------------------------
diff --git a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/RuntimeTask.java b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/RuntimeTask.java
index f8b8621..921095c 100644
--- a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/RuntimeTask.java
+++ b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/RuntimeTask.java
@@ -49,6 +49,7 @@ public abstract class RuntimeTask {
   protected final Configuration tezConf;
   protected final TezUmbilical tezUmbilical;
   protected final AtomicInteger eventCounter;
+  protected final AtomicInteger nextFromEventId;
   private final AtomicBoolean taskDone;
   private final TaskCounterUpdater counterUpdater;
   private final TaskStatistics statistics;
@@ -60,6 +61,7 @@ public abstract class RuntimeTask {
     this.tezUmbilical = tezUmbilical;
     this.tezCounters = new TezCounters();
     this.eventCounter = new AtomicInteger(0);
+    this.nextFromEventId = new AtomicInteger(0);
     this.progress = 0.0f;
     this.taskDone = new AtomicBoolean(false);
     this.statistics = new TaskStatistics();
@@ -130,6 +132,14 @@ public abstract class RuntimeTask {
   public int getEventCounter() {
     return eventCounter.get();
   }
+  
+  public int getNextFromEventId() {
+    return nextFromEventId.get();
+  }
+  
+  public void setNextFromEventId(int nextFromEventId) {
+    this.nextFromEventId.set(nextFromEventId);
+  }
 
   public boolean isTaskDone() {
     return taskDone.get();

http://git-wip-us.apache.org/repos/asf/tez/blob/05f77fe2/tez-runtime-internals/src/main/java/org/apache/tez/runtime/api/impl/TezHeartbeatResponse.java
----------------------------------------------------------------------
diff --git a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/api/impl/TezHeartbeatResponse.java b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/api/impl/TezHeartbeatResponse.java
index 10699ac..cecc706 100644
--- a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/api/impl/TezHeartbeatResponse.java
+++ b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/api/impl/TezHeartbeatResponse.java
@@ -32,6 +32,7 @@ public class TezHeartbeatResponse implements Writable {
   private long lastRequestId;
   private boolean shouldDie = false;
   private List<TezEvent> events;
+  private int nextFromEventId;
 
   public TezHeartbeatResponse() {
   }
@@ -51,6 +52,10 @@ public class TezHeartbeatResponse implements Writable {
   public long getLastRequestId() {
     return lastRequestId;
   }
+  
+  public int getNextFromEventId() {
+    return nextFromEventId;
+  }
 
   public void setEvents(List<TezEvent> events) {
     this.events = Collections.unmodifiableList(events);
@@ -63,11 +68,16 @@ public class TezHeartbeatResponse implements Writable {
   public void setShouldDie() {
     this.shouldDie = true;
   }
+  
+  public void setNextFromEventId(int nextFromEventId) {
+    this.nextFromEventId = nextFromEventId;
+  }
 
   @Override
   public void write(DataOutput out) throws IOException {
     out.writeLong(lastRequestId);
     out.writeBoolean(shouldDie);
+    out.writeInt(nextFromEventId);
     if(events != null) {
       out.writeBoolean(true);
       out.writeInt(events.size());
@@ -83,6 +93,7 @@ public class TezHeartbeatResponse implements Writable {
   public void readFields(DataInput in) throws IOException {
     lastRequestId = in.readLong();
     shouldDie = in.readBoolean();
+    nextFromEventId = in.readInt();
     if(in.readBoolean()) {
       int eventCount = in.readInt();
       events = new ArrayList<TezEvent>(eventCount);
@@ -99,6 +110,7 @@ public class TezHeartbeatResponse implements Writable {
     return "{ "
         + " lastRequestId=" + lastRequestId
         + ", shouldDie=" + shouldDie
+        + ", nextFromEventId=" + nextFromEventId
         + ", eventCount=" + (events != null ? events.size() : 0)
         + " }";
   }

http://git-wip-us.apache.org/repos/asf/tez/blob/05f77fe2/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TaskReporter.java
----------------------------------------------------------------------
diff --git a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TaskReporter.java b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TaskReporter.java
index 7324abd..3d1d1a2 100644
--- a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TaskReporter.java
+++ b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TaskReporter.java
@@ -240,8 +240,9 @@ public class TaskReporter {
       }
 
       long requestId = requestCounter.incrementAndGet();
+      int fromEventId = task.getNextFromEventId();
       TezHeartbeatRequest request = new TezHeartbeatRequest(requestId, events, containerIdStr,
-          task.getTaskAttemptID(), task.getEventCounter(), maxEventsToGet);
+          task.getTaskAttemptID(), fromEventId, maxEventsToGet);
       if (LOG.isDebugEnabled()) {
         LOG.debug("Sending heartbeat to AM, request=" + request);
       }
@@ -271,11 +272,12 @@ public class TaskReporter {
               + " heartbeat response, eventCount=" + response.getEvents().size());
         }
       } else {
+        task.setNextFromEventId(response.getNextFromEventId());
         if (response.getEvents() != null && !response.getEvents().isEmpty()) {
-          if (LOG.isDebugEnabled()) {
-            LOG.debug("Routing events from heartbeat response to task" + ", currentTaskAttemptId="
-                + task.getTaskAttemptID() + ", eventCount=" + response.getEvents().size());
-          }
+          LOG.info("Routing events from heartbeat response to task" + ", currentTaskAttemptId="
+              + task.getTaskAttemptID() + ", eventCount=" + response.getEvents().size()
+              + " fromEventId=" + fromEventId
+              + " nextFromEventId=" + response.getNextFromEventId());
           // This should ideally happen in a separate thread
           numEventsReceived = response.getEvents().size();
           task.handleEvents(response.getEvents());


[21/43] tez git commit: TEZ-2117. Add a manager for ContainerLaunchers running in the AM. (sseth)

Posted by ss...@apache.org.
TEZ-2117. Add a manager for ContainerLaunchers running in the AM.
(sseth)


Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/5f27b83a
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/5f27b83a
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/5f27b83a

Branch: refs/heads/TEZ-2003
Commit: 5f27b83ad5c02b3ce111694246ba536de543f691
Parents: aadd049
Author: Siddharth Seth <ss...@apache.org>
Authored: Wed Feb 18 14:45:34 2015 -0800
Committer: Siddharth Seth <ss...@apache.org>
Committed: Fri May 8 14:41:01 2015 -0700

----------------------------------------------------------------------
 TEZ-2003-CHANGES.txt                            |   1 +
 .../org/apache/tez/dag/app/DAGAppMaster.java    |  54 ++--------
 .../tez/dag/app/launcher/ContainerLauncher.java |   2 +-
 .../app/launcher/ContainerLauncherRouter.java   | 108 +++++++++++++++++++
 .../apache/tez/dag/app/MockDAGAppMaster.java    |   5 +-
 5 files changed, 124 insertions(+), 46 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tez/blob/5f27b83a/TEZ-2003-CHANGES.txt
----------------------------------------------------------------------
diff --git a/TEZ-2003-CHANGES.txt b/TEZ-2003-CHANGES.txt
index 975ce65..1cd74a4 100644
--- a/TEZ-2003-CHANGES.txt
+++ b/TEZ-2003-CHANGES.txt
@@ -2,5 +2,6 @@ ALL CHANGES:
   TEZ-2019. Temporarily allow the scheduler and launcher to be specified via configuration.
   TEZ-2006. Task communication plane needs to be pluggable.
   TEZ-2090. Add tests for jobs running in external services.
+  TEZ-2117. Add a manager for ContainerLaunchers running in the AM.
 
 INCOMPATIBLE CHANGES:

http://git-wip-us.apache.org/repos/asf/tez/blob/5f27b83a/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java b/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java
index bfc2d58..0f4d812 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java
@@ -25,8 +25,6 @@ import java.io.FileInputStream;
 import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.io.PrintWriter;
-import java.lang.reflect.Constructor;
-import java.lang.reflect.InvocationTargetException;
 import java.net.URI;
 import java.net.URISyntaxException;
 import java.net.URL;
@@ -139,9 +137,7 @@ import org.apache.tez.dag.app.dag.event.TaskEventType;
 import org.apache.tez.dag.app.dag.event.VertexEvent;
 import org.apache.tez.dag.app.dag.event.VertexEventType;
 import org.apache.tez.dag.app.dag.impl.DAGImpl;
-import org.apache.tez.dag.app.launcher.ContainerLauncher;
-import org.apache.tez.dag.app.launcher.ContainerLauncherImpl;
-import org.apache.tez.dag.app.launcher.LocalContainerLauncher;
+import org.apache.tez.dag.app.launcher.ContainerLauncherRouter;
 import org.apache.tez.dag.app.rm.AMSchedulerEventType;
 import org.apache.tez.dag.app.rm.NMCommunicatorEventType;
 import org.apache.tez.dag.app.rm.TaskSchedulerEventHandler;
@@ -226,7 +222,7 @@ public class DAGAppMaster extends AbstractService {
   private AppContext context;
   private Configuration amConf;
   private AsyncDispatcher dispatcher;
-  private ContainerLauncher containerLauncher;
+  private ContainerLauncherRouter containerLauncherRouter;
   private ContainerHeartbeatHandler containerHeartbeatHandler;
   private TaskHeartbeatHandler taskHeartbeatHandler;
   private TaskAttemptListener taskAttemptListener;
@@ -504,9 +500,9 @@ public class DAGAppMaster extends AbstractService {
         taskSchedulerEventHandler);
     addIfServiceDependency(taskSchedulerEventHandler, clientRpcServer);
 
-    containerLauncher = createContainerLauncher(context);
-    addIfService(containerLauncher, true);
-    dispatcher.register(NMCommunicatorEventType.class, containerLauncher);
+    this.containerLauncherRouter = createContainerLauncherRouter(conf);
+    addIfService(containerLauncherRouter, true);
+    dispatcher.register(NMCommunicatorEventType.class, containerLauncherRouter);
 
     historyEventHandler = createHistoryEventHandler(context);
     addIfService(historyEventHandler, true);
@@ -1034,38 +1030,10 @@ public class DAGAppMaster extends AbstractService {
     return chh;
   }
 
-  protected ContainerLauncher
-      createContainerLauncher(final AppContext context) throws UnknownHostException {
-    if(isLocal){
-      LOG.info("Creating LocalContainerLauncher");
-      return new LocalContainerLauncher(context, taskAttemptListener, workingDirectory);
-    } else {
-      // TODO: Temporary reflection with specific parameters until a clean interface is defined.
-      String containerLauncherClassName = getConfig().get(TezConfiguration.TEZ_AM_CONTAINER_LAUNCHER_CLASS);
-      if (containerLauncherClassName == null) {
-        LOG.info("Creating Default Container Launcher");
-        return new ContainerLauncherImpl(context);
-      } else {
-        LOG.info("Creating container launcher : " + containerLauncherClassName);
-        Class<? extends ContainerLauncher> containerLauncherClazz = (Class<? extends ContainerLauncher>) ReflectionUtils.getClazz(
-            containerLauncherClassName);
-        try {
-          Constructor<? extends ContainerLauncher> ctor = containerLauncherClazz
-              .getConstructor(AppContext.class, Configuration.class, TaskAttemptListener.class);
-          ctor.setAccessible(true);
-          ContainerLauncher instance = ctor.newInstance(context, getConfig(), taskAttemptListener);
-          return instance;
-        } catch (NoSuchMethodException e) {
-          throw new TezUncheckedException(e);
-        } catch (InvocationTargetException e) {
-          throw new TezUncheckedException(e);
-        } catch (InstantiationException e) {
-          throw new TezUncheckedException(e);
-        } catch (IllegalAccessException e) {
-          throw new TezUncheckedException(e);
-        }
-      }
-    }
+  protected ContainerLauncherRouter createContainerLauncherRouter(Configuration conf) throws
+      UnknownHostException {
+    return  new ContainerLauncherRouter(conf, isLocal, context, taskAttemptListener, workingDirectory);
+
   }
 
   public ApplicationId getAppID() {
@@ -1088,8 +1056,8 @@ public class DAGAppMaster extends AbstractService {
     return dispatcher;
   }
 
-  public ContainerLauncher getContainerLauncher() {
-    return containerLauncher;
+  public ContainerLauncherRouter getContainerLauncherRouter() {
+    return containerLauncherRouter;
   }
 
   public TaskAttemptListener getTaskAttemptListener() {

http://git-wip-us.apache.org/repos/asf/tez/blob/5f27b83a/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncher.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncher.java b/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncher.java
index 305eb50..8a8498f 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncher.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncher.java
@@ -23,7 +23,7 @@ import org.apache.hadoop.yarn.event.EventHandler;
 import org.apache.tez.dag.app.dag.DAG;
 import org.apache.tez.dag.app.rm.NMCommunicatorEvent;
 
-public interface ContainerLauncher 
+public interface ContainerLauncher
     extends EventHandler<NMCommunicatorEvent> {
 
     void dagComplete(DAG dag);

http://git-wip-us.apache.org/repos/asf/tez/blob/5f27b83a/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncherRouter.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncherRouter.java b/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncherRouter.java
new file mode 100644
index 0000000..34001ed
--- /dev/null
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncherRouter.java
@@ -0,0 +1,108 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.dag.app.launcher;
+
+import java.lang.reflect.Constructor;
+import java.lang.reflect.InvocationTargetException;
+import java.net.UnknownHostException;
+
+import com.google.common.annotations.VisibleForTesting;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.service.AbstractService;
+import org.apache.hadoop.yarn.event.EventHandler;
+import org.apache.tez.common.ReflectionUtils;
+import org.apache.tez.dag.api.TezConfiguration;
+import org.apache.tez.dag.api.TezUncheckedException;
+import org.apache.tez.dag.app.AppContext;
+import org.apache.tez.dag.app.TaskAttemptListener;
+import org.apache.tez.dag.app.rm.NMCommunicatorEvent;
+
+public class ContainerLauncherRouter extends AbstractService
+    implements EventHandler<NMCommunicatorEvent> {
+
+  static final Log LOG = LogFactory.getLog(ContainerLauncherImpl.class);
+
+  private final ContainerLauncher containerLauncher;
+
+  @VisibleForTesting
+  public ContainerLauncherRouter(ContainerLauncher containerLauncher) {
+    super(ContainerLauncherRouter.class.getName());
+    this.containerLauncher = containerLauncher;
+  }
+
+  // Accepting conf to setup final parameters, if required.
+  public ContainerLauncherRouter(Configuration conf, boolean isLocal, AppContext context,
+                                 TaskAttemptListener taskAttemptListener,
+                                 String workingDirectory) throws UnknownHostException {
+    super(ContainerLauncherRouter.class.getName());
+
+    if (isLocal) {
+      LOG.info("Creating LocalContainerLauncher");
+      containerLauncher =
+          new LocalContainerLauncher(context, taskAttemptListener, workingDirectory);
+    } else {
+      // TODO: Temporary reflection with specific parameters until a clean interface is defined.
+      String containerLauncherClassName =
+          conf.get(TezConfiguration.TEZ_AM_CONTAINER_LAUNCHER_CLASS);
+      if (containerLauncherClassName == null) {
+        LOG.info("Creating Default Container Launcher");
+        containerLauncher = new ContainerLauncherImpl(context);
+      } else {
+        LOG.info("Creating container launcher : " + containerLauncherClassName);
+        Class<? extends ContainerLauncher> containerLauncherClazz =
+            (Class<? extends ContainerLauncher>) ReflectionUtils.getClazz(
+                containerLauncherClassName);
+        try {
+          Constructor<? extends ContainerLauncher> ctor = containerLauncherClazz
+              .getConstructor(AppContext.class, Configuration.class, TaskAttemptListener.class);
+          ctor.setAccessible(true);
+          containerLauncher = ctor.newInstance(context, conf, taskAttemptListener);
+        } catch (NoSuchMethodException e) {
+          throw new TezUncheckedException(e);
+        } catch (InvocationTargetException e) {
+          throw new TezUncheckedException(e);
+        } catch (InstantiationException e) {
+          throw new TezUncheckedException(e);
+        } catch (IllegalAccessException e) {
+          throw new TezUncheckedException(e);
+        }
+      }
+
+    }
+  }
+
+  @Override
+  public void serviceInit(Configuration conf) {
+    ((AbstractService)containerLauncher).init(conf);
+  }
+
+  @Override
+  public void serviceStart() {
+    ((AbstractService)containerLauncher).start();
+  }
+
+  @Override
+  public void serviceStop() {
+    ((AbstractService)containerLauncher).stop();
+  }
+
+
+  @Override
+  public void handle(NMCommunicatorEvent event) {
+    containerLauncher.handle(event);
+  }
+}

http://git-wip-us.apache.org/repos/asf/tez/blob/5f27b83a/tez-dag/src/test/java/org/apache/tez/dag/app/MockDAGAppMaster.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/MockDAGAppMaster.java b/tez-dag/src/test/java/org/apache/tez/dag/app/MockDAGAppMaster.java
index b846922..7274cde 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/MockDAGAppMaster.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/MockDAGAppMaster.java
@@ -57,6 +57,7 @@ import org.apache.tez.dag.api.TezConfiguration;
 import org.apache.tez.dag.api.TaskCommunicator;
 import org.apache.tez.dag.api.TezUncheckedException;
 import org.apache.tez.dag.app.launcher.ContainerLauncher;
+import org.apache.tez.dag.app.launcher.ContainerLauncherRouter;
 import org.apache.tez.dag.app.rm.NMCommunicatorEvent;
 import org.apache.tez.dag.app.rm.NMCommunicatorLaunchRequestEvent;
 import org.apache.tez.dag.app.rm.NMCommunicatorStopRequestEvent;
@@ -509,9 +510,9 @@ public class MockDAGAppMaster extends DAGAppMaster {
   
   // use mock container launcher for tests
   @Override
-  protected ContainerLauncher createContainerLauncher(final AppContext context)
+  protected ContainerLauncherRouter createContainerLauncherRouter(final Configuration conf)
       throws UnknownHostException {
-    return containerLauncher;
+    return new ContainerLauncherRouter(containerLauncher);
   }
 
   @Override


[41/43] tez git commit: TEZ-2388. Send dag identifier as part of the fetcher request string. (sseth)

Posted by ss...@apache.org.
TEZ-2388. Send dag identifier as part of the fetcher request string. (sseth)


Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/fdb91771
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/fdb91771
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/fdb91771

Branch: refs/heads/TEZ-2003
Commit: fdb91771f0235295ef078871501c1d7d81b63ba8
Parents: 9e098f7
Author: Siddharth Seth <ss...@apache.org>
Authored: Wed Apr 29 08:20:05 2015 -0700
Committer: Siddharth Seth <ss...@apache.org>
Committed: Fri May 8 14:44:34 2015 -0700

----------------------------------------------------------------------
 TEZ-2003-CHANGES.txt                                  |  1 +
 .../tez/runtime/library/common/shuffle/Fetcher.java   | 14 ++++++++------
 .../runtime/library/common/shuffle/ShuffleUtils.java  |  8 +++++---
 .../library/common/shuffle/impl/ShuffleManager.java   |  2 +-
 .../ShuffleInputEventHandlerOrderedGrouped.java       |  2 +-
 .../runtime/library/common/shuffle/TestFetcher.java   |  6 +++---
 6 files changed, 19 insertions(+), 14 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tez/blob/fdb91771/TEZ-2003-CHANGES.txt
----------------------------------------------------------------------
diff --git a/TEZ-2003-CHANGES.txt b/TEZ-2003-CHANGES.txt
index d42aaf8..9fc9ed3 100644
--- a/TEZ-2003-CHANGES.txt
+++ b/TEZ-2003-CHANGES.txt
@@ -19,5 +19,6 @@ ALL CHANGES:
   TEZ-2347. Expose additional information in TaskCommunicatorContext.
   TEZ-2361. Propagate dag completion to TaskCommunicator.
   TEZ-2381. Fixes after rebase 04/28.
+  TEZ-2388. Send dag identifier as part of the fetcher request string.
 
 INCOMPATIBLE CHANGES:

http://git-wip-us.apache.org/repos/asf/tez/blob/fdb91771/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/Fetcher.java
----------------------------------------------------------------------
diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/Fetcher.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/Fetcher.java
index 61e0151..9fd46a4 100644
--- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/Fetcher.java
+++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/Fetcher.java
@@ -86,6 +86,7 @@ public class Fetcher extends CallableWithNdc<FetchResult> {
   private final FetcherCallback fetcherCallback;
   private final FetchedInputAllocator inputManager;
   private final ApplicationId appId;
+  private final int dagIdentifier;
   
   private final String logIdentifier;
 
@@ -124,7 +125,7 @@ public class Fetcher extends CallableWithNdc<FetchResult> {
   private final boolean isDebugEnabled = LOG.isDebugEnabled();
 
   private Fetcher(FetcherCallback fetcherCallback, HttpConnectionParams params,
-      FetchedInputAllocator inputManager, ApplicationId appId,
+      FetchedInputAllocator inputManager, ApplicationId appId, int dagIdentifier,
       JobTokenSecretManager jobTokenSecretManager, String srcNameTrimmed, Configuration conf,
       RawLocalFileSystem localFs,
       LocalDirAllocator localDirAllocator,
@@ -137,6 +138,7 @@ public class Fetcher extends CallableWithNdc<FetchResult> {
     this.inputManager = inputManager;
     this.jobTokenSecretMgr = jobTokenSecretManager;
     this.appId = appId;
+    this.dagIdentifier = dagIdentifier;
     this.pathToAttemptMap = new HashMap<String, InputAttemptIdentifier>();
     this.httpConnectionParams = params;
     this.conf = conf;
@@ -402,7 +404,7 @@ public class Fetcher extends CallableWithNdc<FetchResult> {
   private HostFetchResult setupConnection(List<InputAttemptIdentifier> attempts) {
     try {
       StringBuilder baseURI = ShuffleUtils.constructBaseURIForShuffleHandler(host,
-          port, partition, appId.toString(), httpConnectionParams.isSSLShuffleEnabled());
+          port, partition, appId.toString(), dagIdentifier, httpConnectionParams.isSSLShuffleEnabled());
       this.url = ShuffleUtils.constructInputURL(baseURI.toString(), attempts,
           httpConnectionParams.getKeepAlive());
 
@@ -902,21 +904,21 @@ public class Fetcher extends CallableWithNdc<FetchResult> {
 
     public FetcherBuilder(FetcherCallback fetcherCallback,
         HttpConnectionParams params, FetchedInputAllocator inputManager,
-        ApplicationId appId, JobTokenSecretManager jobTokenSecretMgr, String srcNameTrimmed,
+        ApplicationId appId, int dagIdentifier, JobTokenSecretManager jobTokenSecretMgr, String srcNameTrimmed,
         Configuration conf, boolean localDiskFetchEnabled, String localHostname, int shufflePort) {
-      this.fetcher = new Fetcher(fetcherCallback, params, inputManager, appId,
+      this.fetcher = new Fetcher(fetcherCallback, params, inputManager, appId, dagIdentifier,
           jobTokenSecretMgr, srcNameTrimmed, conf, null, null, null, localDiskFetchEnabled,
           false, localHostname, shufflePort);
     }
 
     public FetcherBuilder(FetcherCallback fetcherCallback,
         HttpConnectionParams params, FetchedInputAllocator inputManager,
-        ApplicationId appId, JobTokenSecretManager jobTokenSecretMgr, String srcNameTrimmed,
+        ApplicationId appId, int dagIdentifier, JobTokenSecretManager jobTokenSecretMgr, String srcNameTrimmed,
         Configuration conf, RawLocalFileSystem localFs,
         LocalDirAllocator localDirAllocator, Path lockPath,
         boolean localDiskFetchEnabled, boolean sharedFetchEnabled,
         String localHostname, int shufflePort) {
-      this.fetcher = new Fetcher(fetcherCallback, params, inputManager, appId,
+      this.fetcher = new Fetcher(fetcherCallback, params, inputManager, appId, dagIdentifier,
           jobTokenSecretMgr, srcNameTrimmed, conf, localFs, localDirAllocator,
           lockPath, localDiskFetchEnabled, sharedFetchEnabled, localHostname, shufflePort);
     }

http://git-wip-us.apache.org/repos/asf/tez/blob/fdb91771/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/ShuffleUtils.java
----------------------------------------------------------------------
diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/ShuffleUtils.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/ShuffleUtils.java
index 46489ed..d7cb7c1 100644
--- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/ShuffleUtils.java
+++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/ShuffleUtils.java
@@ -174,19 +174,21 @@ public class ShuffleUtils {
 
   // TODO NEWTEZ handle ssl shuffle
   public static StringBuilder constructBaseURIForShuffleHandler(String host,
-      int port, int partition, String appId, boolean sslShuffle) {
+      int port, int partition, String appId, int dagIdentifier, boolean sslShuffle) {
     return constructBaseURIForShuffleHandler(host + ":" + String.valueOf(port),
-      partition, appId, sslShuffle);
+      partition, appId, dagIdentifier, sslShuffle);
   }
   
   public static StringBuilder constructBaseURIForShuffleHandler(String hostIdentifier,
-      int partition, String appId, boolean sslShuffle) {
+      int partition, String appId, int dagIdentifier, boolean sslShuffle) {
     final String http_protocol = (sslShuffle) ? "https://" : "http://";
     StringBuilder sb = new StringBuilder(http_protocol);
     sb.append(hostIdentifier);
     sb.append("/");
     sb.append("mapOutput?job=");
     sb.append(appId.replace("application", "job"));
+    sb.append("&dag=");
+    sb.append(String.valueOf(dagIdentifier));
     sb.append("&reduce=");
     sb.append(String.valueOf(partition));
     sb.append("&map=");

http://git-wip-us.apache.org/repos/asf/tez/blob/fdb91771/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/impl/ShuffleManager.java
----------------------------------------------------------------------
diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/impl/ShuffleManager.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/impl/ShuffleManager.java
index ac7caca..437de76 100644
--- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/impl/ShuffleManager.java
+++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/impl/ShuffleManager.java
@@ -393,7 +393,7 @@ public class ShuffleManager implements FetcherCallback {
     }
 
     FetcherBuilder fetcherBuilder = new FetcherBuilder(ShuffleManager.this,
-      httpConnectionParams, inputManager, inputContext.getApplicationId(),
+      httpConnectionParams, inputManager, inputContext.getApplicationId(), inputContext.getDagIdentifier(),
         jobTokenSecretMgr, srcNameTrimmed, conf, localFs, localDirAllocator,
         lockDisk, localDiskFetchEnabled, sharedFetchEnabled,
         localhostName, shufflePort);

http://git-wip-us.apache.org/repos/asf/tez/blob/fdb91771/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/ShuffleInputEventHandlerOrderedGrouped.java
----------------------------------------------------------------------
diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/ShuffleInputEventHandlerOrderedGrouped.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/ShuffleInputEventHandlerOrderedGrouped.java
index 32ac766..9481e65 100644
--- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/ShuffleInputEventHandlerOrderedGrouped.java
+++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/ShuffleInputEventHandlerOrderedGrouped.java
@@ -127,7 +127,7 @@ public class ShuffleInputEventHandlerOrderedGrouped {
   @VisibleForTesting
   URI getBaseURI(String host, int port, int partitionId) {
     StringBuilder sb = ShuffleUtils.constructBaseURIForShuffleHandler(host, port,
-      partitionId, inputContext.getApplicationId().toString(), sslShuffle);
+      partitionId, inputContext.getApplicationId().toString(), inputContext.getDagIdentifier(), sslShuffle);
     URI u = URI.create(sb.toString());
     return u;
   }

http://git-wip-us.apache.org/repos/asf/tez/blob/fdb91771/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/TestFetcher.java
----------------------------------------------------------------------
diff --git a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/TestFetcher.java b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/TestFetcher.java
index 4ef187d..d2b0bde 100644
--- a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/TestFetcher.java
+++ b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/TestFetcher.java
@@ -70,7 +70,7 @@ public class TestFetcher {
     final boolean DISABLE_LOCAL_FETCH = false;
 
     Fetcher.FetcherBuilder builder = new Fetcher.FetcherBuilder(fetcherCallback, null, null,
-        ApplicationId.newInstance(0, 1), null, "fetcherTest", conf, ENABLE_LOCAL_FETCH, HOST, PORT);
+        ApplicationId.newInstance(0, 1), 1, null, "fetcherTest", conf, ENABLE_LOCAL_FETCH, HOST, PORT);
     builder.assignWork(HOST, PORT, 0, Arrays.asList(srcAttempts));
     Fetcher fetcher = spy(builder.build());
 
@@ -119,7 +119,7 @@ public class TestFetcher {
     // When disabled use http fetch
     conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_OPTIMIZE_LOCAL_FETCH, false);
     builder = new Fetcher.FetcherBuilder(fetcherCallback, null, null,
-        ApplicationId.newInstance(0, 1), null, "fetcherTest", conf, DISABLE_LOCAL_FETCH, HOST, PORT);
+        ApplicationId.newInstance(0, 1), 1, null, "fetcherTest", conf, DISABLE_LOCAL_FETCH, HOST, PORT);
     builder.assignWork(HOST, PORT, 0, Arrays.asList(srcAttempts));
     fetcher = spy(builder.build());
 
@@ -152,7 +152,7 @@ public class TestFetcher {
     int partition = 42;
     FetcherCallback callback = mock(FetcherCallback.class);
     Fetcher.FetcherBuilder builder = new Fetcher.FetcherBuilder(callback, null, null,
-        ApplicationId.newInstance(0, 1), null, "fetcherTest", conf, true, HOST, PORT);
+        ApplicationId.newInstance(0, 1), 1, null, "fetcherTest", conf, true, HOST, PORT);
     builder.assignWork(HOST, PORT, partition, Arrays.asList(srcAttempts));
     Fetcher fetcher = spy(builder.build());
 


[20/43] tez git commit: TEZ-2122. Setup pluggable components at AM/Vertex level. (sseth)

Posted by ss...@apache.org.
TEZ-2122. Setup pluggable components at AM/Vertex level. (sseth)


Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/cee48099
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/cee48099
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/cee48099

Branch: refs/heads/TEZ-2003
Commit: cee4809942dbdeafabc7e9442e17aab54c54fdba
Parents: 5f27b83
Author: Siddharth Seth <ss...@apache.org>
Authored: Thu Feb 19 14:59:18 2015 -0800
Committer: Siddharth Seth <ss...@apache.org>
Committed: Fri May 8 14:41:01 2015 -0700

----------------------------------------------------------------------
 TEZ-2003-CHANGES.txt                            |   1 +
 .../apache/tez/dag/api/TezConfiguration.java    |  29 +++-
 .../org/apache/tez/dag/api/TezConstants.java    |   3 +
 .../java/org/apache/tez/dag/app/AppContext.java |   4 +
 .../org/apache/tez/dag/app/DAGAppMaster.java    | 121 +++++++++++++-
 .../dag/app/TaskAttemptListenerImpTezDag.java   |  77 +++++----
 .../java/org/apache/tez/dag/app/dag/Vertex.java |   4 +
 .../tez/dag/app/dag/impl/TaskAttemptImpl.java   |   8 +-
 .../apache/tez/dag/app/dag/impl/VertexImpl.java |  47 ++++++
 .../app/launcher/ContainerLauncherRouter.java   |  93 +++++++----
 .../app/rm/AMSchedulerEventTALaunchRequest.java |  22 ++-
 .../dag/app/rm/TaskSchedulerEventHandler.java   | 163 +++++++++++--------
 .../apache/tez/dag/app/MockDAGAppMaster.java    |   5 +-
 .../app/TestTaskAttemptListenerImplTezDag.java  |  19 +--
 .../tez/dag/app/rm/TestContainerReuse.java      |   2 +-
 .../app/rm/TestTaskSchedulerEventHandler.java   |  12 +-
 .../dag/app/rm/TestTaskSchedulerHelpers.java    |  18 +-
 .../tez/tests/TestExternalTezServices.java      |  19 ++-
 18 files changed, 458 insertions(+), 189 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tez/blob/cee48099/TEZ-2003-CHANGES.txt
----------------------------------------------------------------------
diff --git a/TEZ-2003-CHANGES.txt b/TEZ-2003-CHANGES.txt
index 1cd74a4..4bfe08f 100644
--- a/TEZ-2003-CHANGES.txt
+++ b/TEZ-2003-CHANGES.txt
@@ -3,5 +3,6 @@ ALL CHANGES:
   TEZ-2006. Task communication plane needs to be pluggable.
   TEZ-2090. Add tests for jobs running in external services.
   TEZ-2117. Add a manager for ContainerLaunchers running in the AM.
+  TEZ-2122. Setup pluggable components at AM/Vertex level.
 
 INCOMPATIBLE CHANGES:

http://git-wip-us.apache.org/repos/asf/tez/blob/cee48099/tez-api/src/main/java/org/apache/tez/dag/api/TezConfiguration.java
----------------------------------------------------------------------
diff --git a/tez-api/src/main/java/org/apache/tez/dag/api/TezConfiguration.java b/tez-api/src/main/java/org/apache/tez/dag/api/TezConfiguration.java
index 1cd478e..1f5f157 100644
--- a/tez-api/src/main/java/org/apache/tez/dag/api/TezConfiguration.java
+++ b/tez-api/src/main/java/org/apache/tez/dag/api/TezConfiguration.java
@@ -1168,13 +1168,36 @@ public class TezConfiguration extends Configuration {
       + "tez-ui.webservice.enable";
   public static final boolean TEZ_AM_WEBSERVICE_ENABLE_DEFAULT = true;
 
+  /** defaults container-launcher for the specific vertex */
   @ConfigurationScope(Scope.VERTEX)
-  public static final String TEZ_AM_CONTAINER_LAUNCHER_CLASS = TEZ_AM_PREFIX + "container-launcher.class";
+  public static final String TEZ_AM_VERTEX_CONTAINER_LAUNCHER_NAME = TEZ_AM_PREFIX + "vertex.container-launcher.name";
+  /** defaults task-scheduler for the specific vertex */
   @ConfigurationScope(Scope.VERTEX)
-  public static final String TEZ_AM_TASK_SCHEDULER_CLASS = TEZ_AM_PREFIX + "task-scheduler.class";
+  public static final String TEZ_AM_VERTEX_TASK_SCHEDULER_NAME = TEZ_AM_PREFIX + "vertex.task-scheduler.name";
+  /** defaults task-communicator for the specific vertex */
   @ConfigurationScope(Scope.VERTEX)
-  public static final String TEZ_AM_TASK_COMMUNICATOR_CLASS = TEZ_AM_PREFIX + "task-communicator.class";
+  public static final String TEZ_AM_VERTEX_TASK_COMMUNICATOR_NAME = TEZ_AM_PREFIX + "vertex.task-communicator.name";
 
+  /** Comma separated list of named container-launcher classes running in the AM.
+   * The format for each entry is NAME:CLASSNAME, except for tez default which is specified as Tez
+   * e.g. Tez, ExtService:org.apache.ExtLauncherClasss
+   * */
+  @ConfigurationScope(Scope.AM)
+  public static final String TEZ_AM_CONTAINER_LAUNCHERS = TEZ_AM_PREFIX + "container-launchers";
+
+  /** Comma separated list of task-schedulers classes running in the AM.
+   * The format for each entry is NAME:CLASSNAME, except for tez default which is specified as Tez
+   * e.g. Tez, ExtService:org.apache.ExtSchedulerClasss
+   */
+  @ConfigurationScope(Scope.AM)
+  public static final String TEZ_AM_TASK_SCHEDULERS = TEZ_AM_PREFIX + "task-schedulers";
+
+  /** Comma separated list of task-communicators classes running in the AM.
+   * The format for each entry is NAME:CLASSNAME, except for tez default which is specified as Tez
+   * e.g. Tez, ExtService:org.apache.ExtTaskCommClass
+   * */
+   @ConfigurationScope(Scope.AM)
+  public static final String TEZ_AM_TASK_COMMUNICATORS = TEZ_AM_PREFIX + "task-communicators";
 
   // TODO only validate property here, value can also be validated if necessary
   public static void validateProperty(String property, Scope usedScope) {

http://git-wip-us.apache.org/repos/asf/tez/blob/cee48099/tez-api/src/main/java/org/apache/tez/dag/api/TezConstants.java
----------------------------------------------------------------------
diff --git a/tez-api/src/main/java/org/apache/tez/dag/api/TezConstants.java b/tez-api/src/main/java/org/apache/tez/dag/api/TezConstants.java
index bc4208f..3b07c59 100644
--- a/tez-api/src/main/java/org/apache/tez/dag/api/TezConstants.java
+++ b/tez-api/src/main/java/org/apache/tez/dag/api/TezConstants.java
@@ -102,4 +102,7 @@ public class TezConstants {
   /// Version-related Environment variables
   public static final String TEZ_CLIENT_VERSION_ENV = "TEZ_CLIENT_VERSION";
 
+
+  public static final String TEZ_AM_SERVICE_PLUGINS_NAME_DEFAULT = "Tez";
+  public static final String TEZ_AM_SERVICE_PLUGINS_LOCAL_MODE_NAME_DEFAULT = "TezLocal";
 }

http://git-wip-us.apache.org/repos/asf/tez/blob/cee48099/tez-dag/src/main/java/org/apache/tez/dag/app/AppContext.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/AppContext.java b/tez-dag/src/main/java/org/apache/tez/dag/app/AppContext.java
index 37f7624..9463226 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/AppContext.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/AppContext.java
@@ -109,4 +109,8 @@ public interface AppContext {
   String getAMUser();
 
   Credentials getAppCredentials();
+
+  public Integer getTaskCommunicatorIdentifier(String name);
+  public Integer getTaskScheduerIdentifier(String name);
+  public Integer getContainerLauncherIdentifier(String name);
 }

http://git-wip-us.apache.org/repos/asf/tez/blob/cee48099/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java b/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java
index 0f4d812..6814cda 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java
@@ -54,6 +54,8 @@ import java.util.concurrent.locks.ReentrantReadWriteLock;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
+import com.google.common.collect.BiMap;
+import com.google.common.collect.HashBiMap;
 import org.apache.commons.cli.CommandLine;
 import org.apache.commons.cli.GnuParser;
 import org.apache.commons.cli.Options;
@@ -266,7 +268,12 @@ public class DAGAppMaster extends AbstractService {
   
   private ExecutorService rawExecutor;
   private ListeningExecutorService execService;
-  
+
+  // TODO May not need to be a bidi map
+  private final BiMap<String, Integer> taskSchedulers = HashBiMap.create();
+  private final BiMap<String, Integer> containerLaunchers = HashBiMap.create();
+  private final BiMap<String, Integer> taskCommunicators = HashBiMap.create();
+
   /**
    * set of already executed dag names.
    */
@@ -370,6 +377,29 @@ public class DAGAppMaster extends AbstractService {
     this.isLocal = conf.getBoolean(TezConfiguration.TEZ_LOCAL_MODE,
         TezConfiguration.TEZ_LOCAL_MODE_DEFAULT);
 
+    String tezDefaultClassIdentifier =
+        isLocal ? TezConstants.TEZ_AM_SERVICE_PLUGINS_LOCAL_MODE_NAME_DEFAULT :
+            TezConstants.TEZ_AM_SERVICE_PLUGINS_NAME_DEFAULT;
+
+    String[] taskSchedulerClassIdentifiers = parsePlugins(taskSchedulers,
+        conf.getTrimmedStrings(TezConfiguration.TEZ_AM_TASK_SCHEDULERS,
+            tezDefaultClassIdentifier),
+        TezConfiguration.TEZ_AM_TASK_SCHEDULERS);
+
+    String[] containerLauncherClassIdentifiers = parsePlugins(containerLaunchers,
+        conf.getTrimmedStrings(TezConfiguration.TEZ_AM_CONTAINER_LAUNCHERS,
+            tezDefaultClassIdentifier),
+        TezConfiguration.TEZ_AM_CONTAINER_LAUNCHERS);
+
+    String[] taskCommunicatorClassIdentifiers = parsePlugins(taskCommunicators,
+        conf.getTrimmedStrings(TezConfiguration.TEZ_AM_TASK_COMMUNICATORS,
+            tezDefaultClassIdentifier),
+        TezConfiguration.TEZ_AM_TASK_COMMUNICATORS);
+
+    LOG.info(buildPluginComponentLog(taskSchedulerClassIdentifiers, taskSchedulers, "TaskSchedulers"));
+    LOG.info(buildPluginComponentLog(containerLauncherClassIdentifiers, containerLaunchers, "ContainerLaunchers"));
+    LOG.info(buildPluginComponentLog(taskCommunicatorClassIdentifiers, taskCommunicators, "TaskCommunicators"));
+
     boolean disableVersionCheck = conf.getBoolean(
         TezConfiguration.TEZ_AM_DISABLE_CLIENT_VERSION_CHECK,
         TezConfiguration.TEZ_AM_DISABLE_CLIENT_VERSION_CHECK_DEFAULT);
@@ -435,7 +465,7 @@ public class DAGAppMaster extends AbstractService {
 
     //service to handle requests to TaskUmbilicalProtocol
     taskAttemptListener = createTaskAttemptListener(context,
-        taskHeartbeatHandler, containerHeartbeatHandler);
+        taskHeartbeatHandler, containerHeartbeatHandler, taskCommunicatorClassIdentifiers);
     addIfService(taskAttemptListener, true);
 
     containerSignatureMatcher = createContainerSignatureMatcher();
@@ -482,7 +512,8 @@ public class DAGAppMaster extends AbstractService {
     }
 
     this.taskSchedulerEventHandler = new TaskSchedulerEventHandler(context,
-        clientRpcServer, dispatcher.getEventHandler(), containerSignatureMatcher, webUIService);
+        clientRpcServer, dispatcher.getEventHandler(), containerSignatureMatcher, webUIService,
+        taskSchedulerClassIdentifiers);
     addIfService(taskSchedulerEventHandler, true);
 
     if (enableWebUIService()) {
@@ -500,7 +531,7 @@ public class DAGAppMaster extends AbstractService {
         taskSchedulerEventHandler);
     addIfServiceDependency(taskSchedulerEventHandler, clientRpcServer);
 
-    this.containerLauncherRouter = createContainerLauncherRouter(conf);
+    this.containerLauncherRouter = createContainerLauncherRouter(conf, containerLauncherClassIdentifiers);
     addIfService(containerLauncherRouter, true);
     dispatcher.register(NMCommunicatorEventType.class, containerLauncherRouter);
 
@@ -1007,9 +1038,9 @@ public class DAGAppMaster extends AbstractService {
   }
 
   protected TaskAttemptListener createTaskAttemptListener(AppContext context,
-      TaskHeartbeatHandler thh, ContainerHeartbeatHandler chh) {
+      TaskHeartbeatHandler thh, ContainerHeartbeatHandler chh, String[] taskCommunicatorClasses) {
     TaskAttemptListener lis =
-        new TaskAttemptListenerImpTezDag(context, thh, chh,jobTokenSecretManager);
+        new TaskAttemptListenerImpTezDag(context, thh, chh,jobTokenSecretManager, taskCommunicatorClasses);
     return lis;
   }
 
@@ -1030,9 +1061,9 @@ public class DAGAppMaster extends AbstractService {
     return chh;
   }
 
-  protected ContainerLauncherRouter createContainerLauncherRouter(Configuration conf) throws
+  protected ContainerLauncherRouter createContainerLauncherRouter(Configuration conf, String []containerLauncherClasses) throws
       UnknownHostException {
-    return  new ContainerLauncherRouter(conf, isLocal, context, taskAttemptListener, workingDirectory);
+    return  new ContainerLauncherRouter(conf, context, taskAttemptListener, workingDirectory, containerLauncherClasses);
 
   }
 
@@ -1459,6 +1490,21 @@ public class DAGAppMaster extends AbstractService {
     }
 
     @Override
+    public Integer getTaskCommunicatorIdentifier(String name) {
+      return taskCommunicators.get(name);
+    }
+
+    @Override
+    public Integer getTaskScheduerIdentifier(String name) {
+      return taskSchedulers.get(name);
+    }
+
+    @Override
+    public Integer getContainerLauncherIdentifier(String name) {
+      return taskCommunicators.get(name);
+    }
+
+    @Override
     public Map<ApplicationAccessType, String> getApplicationACLs() {
       if (getServiceState() != STATE.STARTED) {
         throw new TezUncheckedException(
@@ -2233,4 +2279,63 @@ public class DAGAppMaster extends AbstractService {
     return amConf.getBoolean(TezConfiguration.TEZ_AM_WEBSERVICE_ENABLE,
         TezConfiguration.TEZ_AM_WEBSERVICE_ENABLE_DEFAULT);
   }
+
+  // Tez default classnames are populated as TezConfiguration.TEZ_AM_SERVICE_PLUGINS_DEFAULT
+  private String[] parsePlugins(BiMap<String, Integer> pluginMap, String[] pluginStrings,
+                                   String context) {
+    Preconditions.checkState(pluginStrings != null && pluginStrings.length > 0,
+        "Plugin strings should not be null or empty: " + context);
+
+    String[] classNames = new String[pluginStrings.length];
+
+    int index = 0;
+    for (String pluginString : pluginStrings) {
+
+      String className;
+      String identifierString;
+
+      Preconditions.checkState(pluginString != null && !pluginString.isEmpty(),
+          "Plugin string: " + pluginString + " should not be null or empty");
+      if (pluginString.equals(TezConstants.TEZ_AM_SERVICE_PLUGINS_NAME_DEFAULT) ||
+          pluginString.equals(TezConstants.TEZ_AM_SERVICE_PLUGINS_LOCAL_MODE_NAME_DEFAULT)) {
+        // Kind of ugly, but Tez internal routing is encoded via a String instead of classnames.
+        // Individual components - TaskComm, Scheduler, Launcher deal with actual classname translation,
+        // and avoid reflection.
+        identifierString = pluginString;
+        className = pluginString;
+      } else {
+        String[] parts = pluginString.split(":");
+        Preconditions.checkState(
+            parts.length == 2 && parts[0] != null && !parts[0].isEmpty() && parts[1] != null &&
+                !parts[1].isEmpty(),
+            "Invalid configuration string for " + context + ": " + pluginString);
+        Preconditions.checkState(
+            !parts[0].equals(TezConstants.TEZ_AM_SERVICE_PLUGINS_NAME_DEFAULT) &&
+                !parts[0].equals(TezConstants.TEZ_AM_SERVICE_PLUGINS_LOCAL_MODE_NAME_DEFAULT),
+            "Identifier cannot be " + TezConstants.TEZ_AM_SERVICE_PLUGINS_NAME_DEFAULT + " or " +
+                TezConstants.TEZ_AM_SERVICE_PLUGINS_LOCAL_MODE_NAME_DEFAULT + " for " +
+                pluginString);
+        identifierString = parts[0];
+        className = parts[1];
+      }
+      pluginMap.put(identifierString, index);
+      classNames[index] = className;
+    }
+    return classNames;
+  }
+
+  String buildPluginComponentLog(String[] classIdentifiers, BiMap<String, Integer> map,
+                                 String component) {
+    StringBuilder sb = new StringBuilder();
+    sb.append("AM Level configured ").append(component).append(": ");
+    for (int i = 0; i < classIdentifiers.length; i++) {
+      sb.append("[").append(i).append(":").append(map.inverse().get(i)).append(":")
+          .append(taskSchedulers.inverse().get(i)).append(
+          "]");
+      if (i != classIdentifiers.length - 1) {
+        sb.append(",");
+      }
+    }
+    return sb.toString();
+  }
 }

http://git-wip-us.apache.org/repos/asf/tez/blob/cee48099/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java b/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
index 0d9dc31..2f6dcf5 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
@@ -38,7 +38,6 @@ import org.apache.tez.runtime.api.impl.EventType;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.hadoop.classification.InterfaceAudience;
-import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.security.Credentials;
 import org.apache.hadoop.service.AbstractService;
 import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
@@ -47,7 +46,7 @@ import org.apache.tez.common.ReflectionUtils;
 import org.apache.tez.dag.api.TaskCommunicator;
 import org.apache.tez.dag.api.TaskCommunicatorContext;
 import org.apache.tez.dag.api.TaskHeartbeatResponse;
-import org.apache.tez.dag.api.TezConfiguration;
+import org.apache.tez.dag.api.TezConstants;
 import org.apache.tez.dag.api.TezException;
 import org.apache.tez.dag.api.TezUncheckedException;
 import org.apache.hadoop.yarn.api.records.ContainerId;
@@ -57,7 +56,6 @@ import org.apache.tez.dag.app.dag.DAG;
 import org.apache.tez.dag.app.dag.Task;
 import org.apache.tez.dag.app.dag.event.TaskAttemptEventStartedRemotely;
 import org.apache.tez.dag.app.dag.event.VertexEventRouteEvent;
-import org.apache.tez.dag.app.rm.TaskSchedulerService;
 import org.apache.tez.dag.app.rm.container.AMContainerTask;
 import org.apache.tez.dag.records.TezTaskAttemptID;
 import org.apache.tez.dag.records.TezVertexID;
@@ -74,7 +72,7 @@ public class TaskAttemptListenerImpTezDag extends AbstractService implements
       .getLogger(TaskAttemptListenerImpTezDag.class);
 
   private final AppContext context;
-  private TaskCommunicator taskCommunicator;
+  private final TaskCommunicator[] taskCommunicators;
 
   protected final TaskHeartbeatHandler taskHeartbeatHandler;
   protected final ContainerHeartbeatHandler containerHeartbeatHandler;
@@ -100,28 +98,52 @@ public class TaskAttemptListenerImpTezDag extends AbstractService implements
   public TaskAttemptListenerImpTezDag(AppContext context,
                                       TaskHeartbeatHandler thh, ContainerHeartbeatHandler chh,
                                       // TODO TEZ-2003 pre-merge. Remove reference to JobTokenSecretManager.
-                                      JobTokenSecretManager jobTokenSecretManager) {
+                                      JobTokenSecretManager jobTokenSecretManager,
+                                      String [] taskCommunicatorClassIdentifiers) {
     super(TaskAttemptListenerImpTezDag.class.getName());
     this.context = context;
     this.taskHeartbeatHandler = thh;
     this.containerHeartbeatHandler = chh;
-    this.taskCommunicator = new TezTaskCommunicatorImpl(this);
+    if (taskCommunicatorClassIdentifiers == null || taskCommunicatorClassIdentifiers.length == 0) {
+      taskCommunicatorClassIdentifiers = new String[] {TezConstants.TEZ_AM_SERVICE_PLUGINS_NAME_DEFAULT};
+    }
+    this.taskCommunicators = new TaskCommunicator[taskCommunicatorClassIdentifiers.length];
+    for (int i = 0 ; i < taskCommunicatorClassIdentifiers.length ; i++) {
+      taskCommunicators[i] = createTaskCommunicator(taskCommunicatorClassIdentifiers[i]);
+    }
+    // TODO TEZ-2118 Start using taskCommunicator indices properly
+  }
+
+  @Override
+  public void serviceStart() {
+    // TODO Why is init tied to serviceStart
+    for (int i = 0 ; i < taskCommunicators.length ; i++) {
+      taskCommunicators[i].init(getConfig());
+      taskCommunicators[i].start();
+    }
   }
 
   @Override
-  public void serviceInit(Configuration conf) {
-    String taskCommClassName = conf.get(TezConfiguration.TEZ_AM_TASK_COMMUNICATOR_CLASS);
-    if (taskCommClassName == null) {
+  public void serviceStop() {
+    for (int i = 0 ; i < taskCommunicators.length ; i++) {
+      taskCommunicators[i].stop();
+    }
+  }
+
+  private TaskCommunicator createTaskCommunicator(String taskCommClassIdentifier) {
+    if (taskCommClassIdentifier.equals(TezConstants.TEZ_AM_SERVICE_PLUGINS_NAME_DEFAULT) ||
+        taskCommClassIdentifier
+            .equals(TezConstants.TEZ_AM_SERVICE_PLUGINS_LOCAL_MODE_NAME_DEFAULT)) {
       LOG.info("Using Default Task Communicator");
-      this.taskCommunicator = new TezTaskCommunicatorImpl(this);
+      return new TezTaskCommunicatorImpl(this);
     } else {
-      LOG.info("Using TaskCommunicator: " + taskCommClassName);
+      LOG.info("Using TaskCommunicator: " + taskCommClassIdentifier);
       Class<? extends TaskCommunicator> taskCommClazz = (Class<? extends TaskCommunicator>) ReflectionUtils
-          .getClazz(taskCommClassName);
+          .getClazz(taskCommClassIdentifier);
       try {
         Constructor<? extends TaskCommunicator> ctor = taskCommClazz.getConstructor(TaskCommunicatorContext.class);
         ctor.setAccessible(true);
-        this.taskCommunicator = ctor.newInstance(this);
+        return ctor.newInstance(this);
       } catch (NoSuchMethodException e) {
         throw new TezUncheckedException(e);
       } catch (InvocationTargetException e) {
@@ -135,20 +157,6 @@ public class TaskAttemptListenerImpTezDag extends AbstractService implements
   }
 
   @Override
-  public void serviceStart() {
-    taskCommunicator.init(getConfig());
-    taskCommunicator.start();
-  }
-
-  @Override
-  public void serviceStop() {
-    if (taskCommunicator != null) {
-      taskCommunicator.stop();
-      taskCommunicator = null;
-    }
-  }
-
-  @Override
   public ApplicationAttemptId getApplicationAttemptId() {
     return context.getApplicationAttemptId();
   }
@@ -236,7 +244,8 @@ public class TaskAttemptListenerImpTezDag extends AbstractService implements
 
   @Override
   public void taskStartedRemotely(TezTaskAttemptID taskAttemptID, ContainerId containerId) {
-    context.getEventHandler().handle(new TaskAttemptEventStartedRemotely(taskAttemptID, containerId, null));
+    context.getEventHandler()
+        .handle(new TaskAttemptEventStartedRemotely(taskAttemptID, containerId, null));
     pingContainerHeartbeatHandler(containerId);
   }
 
@@ -266,7 +275,7 @@ public class TaskAttemptListenerImpTezDag extends AbstractService implements
 
   @Override
   public InetSocketAddress getAddress() {
-    return taskCommunicator.getAddress();
+    return taskCommunicators[0].getAddress();
   }
 
   // The TaskAttemptListener register / unregister methods in this class are not thread safe.
@@ -298,7 +307,7 @@ public class TaskAttemptListenerImpTezDag extends AbstractService implements
           "Multiple registrations for containerId: " + containerId);
     }
     NodeId nodeId = context.getAllContainers().get(containerId).getContainer().getNodeId();
-    taskCommunicator.registerRunningContainer(containerId, nodeId.getHost(), nodeId.getPort());
+    taskCommunicators[0].registerRunningContainer(containerId, nodeId.getHost(), nodeId.getPort());
   }
 
   @Override
@@ -310,7 +319,7 @@ public class TaskAttemptListenerImpTezDag extends AbstractService implements
     if (containerInfo.taskAttemptId != null) {
       registeredAttempts.remove(containerInfo.taskAttemptId);
     }
-    taskCommunicator.registerContainerEnd(containerId);
+    taskCommunicators[0].registerContainerEnd(containerId);
   }
 
   @Override
@@ -345,7 +354,7 @@ public class TaskAttemptListenerImpTezDag extends AbstractService implements
           + amContainerTask.getTask().getTaskAttemptID() + " to container: " + containerId
           + " when already assigned to: " + containerIdFromMap);
     }
-    taskCommunicator.registerRunningTaskAttempt(containerId, amContainerTask.getTask(),
+    taskCommunicators[0].registerRunningTaskAttempt(containerId, amContainerTask.getTask(),
         amContainerTask.getAdditionalResources(), amContainerTask.getCredentials(),
         amContainerTask.haveCredentialsChanged());
   }
@@ -365,7 +374,7 @@ public class TaskAttemptListenerImpTezDag extends AbstractService implements
     }
     // Explicitly putting in a new entry so that synchronization is not required on the existing element in the map.
     registeredContainers.put(containerId, NULL_CONTAINER_INFO);
-    taskCommunicator.unregisterRunningTaskAttempt(attemptId);
+    taskCommunicators[0].unregisterRunningTaskAttempt(attemptId);
   }
 
   private void pingContainerHeartbeatHandler(ContainerId containerId) {
@@ -383,6 +392,6 @@ public class TaskAttemptListenerImpTezDag extends AbstractService implements
   }
 
   public TaskCommunicator getTaskCommunicator() {
-    return taskCommunicator;
+    return taskCommunicators[0];
   }
 }

http://git-wip-us.apache.org/repos/asf/tez/blob/cee48099/tez-dag/src/main/java/org/apache/tez/dag/app/dag/Vertex.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/Vertex.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/Vertex.java
index bb42392..8b60dc3 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/Vertex.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/Vertex.java
@@ -173,4 +173,8 @@ public interface Vertex extends Comparable<Vertex> {
   public int getKilledTaskAttemptCount();
 
   public Configuration getConf();
+
+  public int getTaskSchedulerIdentifier();
+  public int getContainerLauncherIdentifier();
+  public int getTaskCommunicatorIdentifier();
 }

http://git-wip-us.apache.org/repos/asf/tez/blob/cee48099/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TaskAttemptImpl.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TaskAttemptImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TaskAttemptImpl.java
index b1c0acc..c18dc00 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TaskAttemptImpl.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TaskAttemptImpl.java
@@ -1066,9 +1066,15 @@ public class TaskAttemptImpl implements TaskAttempt,
         priority = (scheduleEvent.getPriorityHighLimit() + scheduleEvent.getPriorityLowLimit()) / 2;
       }
 
+      // TODO Jira post TEZ-2003 getVertex implementation is very inefficient. This should be via references, instead of locked table lookups.
+      Vertex vertex = ta.getVertex();
       AMSchedulerEventTALaunchRequest launchRequestEvent = new AMSchedulerEventTALaunchRequest(
           ta.attemptId, ta.taskResource, remoteTaskSpec, ta, locationHint,
-          priority, ta.containerContext);
+          priority, ta.containerContext,
+          vertex.getTaskSchedulerIdentifier(),
+          vertex.getContainerLauncherIdentifier(),
+          vertex.getTaskCommunicatorIdentifier());
+
       ta.sendEvent(launchRequestEvent);
       return TaskAttemptStateInternal.START_WAIT;
     }

http://git-wip-us.apache.org/repos/asf/tez/blob/cee48099/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexImpl.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexImpl.java
index 6b208b0..097cf3d 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexImpl.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexImpl.java
@@ -73,6 +73,7 @@ import org.apache.tez.dag.api.ProcessorDescriptor;
 import org.apache.tez.dag.api.RootInputLeafOutput;
 import org.apache.tez.dag.api.Scope;
 import org.apache.tez.dag.api.TezConfiguration;
+import org.apache.tez.dag.api.TezConstants;
 import org.apache.tez.dag.api.TezUncheckedException;
 import org.apache.tez.dag.api.VertexLocationHint;
 import org.apache.tez.dag.api.TaskLocationHint;
@@ -230,6 +231,10 @@ public class VertexImpl implements org.apache.tez.dag.app.dag.Vertex, EventHandl
   @VisibleForTesting
   public boolean useOnDemandRouting = true;
 
+  private final int taskSchedulerIdentifier;
+  private final int containerLauncherIdentifier;
+  private final int taskCommunicatorIdentifier;
+
   //fields initialized in init
 
   @VisibleForTesting
@@ -959,6 +964,33 @@ public class VertexImpl implements org.apache.tez.dag.app.dag.Vertex, EventHandl
     // This "this leak" is okay because the retained pointer is in an
     //  instance variable.
 
+    boolean isLocal = vertexConf.getBoolean(TezConfiguration.TEZ_LOCAL_MODE,
+        TezConfiguration.TEZ_LOCAL_MODE_DEFAULT);
+
+    String tezDefaultComponentName =
+        isLocal ? TezConstants.TEZ_AM_SERVICE_PLUGINS_LOCAL_MODE_NAME_DEFAULT :
+            TezConstants.TEZ_AM_SERVICE_PLUGINS_NAME_DEFAULT;
+    String taskSchedulerName =
+        vertexConf.get(TezConfiguration.TEZ_AM_VERTEX_TASK_SCHEDULER_NAME, tezDefaultComponentName);
+    String taskCommName = vertexConf
+        .get(TezConfiguration.TEZ_AM_VERTEX_TASK_COMMUNICATOR_NAME, tezDefaultComponentName);
+    String containerLauncherName = vertexConf
+        .get(TezConfiguration.TEZ_AM_VERTEX_CONTAINER_LAUNCHER_NAME, tezDefaultComponentName);
+    taskSchedulerIdentifier = appContext.getTaskScheduerIdentifier(taskSchedulerName);
+    taskCommunicatorIdentifier = appContext.getTaskCommunicatorIdentifier(taskCommName);
+    containerLauncherIdentifier = appContext.getContainerLauncherIdentifier(containerLauncherName);
+
+    Preconditions.checkNotNull(taskSchedulerIdentifier, "Unknown taskScheduler: " + taskSchedulerName);
+    Preconditions.checkNotNull(taskCommunicatorIdentifier, "Unknown taskCommunicator: " + containerLauncherName);
+    Preconditions.checkNotNull(containerLauncherIdentifier, "Unknown containerLauncher: " + taskCommName);
+
+    StringBuilder sb = new StringBuilder();
+    sb.append("Running vertex: ").append(logIdentifier).append(" : ")
+        .append("TaskScheduler=").append(taskSchedulerIdentifier).append(":").append(taskSchedulerName)
+        .append(", ContainerLauncher=").append(containerLauncherIdentifier).append(":").append(containerLauncherName)
+        .append(", TaskCommunicator=").append(taskCommunicatorIdentifier).append(":").append(taskCommName);
+    LOG.info(sb.toString());
+
     stateMachine = new StateMachineTez<VertexState, VertexEventType, VertexEvent, VertexImpl>(
         stateMachineFactory.make(this), this);
     augmentStateMachine();
@@ -969,6 +1001,21 @@ public class VertexImpl implements org.apache.tez.dag.app.dag.Vertex, EventHandl
     return vertexConf;
   }
 
+  @Override
+  public int getTaskSchedulerIdentifier() {
+    return this.taskSchedulerIdentifier;
+  }
+
+  @Override
+  public int getContainerLauncherIdentifier() {
+    return this.containerLauncherIdentifier;
+  }
+
+  @Override
+  public int getTaskCommunicatorIdentifier() {
+    return this.taskCommunicatorIdentifier;
+  }
+
   private boolean isSpeculationEnabled() {
     return isSpeculationEnabled;
   }

http://git-wip-us.apache.org/repos/asf/tez/blob/cee48099/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncherRouter.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncherRouter.java b/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncherRouter.java
index 34001ed..621e4a8 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncherRouter.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncherRouter.java
@@ -26,6 +26,7 @@ import org.apache.hadoop.service.AbstractService;
 import org.apache.hadoop.yarn.event.EventHandler;
 import org.apache.tez.common.ReflectionUtils;
 import org.apache.tez.dag.api.TezConfiguration;
+import org.apache.tez.dag.api.TezConstants;
 import org.apache.tez.dag.api.TezUncheckedException;
 import org.apache.tez.dag.app.AppContext;
 import org.apache.tez.dag.app.TaskAttemptListener;
@@ -36,73 +37,93 @@ public class ContainerLauncherRouter extends AbstractService
 
   static final Log LOG = LogFactory.getLog(ContainerLauncherImpl.class);
 
-  private final ContainerLauncher containerLauncher;
+  private final ContainerLauncher containerLaunchers[];
 
   @VisibleForTesting
   public ContainerLauncherRouter(ContainerLauncher containerLauncher) {
     super(ContainerLauncherRouter.class.getName());
-    this.containerLauncher = containerLauncher;
+    containerLaunchers = new ContainerLauncher[] {containerLauncher};
   }
 
   // Accepting conf to setup final parameters, if required.
-  public ContainerLauncherRouter(Configuration conf, boolean isLocal, AppContext context,
+  public ContainerLauncherRouter(Configuration conf, AppContext context,
                                  TaskAttemptListener taskAttemptListener,
-                                 String workingDirectory) throws UnknownHostException {
+                                 String workingDirectory,
+                                 String[] containerLauncherClassIdentifiers) throws UnknownHostException {
     super(ContainerLauncherRouter.class.getName());
 
-    if (isLocal) {
+    if (containerLauncherClassIdentifiers == null || containerLauncherClassIdentifiers.length == 0) {
+      containerLauncherClassIdentifiers = new String[] {TezConstants.TEZ_AM_SERVICE_PLUGINS_NAME_DEFAULT};
+    }
+    containerLaunchers = new ContainerLauncher[containerLauncherClassIdentifiers.length];
+
+    for (int i = 0; i < containerLauncherClassIdentifiers.length; i++) {
+      containerLaunchers[i] = createContainerLauncher(containerLauncherClassIdentifiers[i], context,
+          taskAttemptListener, workingDirectory, conf);
+    }
+  }
+
+  private ContainerLauncher createContainerLauncher(String containerLauncherClassIdentifier,
+                                                    AppContext context,
+                                                    TaskAttemptListener taskAttemptListener,
+                                                    String workingDirectory,
+                                                    Configuration conf) throws
+      UnknownHostException {
+    if (containerLauncherClassIdentifier.equals(TezConstants.TEZ_AM_SERVICE_PLUGINS_NAME_DEFAULT)) {
+      LOG.info("Creating DefaultContainerLauncher");
+      return new ContainerLauncherImpl(context);
+    } else if (containerLauncherClassIdentifier
+        .equals(TezConstants.TEZ_AM_SERVICE_PLUGINS_LOCAL_MODE_NAME_DEFAULT)) {
       LOG.info("Creating LocalContainerLauncher");
-      containerLauncher =
+      return
           new LocalContainerLauncher(context, taskAttemptListener, workingDirectory);
     } else {
-      // TODO: Temporary reflection with specific parameters until a clean interface is defined.
-      String containerLauncherClassName =
-          conf.get(TezConfiguration.TEZ_AM_CONTAINER_LAUNCHER_CLASS);
-      if (containerLauncherClassName == null) {
-        LOG.info("Creating Default Container Launcher");
-        containerLauncher = new ContainerLauncherImpl(context);
-      } else {
-        LOG.info("Creating container launcher : " + containerLauncherClassName);
-        Class<? extends ContainerLauncher> containerLauncherClazz =
-            (Class<? extends ContainerLauncher>) ReflectionUtils.getClazz(
-                containerLauncherClassName);
-        try {
-          Constructor<? extends ContainerLauncher> ctor = containerLauncherClazz
-              .getConstructor(AppContext.class, Configuration.class, TaskAttemptListener.class);
-          ctor.setAccessible(true);
-          containerLauncher = ctor.newInstance(context, conf, taskAttemptListener);
-        } catch (NoSuchMethodException e) {
-          throw new TezUncheckedException(e);
-        } catch (InvocationTargetException e) {
-          throw new TezUncheckedException(e);
-        } catch (InstantiationException e) {
-          throw new TezUncheckedException(e);
-        } catch (IllegalAccessException e) {
-          throw new TezUncheckedException(e);
-        }
+      LOG.info("Creating container launcher : " + containerLauncherClassIdentifier);
+      Class<? extends ContainerLauncher> containerLauncherClazz =
+          (Class<? extends ContainerLauncher>) ReflectionUtils.getClazz(
+              containerLauncherClassIdentifier);
+      try {
+        Constructor<? extends ContainerLauncher> ctor = containerLauncherClazz
+            .getConstructor(AppContext.class, Configuration.class, TaskAttemptListener.class);
+        ctor.setAccessible(true);
+        return ctor.newInstance(context, conf, taskAttemptListener);
+      } catch (NoSuchMethodException e) {
+        throw new TezUncheckedException(e);
+      } catch (InvocationTargetException e) {
+        throw new TezUncheckedException(e);
+      } catch (InstantiationException e) {
+        throw new TezUncheckedException(e);
+      } catch (IllegalAccessException e) {
+        throw new TezUncheckedException(e);
       }
-
     }
+    // TODO TEZ-2118 Handle routing to multiple launchers
   }
 
   @Override
   public void serviceInit(Configuration conf) {
-    ((AbstractService)containerLauncher).init(conf);
+    for (int i = 0 ; i < containerLaunchers.length ; i++) {
+      ((AbstractService) containerLaunchers[i]).init(conf);
+    }
   }
 
   @Override
   public void serviceStart() {
-    ((AbstractService)containerLauncher).start();
+    for (int i = 0 ; i < containerLaunchers.length ; i++) {
+      ((AbstractService) containerLaunchers[i]).start();
+    }
   }
 
   @Override
   public void serviceStop() {
-    ((AbstractService)containerLauncher).stop();
+    for (int i = 0 ; i < containerLaunchers.length ; i++) {
+      ((AbstractService) containerLaunchers[i]).stop();
+    }
   }
 
 
   @Override
   public void handle(NMCommunicatorEvent event) {
-    containerLauncher.handle(event);
+    containerLaunchers[0].handle(event);
   }
 }

http://git-wip-us.apache.org/repos/asf/tez/blob/cee48099/tez-dag/src/main/java/org/apache/tez/dag/app/rm/AMSchedulerEventTALaunchRequest.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/AMSchedulerEventTALaunchRequest.java b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/AMSchedulerEventTALaunchRequest.java
index 5c4d43c..c59193c 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/AMSchedulerEventTALaunchRequest.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/AMSchedulerEventTALaunchRequest.java
@@ -38,11 +38,16 @@ public class AMSchedulerEventTALaunchRequest extends AMSchedulerEvent {
   private final TaskSpec remoteTaskSpec;
   private final TaskAttempt taskAttempt;
 
+  private final int schedulerId;
+  private final int launcherId;
+  private final int taskCommId;
+
   public AMSchedulerEventTALaunchRequest(TezTaskAttemptID attemptId,
       Resource capability,
       TaskSpec remoteTaskSpec, TaskAttempt ta,
       TaskLocationHint locationHint, int priority,
-      ContainerContext containerContext) {
+      ContainerContext containerContext,
+      int schedulerId, int launcherId, int taskCommId) {
     super(AMSchedulerEventType.S_TA_LAUNCH_REQUEST);
     this.attemptId = attemptId;
     this.capability = capability;
@@ -51,6 +56,9 @@ public class AMSchedulerEventTALaunchRequest extends AMSchedulerEvent {
     this.locationHint = locationHint;
     this.priority = priority;
     this.containerContext = containerContext;
+    this.schedulerId = schedulerId;
+    this.launcherId = launcherId;
+    this.taskCommId = taskCommId;
   }
 
   public TezTaskAttemptID getAttemptID() {
@@ -81,6 +89,18 @@ public class AMSchedulerEventTALaunchRequest extends AMSchedulerEvent {
     return this.containerContext;
   }
 
+  public int getSchedulerId() {
+    return schedulerId;
+  }
+
+  public int getLauncherId() {
+    return launcherId;
+  }
+
+  public int getTaskCommId() {
+    return taskCommId;
+  }
+
   // Parameter replacement: @taskid@ will not be usable
   // ProfileTaskRange not available along with ContainerReUse
 

http://git-wip-us.apache.org/repos/asf/tez/blob/cee48099/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerEventHandler.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerEventHandler.java b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerEventHandler.java
index 8c3ed87..72389e7 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerEventHandler.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerEventHandler.java
@@ -46,6 +46,7 @@ import org.apache.hadoop.yarn.event.Event;
 import org.apache.hadoop.yarn.event.EventHandler;
 import org.apache.tez.common.ReflectionUtils;
 import org.apache.tez.dag.api.TezConfiguration;
+import org.apache.tez.dag.api.TezConstants;
 import org.apache.tez.dag.api.TezUncheckedException;
 import org.apache.tez.dag.api.TaskLocationHint;
 import org.apache.tez.dag.api.TaskLocationHint.TaskBasedLocationAffinity;
@@ -92,7 +93,6 @@ public class TaskSchedulerEventHandler extends AbstractService
   @SuppressWarnings("rawtypes")
   private final EventHandler eventHandler;
   private final String historyUrl;
-  protected TaskSchedulerService taskScheduler;
   private DAGAppMaster dagAppMaster;
   private Map<ApplicationAccessType, String> appAcls = null;
   private Thread eventHandlingThread;
@@ -105,14 +105,27 @@ public class TaskSchedulerEventHandler extends AbstractService
   private AtomicBoolean shouldUnregisterFlag =
       new AtomicBoolean(false);
   private final WebUIService webUI;
+  private final String[] taskSchedulerClasses;
+  protected final TaskSchedulerService []taskSchedulers;
 
   BlockingQueue<AMSchedulerEvent> eventQueue
                               = new LinkedBlockingQueue<AMSchedulerEvent>();
 
+  /**
+   *
+   * @param appContext
+   * @param clientService
+   * @param eventHandler
+   * @param containerSignatureMatcher
+   * @param webUI
+   * @param schedulerClasses the list of scheduler classes / codes. Tez internal classes are represented as codes.
+   *                         An empty list defaults to using the YarnTaskScheduler as the only source.
+   */
   @SuppressWarnings("rawtypes")
   public TaskSchedulerEventHandler(AppContext appContext,
       DAGClientServer clientService, EventHandler eventHandler, 
-      ContainerSignatureMatcher containerSignatureMatcher, WebUIService webUI) {
+      ContainerSignatureMatcher containerSignatureMatcher, WebUIService webUI,
+      String [] schedulerClasses) {
     super(TaskSchedulerEventHandler.class.getName());
     this.appContext = appContext;
     this.eventHandler = eventHandler;
@@ -123,6 +136,12 @@ public class TaskSchedulerEventHandler extends AbstractService
     if (this.webUI != null) {
       this.webUI.setHistoryUrl(this.historyUrl);
     }
+    if (schedulerClasses == null || schedulerClasses.length == 0) {
+      this.taskSchedulerClasses = new String[] {TezConstants.TEZ_AM_SERVICE_PLUGINS_NAME_DEFAULT};
+    } else {
+      this.taskSchedulerClasses = schedulerClasses;
+    }
+    taskSchedulers = new TaskSchedulerService[this.taskSchedulerClasses.length];
   }
 
   public Map<ApplicationAccessType, String> getApplicationAcls() {
@@ -139,11 +158,11 @@ public class TaskSchedulerEventHandler extends AbstractService
   }
   
   public Resource getAvailableResources() {
-    return taskScheduler.getAvailableResources();
+    return taskSchedulers[0].getAvailableResources();
   }
 
   public Resource getTotalResources() {
-    return taskScheduler.getTotalResources();
+    return taskSchedulers[0].getTotalResources();
   }
 
   public synchronized void handleEvent(AMSchedulerEvent sEvent) {
@@ -209,9 +228,9 @@ public class TaskSchedulerEventHandler extends AbstractService
 
   private void handleNodeBlacklistUpdate(AMSchedulerEventNodeBlacklistUpdate event) {
     if (event.getType() == AMSchedulerEventType.S_NODE_BLACKLISTED) {
-      taskScheduler.blacklistNode(event.getNodeId());
+      taskSchedulers[0].blacklistNode(event.getNodeId());
     } else if (event.getType() == AMSchedulerEventType.S_NODE_UNBLACKLISTED) {
-      taskScheduler.unblacklistNode(event.getNodeId());
+      taskSchedulers[0].unblacklistNode(event.getNodeId());
     } else {
       throw new TezUncheckedException("Invalid event type: " + event.getType());
     }
@@ -223,14 +242,14 @@ public class TaskSchedulerEventHandler extends AbstractService
     // TODO what happens to the task that was connected to this container?
     // current assumption is that it will eventually call handleTaStopRequest
     //TaskAttempt taskAttempt = (TaskAttempt)
-    taskScheduler.deallocateContainer(containerId);
+    taskSchedulers[0].deallocateContainer(containerId);
     // TODO does this container need to be stopped via C_STOP_REQUEST
     sendEvent(new AMContainerEventStopRequest(containerId));
   }
 
   private void handleTAUnsuccessfulEnd(AMSchedulerEventTAEnded event) {
     TaskAttempt attempt = event.getAttempt();
-    boolean wasContainerAllocated = taskScheduler.deallocateTask(attempt, false);
+    boolean wasContainerAllocated = taskSchedulers[0].deallocateTask(attempt, false);
     // use stored value of container id in case the scheduler has removed this
     // assignment because the task has been deallocated earlier.
     // retroactive case
@@ -272,7 +291,7 @@ public class TaskSchedulerEventHandler extends AbstractService
           event.getAttemptID()));
     }
 
-    boolean wasContainerAllocated = taskScheduler.deallocateTask(attempt, true);
+    boolean wasContainerAllocated = taskSchedulers[0].deallocateTask(attempt, true);
     if (!wasContainerAllocated) {
       LOG.error("De-allocated successful task: " + attempt.getID()
           + ", but TaskScheduler reported no container assigned to task");
@@ -297,7 +316,7 @@ public class TaskSchedulerEventHandler extends AbstractService
         TaskAttempt affinityAttempt = vertex.getTask(taskIndex).getSuccessfulAttempt();
         if (affinityAttempt != null) {
           Preconditions.checkNotNull(affinityAttempt.getAssignedContainerID(), affinityAttempt.getID());
-          taskScheduler.allocateTask(taskAttempt,
+          taskSchedulers[0].allocateTask(taskAttempt,
               event.getCapability(),
               affinityAttempt.getAssignedContainerID(),
               Priority.newInstance(event.getPriority()),
@@ -316,57 +335,59 @@ public class TaskSchedulerEventHandler extends AbstractService
             .toArray(new String[locationHint.getRacks().size()]) : null;
       }
     }
-    
-    taskScheduler.allocateTask(taskAttempt,
-                               event.getCapability(),
-                               hosts,
-                               racks,
-                               Priority.newInstance(event.getPriority()),
-                               event.getContainerContext(),
-                               event);
-  }
-
-
-  protected TaskSchedulerService createTaskScheduler(String host, int port,
-      String trackingUrl, AppContext appContext) {
-    boolean isLocal = getConfig().getBoolean(TezConfiguration.TEZ_LOCAL_MODE,
-        TezConfiguration.TEZ_LOCAL_MODE_DEFAULT);
-    if (isLocal) {
-      LOG.info("Using TaskScheduler: LocalTaskSchedulerService");
+
+    taskSchedulers[0].allocateTask(taskAttempt,
+        event.getCapability(),
+        hosts,
+        racks,
+        Priority.newInstance(event.getPriority()),
+        event.getContainerContext(),
+        event);
+  }
+
+  private TaskSchedulerService createTaskScheduler(String host, int port, String trackingUrl,
+                                                   AppContext appContext,
+                                                   String schedulerClassName) {
+    if (schedulerClassName.equals(TezConstants.TEZ_AM_SERVICE_PLUGINS_NAME_DEFAULT)) {
+      LOG.info("Creating TaskScheduler: YarnTaskSchedulerService");
+      return new YarnTaskSchedulerService(this, this.containerSignatureMatcher,
+          host, port, trackingUrl, appContext);
+    } else if (schedulerClassName.equals(TezConstants.TEZ_AM_SERVICE_PLUGINS_LOCAL_MODE_NAME_DEFAULT)) {
+      LOG.info("Creating TaskScheduler: Local TaskScheduler");
       return new LocalTaskSchedulerService(this, this.containerSignatureMatcher,
           host, port, trackingUrl, appContext);
-    }
-    else {
-      String schedulerClassName = getConfig().get(TezConfiguration.TEZ_AM_TASK_SCHEDULER_CLASS);
-      if (schedulerClassName == null) {
-        LOG.info("Using TaskScheduler: YarnTaskSchedulerService");
-        return new YarnTaskSchedulerService(this, this.containerSignatureMatcher,
-            host, port, trackingUrl, appContext);
-      } else {
-        LOG.info("Using custom TaskScheduler: " + schedulerClassName);
-        // TODO Temporary reflection with specific parameters. Remove once there is a clean interface.
-        Class<? extends TaskSchedulerService> taskSchedulerClazz =
-            (Class<? extends TaskSchedulerService>) ReflectionUtils.getClazz(schedulerClassName);
-        try {
-          Constructor<? extends TaskSchedulerService> ctor = taskSchedulerClazz
-              .getConstructor(TaskSchedulerAppCallback.class, AppContext.class, String.class,
-                  int.class, String.class, Configuration.class);
-          ctor.setAccessible(true);
-          TaskSchedulerService taskSchedulerService =
-              ctor.newInstance(this, appContext, host, port, trackingUrl, getConfig());
-          return taskSchedulerService;
-        } catch (NoSuchMethodException e) {
-          throw new TezUncheckedException(e);
-        } catch (InvocationTargetException e) {
-          throw new TezUncheckedException(e);
-        } catch (InstantiationException e) {
-          throw new TezUncheckedException(e);
-        } catch (IllegalAccessException e) {
-          throw new TezUncheckedException(e);
-        }
+    } else {
+      LOG.info("Creating custom TaskScheduler: " + schedulerClassName);
+      // TODO TEZ-2003 Temporary reflection with specific parameters. Remove once there is a clean interface.
+      Class<? extends TaskSchedulerService> taskSchedulerClazz =
+          (Class<? extends TaskSchedulerService>) ReflectionUtils.getClazz(schedulerClassName);
+      try {
+        Constructor<? extends TaskSchedulerService> ctor = taskSchedulerClazz
+            .getConstructor(TaskSchedulerAppCallback.class, AppContext.class, String.class,
+                int.class, String.class, Configuration.class);
+        ctor.setAccessible(true);
+        return ctor.newInstance(this, appContext, host, port, trackingUrl, getConfig());
+      } catch (NoSuchMethodException e) {
+        throw new TezUncheckedException(e);
+      } catch (InvocationTargetException e) {
+        throw new TezUncheckedException(e);
+      } catch (InstantiationException e) {
+        throw new TezUncheckedException(e);
+      } catch (IllegalAccessException e) {
+        throw new TezUncheckedException(e);
       }
     }
   }
+
+  @VisibleForTesting
+  protected void instantiateScheduelrs(String host, int port, String trackingUrl, AppContext appContext) {
+    // Iterate over the list and create all the taskSchedulers
+    for (int i = 0; i < taskSchedulerClasses.length; i++) {
+      taskSchedulers[i] = createTaskScheduler(host, port,
+          trackingUrl, appContext, taskSchedulerClasses[i]);
+    }
+  }
+
   
   @Override
   public synchronized void serviceStart() {
@@ -377,13 +398,17 @@ public class TaskSchedulerEventHandler extends AbstractService
     // always try to connect to AM and proxy the response. hence it wont work if the webUIService
     // is not enabled.
     String trackingUrl = (webUI != null) ? webUI.getTrackingURL() : "";
-    taskScheduler = createTaskScheduler(serviceAddr.getHostName(),
-        serviceAddr.getPort(), trackingUrl, appContext);
-    taskScheduler.init(getConfig());
-    taskScheduler.start();
+    instantiateScheduelrs(serviceAddr.getHostName(), serviceAddr.getPort(), trackingUrl, appContext);
+
+    for (int i = 0 ; i < taskSchedulers.length ; i++) {
+      taskSchedulers[i].init(getConfig());
+      taskSchedulers[i].start();
+    }
+
+    // TODO TEZ-2118 Start using multiple task schedulers
     if (shouldUnregisterFlag.get()) {
       // Flag may have been set earlier when task scheduler was not initialized
-      taskScheduler.setShouldUnregister();
+      taskSchedulers[0].setShouldUnregister();
     }
 
     this.eventHandlingThread = new Thread("TaskSchedulerEventHandlerThread") {
@@ -432,8 +457,8 @@ public class TaskSchedulerEventHandler extends AbstractService
       if (eventHandlingThread != null)
         eventHandlingThread.interrupt();
     }
-    if (taskScheduler != null) {
-      ((AbstractService)taskScheduler).stop();
+    if (taskSchedulers[0] != null) {
+      ((AbstractService)taskSchedulers[0]).stop();
     }
   }
 
@@ -578,7 +603,7 @@ public class TaskSchedulerEventHandler extends AbstractService
   public float getProgress() {
     // at this point allocate has been called and so node count must be available
     // may change after YARN-1722
-    int nodeCount = taskScheduler.getClusterNodeCount();
+    int nodeCount = taskSchedulers[0].getClusterNodeCount();
     if (nodeCount != cachedNodeCount) {
       cachedNodeCount = nodeCount;
       sendEvent(new AMNodeEventNodeCountUpdated(cachedNodeCount));
@@ -593,7 +618,7 @@ public class TaskSchedulerEventHandler extends AbstractService
   }
 
   public void dagCompleted() {
-    taskScheduler.dagComplete();
+    taskSchedulers[0].dagComplete();
   }
 
   public void dagSubmitted() {
@@ -603,7 +628,7 @@ public class TaskSchedulerEventHandler extends AbstractService
 
   @Override
   public void preemptContainer(ContainerId containerId) {
-    taskScheduler.deallocateContainer(containerId);
+    taskSchedulers[0].deallocateContainer(containerId);
     // Inform the Containers about completion.
     sendEvent(new AMContainerEventCompleted(containerId, ContainerExitStatus.INVALID,
         "Container preempted internally", TaskAttemptTerminationCause.INTERNAL_PREEMPTION));
@@ -612,13 +637,13 @@ public class TaskSchedulerEventHandler extends AbstractService
   public void setShouldUnregisterFlag() {
     LOG.info("TaskScheduler notified that it should unregister from RM");
     this.shouldUnregisterFlag.set(true);
-    if (this.taskScheduler != null) {
-      this.taskScheduler.setShouldUnregister();
+    if (this.taskSchedulers[0] != null) {
+      this.taskSchedulers[0].setShouldUnregister();
     }
   }
 
   public boolean hasUnregistered() {
-    return this.taskScheduler.hasUnregistered();
+    return this.taskSchedulers[0].hasUnregistered();
   }
 
   @VisibleForTesting

http://git-wip-us.apache.org/repos/asf/tez/blob/cee48099/tez-dag/src/test/java/org/apache/tez/dag/app/MockDAGAppMaster.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/MockDAGAppMaster.java b/tez-dag/src/test/java/org/apache/tez/dag/app/MockDAGAppMaster.java
index 7274cde..aace92b 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/MockDAGAppMaster.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/MockDAGAppMaster.java
@@ -507,10 +507,11 @@ public class MockDAGAppMaster extends DAGAppMaster {
     this.handlerConcurrency = handlerConcurrency;
     this.numConcurrentContainers = numConcurrentContainers;
   }
-  
+
   // use mock container launcher for tests
   @Override
-  protected ContainerLauncherRouter createContainerLauncherRouter(final Configuration conf)
+  protected ContainerLauncherRouter createContainerLauncherRouter(final Configuration conf,
+                                                                  String[] containerLaunchers)
       throws UnknownHostException {
     return new ContainerLauncherRouter(containerLauncher);
   }

http://git-wip-us.apache.org/repos/asf/tez/blob/cee48099/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskAttemptListenerImplTezDag.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskAttemptListenerImplTezDag.java b/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskAttemptListenerImplTezDag.java
index 46c412e..33f4817 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskAttemptListenerImplTezDag.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskAttemptListenerImplTezDag.java
@@ -32,6 +32,7 @@ import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 
+import org.apache.hadoop.security.Credentials;
 import org.apache.hadoop.yarn.api.records.ApplicationAccessType;
 import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
 import org.apache.hadoop.yarn.api.records.ApplicationId;
@@ -109,18 +110,9 @@ public class TestTaskAttemptListenerImplTezDag {
     doReturn(dag).when(appContext).getCurrentDAG();
     doReturn(appAcls).when(appContext).getApplicationACLs();
     doReturn(amContainerMap).when(appContext).getAllContainers();
-    NodeId nodeId = NodeId.newInstance("localhost", 0);
-    AMContainer amContainer = mock(AMContainer.class);
-    Container container = mock(Container.class);
-    doReturn(nodeId).when(container).getNodeId();
-    doReturn(amContainer).when(amContainerMap).get(any(ContainerId.class));
-    doReturn(container).when(amContainer).getContainer();
-
-    taskAttemptListener =
-        new TaskAttemptListenerImpTezDag(appContext, mock(TaskHeartbeatHandler.class),
-            mock(ContainerHeartbeatHandler.class), null);
-    TezTaskCommunicatorImpl taskCommunicator = (TezTaskCommunicatorImpl)taskAttemptListener.getTaskCommunicator();
-    TezTaskUmbilicalProtocol tezUmbilical = taskCommunicator.getUmbilical();
+
+    taskAttemptListener = new TaskAttemptListenerImplForTest(appContext,
+        mock(TaskHeartbeatHandler.class), mock(ContainerHeartbeatHandler.class), null);
 
     taskSpec = mock(TaskSpec.class);
     doReturn(taskAttemptID).when(taskSpec).getTaskAttemptID();
@@ -131,6 +123,9 @@ public class TestTaskAttemptListenerImplTezDag {
   @Test(timeout = 5000)
   public void testGetTask() throws IOException {
 
+    TezTaskCommunicatorImpl taskCommunicator = (TezTaskCommunicatorImpl)taskAttemptListener.getTaskCommunicator();
+    TezTaskUmbilicalProtocol tezUmbilical = taskCommunicator.getUmbilical();
+
     ContainerId containerId1 = createContainerId(appId, 1);
     ContainerContext containerContext1 = new ContainerContext(containerId1.toString());
     containerTask = tezUmbilical.getTask(containerContext1);

http://git-wip-us.apache.org/repos/asf/tez/blob/cee48099/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestContainerReuse.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestContainerReuse.java b/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestContainerReuse.java
index 89b77a7..54b9adb 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestContainerReuse.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestContainerReuse.java
@@ -1322,7 +1322,7 @@ public class TestContainerReuse {
           InputDescriptor.create("inputClassName"), 1)),
       Collections.singletonList(new OutputSpec("vertexName",
           OutputDescriptor.create("outputClassName"), 1)), null), ta, locationHint,
-      priority.getPriority(), containerContext);
+      priority.getPriority(), containerContext, 0, 0, 0);
     return lr;
   }
 

http://git-wip-us.apache.org/repos/asf/tez/blob/cee48099/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestTaskSchedulerEventHandler.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestTaskSchedulerEventHandler.java b/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestTaskSchedulerEventHandler.java
index af3e40d..291e786 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestTaskSchedulerEventHandler.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestTaskSchedulerEventHandler.java
@@ -89,13 +89,13 @@ public class TestTaskSchedulerEventHandler {
     public MockTaskSchedulerEventHandler(AppContext appContext,
         DAGClientServer clientService, EventHandler eventHandler,
         ContainerSignatureMatcher containerSignatureMatcher, WebUIService webUI) {
-      super(appContext, clientService, eventHandler, containerSignatureMatcher, webUI);
+      super(appContext, clientService, eventHandler, containerSignatureMatcher, webUI, new String[] {});
     }
-    
+
     @Override
-    protected TaskSchedulerService createTaskScheduler(String host, int port,
-        String trackingUrl, AppContext appContext) {
-      return mockTaskScheduler;
+    protected void instantiateScheduelrs(String host, int port, String trackingUrl,
+                                         AppContext appContext) {
+      taskSchedulers[0] = mockTaskScheduler;
     }
     
     @Override
@@ -194,7 +194,7 @@ public class TestTaskSchedulerEventHandler {
     when(mockAppContext.getCurrentDAG().getVertex(affVertexName)).thenReturn(affVertex);
     Resource resource = Resource.newInstance(100, 1);
     AMSchedulerEventTALaunchRequest event = new AMSchedulerEventTALaunchRequest
-        (taId, resource, null, mockTaskAttempt, locHint, 3, null);
+        (taId, resource, null, mockTaskAttempt, locHint, 3, null, 0, 0, 0);
     schedulerHandler.notify.set(false);
     schedulerHandler.handle(event);
     synchronized (schedulerHandler.notify) {

http://git-wip-us.apache.org/repos/asf/tez/blob/cee48099/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestTaskSchedulerHelpers.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestTaskSchedulerHelpers.java b/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestTaskSchedulerHelpers.java
index 77c98b7..d775300 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestTaskSchedulerHelpers.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestTaskSchedulerHelpers.java
@@ -127,31 +127,29 @@ class TestTaskSchedulerHelpers {
         EventHandler eventHandler,
         TezAMRMClientAsync<CookieContainerRequest> amrmClientAsync,
         ContainerSignatureMatcher containerSignatureMatcher) {
-      super(appContext, null, eventHandler, containerSignatureMatcher, null);
+      super(appContext, null, eventHandler, containerSignatureMatcher, null, new String[]{});
       this.amrmClientAsync = amrmClientAsync;
       this.containerSignatureMatcher = containerSignatureMatcher;
     }
 
     @Override
-    public TaskSchedulerService createTaskScheduler(String host, int port,
-        String trackingUrl, AppContext appContext) {
-      return new TaskSchedulerWithDrainableAppCallback(this,
+    public void instantiateScheduelrs(String host, int port, String trackingUrl, AppContext appContext) {
+      taskSchedulers[0] = new TaskSchedulerWithDrainableAppCallback(this,
           containerSignatureMatcher, host, port, trackingUrl, amrmClientAsync,
           appContext);
     }
 
     public TaskSchedulerService getSpyTaskScheduler() {
-      return this.taskScheduler;
+      return taskSchedulers[0];
     }
 
     @Override
     public void serviceStart() {
-      TaskSchedulerService taskSchedulerReal = createTaskScheduler("host", 0, "",
-        appContext);
+      instantiateScheduelrs("host", 0, "", appContext);
       // Init the service so that reuse configuration is picked up.
-      ((AbstractService)taskSchedulerReal).init(getConfig());
-      ((AbstractService)taskSchedulerReal).start();
-      taskScheduler = spy(taskSchedulerReal);
+      ((AbstractService)taskSchedulers[0]).init(getConfig());
+      ((AbstractService)taskSchedulers[0]).start();
+      taskSchedulers[0] = spy(taskSchedulers[0]);
     }
 
     @Override

http://git-wip-us.apache.org/repos/asf/tez/blob/cee48099/tez-ext-service-tests/src/test/java/org/apache/tez/tests/TestExternalTezServices.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/tests/TestExternalTezServices.java b/tez-ext-service-tests/src/test/java/org/apache/tez/tests/TestExternalTezServices.java
index a93c1a4..ae7e7f8 100644
--- a/tez-ext-service-tests/src/test/java/org/apache/tez/tests/TestExternalTezServices.java
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/tests/TestExternalTezServices.java
@@ -45,6 +45,8 @@ public class TestExternalTezServices {
 
   private static final Log LOG = LogFactory.getLog(TestExternalTezServices.class);
 
+  private static final String EXT_PUSH_ENTITY_NAME = "ExtServiceTestPush";
+
   private static MiniTezCluster tezCluster;
   private static MiniDFSCluster dfsCluster;
   private static MiniTezTestServiceCluster tezTestServiceCluster;
@@ -106,12 +108,17 @@ public class TestExternalTezServices {
     remoteFs.mkdirs(stagingDirPath);
     // This is currently configured to push tasks into the Service, and then use the standard RPC
     confForJobs.set(TezConfiguration.TEZ_AM_STAGING_DIR, stagingDirPath.toString());
-    confForJobs.set(TezConfiguration.TEZ_AM_TASK_SCHEDULER_CLASS,
-        TezTestServiceTaskSchedulerService.class.getName());
-    confForJobs.set(TezConfiguration.TEZ_AM_CONTAINER_LAUNCHER_CLASS,
-        TezTestServiceNoOpContainerLauncher.class.getName());
-    confForJobs.set(TezConfiguration.TEZ_AM_TASK_COMMUNICATOR_CLASS,
-        TezTestServiceTaskCommunicatorImpl.class.getName());
+    confForJobs.set(TezConfiguration.TEZ_AM_TASK_SCHEDULERS,
+        EXT_PUSH_ENTITY_NAME + ":" + TezTestServiceTaskSchedulerService.class.getName());
+    confForJobs.set(TezConfiguration.TEZ_AM_CONTAINER_LAUNCHERS,
+        EXT_PUSH_ENTITY_NAME + ":" + TezTestServiceNoOpContainerLauncher.class.getName());
+    confForJobs.set(TezConfiguration.TEZ_AM_TASK_COMMUNICATORS,
+        EXT_PUSH_ENTITY_NAME + ":" + TezTestServiceTaskCommunicatorImpl.class.getName());
+
+    confForJobs.set(TezConfiguration.TEZ_AM_VERTEX_TASK_SCHEDULER_NAME, EXT_PUSH_ENTITY_NAME);
+    confForJobs.set(TezConfiguration.TEZ_AM_VERTEX_CONTAINER_LAUNCHER_NAME, EXT_PUSH_ENTITY_NAME);
+    confForJobs.set(TezConfiguration.TEZ_AM_VERTEX_TASK_COMMUNICATOR_NAME, EXT_PUSH_ENTITY_NAME);
+
 
     TezConfiguration tezConf = new TezConfiguration(confForJobs);
 


[39/43] tez git commit: TEZ-2361. Propagate dag completion to TaskCommunicator. (sseth)

Posted by ss...@apache.org.
TEZ-2361. Propagate dag completion to TaskCommunicator. (sseth)


Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/01374671
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/01374671
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/01374671

Branch: refs/heads/TEZ-2003
Commit: 0137467140aaee7d3dd2f75264d34544972a45ef
Parents: 44f5e8f
Author: Siddharth Seth <ss...@apache.org>
Authored: Thu Apr 23 17:26:25 2015 -0700
Committer: Siddharth Seth <ss...@apache.org>
Committed: Fri May 8 14:41:55 2015 -0700

----------------------------------------------------------------------
 TEZ-2003-CHANGES.txt                            |  1 +
 .../apache/tez/dag/api/TaskCommunicator.java    | 12 +++-
 .../org/apache/tez/dag/app/DAGAppMaster.java    |  4 +-
 .../dag/app/TaskAttemptListenerImpTezDag.java   | 17 +++++-
 .../dag/app/TaskCommunicatorContextImpl.java    | 64 +++++++++++++++++---
 .../tez/dag/app/TezTaskCommunicatorImpl.java    |  5 ++
 .../tez/dag/app/launcher/ContainerLauncher.java |  3 -
 .../dag/app/launcher/ContainerLauncherImpl.java | 12 ----
 .../app/launcher/ContainerLauncherRouter.java   | 10 +++
 .../app/launcher/LocalContainerLauncher.java    |  9 ---
 .../apache/tez/dag/app/MockDAGAppMaster.java    | 11 ----
 .../rm/TezTestServiceTaskSchedulerService.java  |  2 +-
 .../TezTestServiceTaskCommunicatorImpl.java     |  2 +-
 .../tez/tests/TestExternalTezServices.java      |  2 +
 14 files changed, 103 insertions(+), 51 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tez/blob/01374671/TEZ-2003-CHANGES.txt
----------------------------------------------------------------------
diff --git a/TEZ-2003-CHANGES.txt b/TEZ-2003-CHANGES.txt
index 7c13110..f6bc8e7 100644
--- a/TEZ-2003-CHANGES.txt
+++ b/TEZ-2003-CHANGES.txt
@@ -17,5 +17,6 @@ ALL CHANGES:
   TEZ-2285. Allow TaskCommunicators to indicate task/container liveness.
   TEZ-2302. Allow TaskCommunicators to subscribe for Vertex updates.
   TEZ-2347. Expose additional information in TaskCommunicatorContext.
+  TEZ-2361. Propagate dag completion to TaskCommunicator.
 
 INCOMPATIBLE CHANGES:

http://git-wip-us.apache.org/repos/asf/tez/blob/01374671/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicator.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicator.java b/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicator.java
index a2cd858..cadca0c 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicator.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicator.java
@@ -14,7 +14,6 @@
 
 package org.apache.tez.dag.api;
 
-import java.io.IOException;
 import java.net.InetSocketAddress;
 import java.util.Map;
 
@@ -74,4 +73,15 @@ public abstract class TaskCommunicator extends AbstractService {
    * @throws Exception
    */
   public abstract void onVertexStateUpdated(VertexStateUpdate stateUpdate) throws Exception;
+
+  /**
+   * Indicates the current running dag is complete. The TaskCommunicatorContext can be used to
+   * query information about the current dag during the duration of the dagComplete invocation.
+   *
+   * After this, the contents returned from querying the context may change at any point - due to
+   * the next dag being submitted.
+   */
+  // TODO TEZ-2003 This is extremely difficult to use. Add the dagStarted notification, and potentially
+  // throw exceptions between a dagComplete and dagStart invocation.
+  public abstract void dagComplete(String dagName);
 }

http://git-wip-us.apache.org/repos/asf/tez/blob/01374671/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java b/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java
index 1ea369e..568e929 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java
@@ -759,7 +759,7 @@ public class DAGAppMaster extends AbstractService {
       DAGAppMasterEventDagCleanup cleanupEvent = (DAGAppMasterEventDagCleanup) event;
       LOG.info("Cleaning up DAG: name=" + cleanupEvent.getDag().getName() + ", with id=" +
           cleanupEvent.getDag().getID());
-      containerLauncher.dagComplete(cleanupEvent.getDag());
+      containerLauncherRouter.dagComplete(cleanupEvent.getDag());
       taskAttemptListener.dagComplete(cleanupEvent.getDag());
       nodes.dagComplete(cleanupEvent.getDag());
       containers.dagComplete(cleanupEvent.getDag());
@@ -773,7 +773,7 @@ public class DAGAppMaster extends AbstractService {
     case NEW_DAG_SUBMITTED:
       // Inform sub-components that a new DAG has been submitted.
       taskSchedulerEventHandler.dagSubmitted();
-      containerLauncher.dagSubmitted();
+      containerLauncherRouter.dagSubmitted();
       taskAttemptListener.dagSubmitted();
       break;
     default:

http://git-wip-us.apache.org/repos/asf/tez/blob/01374671/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java b/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
index aaf9cca..03b5602 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
@@ -81,6 +81,7 @@ public class TaskAttemptListenerImpTezDag extends AbstractService implements
 
   private final AppContext context;
   private final TaskCommunicator[] taskCommunicators;
+  private final TaskCommunicatorContext[] taskCommunicatorContexts;
 
   protected final TaskHeartbeatHandler taskHeartbeatHandler;
   protected final ContainerHeartbeatHandler containerHeartbeatHandler;
@@ -123,7 +124,9 @@ public class TaskAttemptListenerImpTezDag extends AbstractService implements
       }
     }
     this.taskCommunicators = new TaskCommunicator[taskCommunicatorClassIdentifiers.length];
+    this.taskCommunicatorContexts = new TaskCommunicatorContext[taskCommunicatorClassIdentifiers.length];
     for (int i = 0 ; i < taskCommunicatorClassIdentifiers.length ; i++) {
+      taskCommunicatorContexts[i] = new TaskCommunicatorContextImpl(context, this, i);
       taskCommunicators[i] = createTaskCommunicator(taskCommunicatorClassIdentifiers[i], i);
     }
     // TODO TEZ-2118 Start using taskCommunicator indices properly
@@ -148,10 +151,10 @@ public class TaskAttemptListenerImpTezDag extends AbstractService implements
   private TaskCommunicator createTaskCommunicator(String taskCommClassIdentifier, int taskCommIndex) {
     if (taskCommClassIdentifier.equals(TezConstants.TEZ_AM_SERVICE_PLUGINS_NAME_DEFAULT)) {
       LOG.info("Using Default Task Communicator");
-      return new TezTaskCommunicatorImpl(new TaskCommunicatorContextImpl(context, this, taskCommIndex));
+      return new TezTaskCommunicatorImpl(taskCommunicatorContexts[taskCommIndex]);
     } else if (taskCommClassIdentifier.equals(TezConstants.TEZ_AM_SERVICE_PLUGINS_LOCAL_MODE_NAME_DEFAULT)) {
       LOG.info("Using Default Local Task Communicator");
-      return new TezLocalTaskCommunicatorImpl(new TaskCommunicatorContextImpl(context, this, taskCommIndex));
+      return new TezLocalTaskCommunicatorImpl(taskCommunicatorContexts[taskCommIndex]);
     } else {
       LOG.info("Using TaskCommunicator: " + taskCommClassIdentifier);
       Class<? extends TaskCommunicator> taskCommClazz = (Class<? extends TaskCommunicator>) ReflectionUtils
@@ -159,7 +162,7 @@ public class TaskAttemptListenerImpTezDag extends AbstractService implements
       try {
         Constructor<? extends TaskCommunicator> ctor = taskCommClazz.getConstructor(TaskCommunicatorContext.class);
         ctor.setAccessible(true);
-        return ctor.newInstance(new TaskCommunicatorContextImpl(context, this, taskCommIndex));
+        return ctor.newInstance(taskCommunicatorContexts[taskCommIndex]);
       } catch (NoSuchMethodException e) {
         throw new TezUncheckedException(e);
       } catch (InvocationTargetException e) {
@@ -318,6 +321,14 @@ public class TaskAttemptListenerImpTezDag extends AbstractService implements
     // This becomes more relevant when task kills without container kills are allowed.
 
     // TODO TEZ-2336. Send a signal to containers indicating DAG completion.
+
+    // Inform all communicators of the dagCompletion.
+    for (int i = 0 ; i < taskCommunicators.length ; i++) {
+      ((TaskCommunicatorContextImpl)taskCommunicatorContexts[i]).dagCompleteStart(dag);
+      taskCommunicators[i].dagComplete(dag.getName());
+      ((TaskCommunicatorContextImpl)taskCommunicatorContexts[i]).dagCompleteEnd();
+    }
+
   }
 
   @Override

http://git-wip-us.apache.org/repos/asf/tez/blob/01374671/tez-dag/src/main/java/org/apache/tez/dag/app/TaskCommunicatorContextImpl.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/TaskCommunicatorContextImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/TaskCommunicatorContextImpl.java
index 4cb0c93..790066f 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/TaskCommunicatorContextImpl.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/TaskCommunicatorContextImpl.java
@@ -17,6 +17,11 @@ package org.apache.tez.dag.app;
 import javax.annotation.Nullable;
 import java.io.IOException;
 import java.util.Set;
+import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.concurrent.locks.Lock;
+import java.util.concurrent.locks.ReadWriteLock;
+import java.util.concurrent.locks.ReentrantLock;
+import java.util.concurrent.locks.ReentrantReadWriteLock;
 
 import com.google.common.base.Function;
 import com.google.common.base.Preconditions;
@@ -33,6 +38,7 @@ import org.apache.tez.dag.api.TezException;
 import org.apache.tez.dag.api.TezUncheckedException;
 import org.apache.tez.dag.api.event.VertexState;
 import org.apache.tez.dag.api.event.VertexStateUpdate;
+import org.apache.tez.dag.app.dag.DAG;
 import org.apache.tez.dag.app.dag.Vertex;
 import org.apache.tez.dag.app.dag.VertexStateUpdateListener;
 import org.apache.tez.dag.records.TezTaskAttemptID;
@@ -44,6 +50,10 @@ public class TaskCommunicatorContextImpl implements TaskCommunicatorContext, Ver
   private final AppContext context;
   private final TaskAttemptListenerImpTezDag taskAttemptListener;
   private final int taskCommunicatorIndex;
+  private final ReentrantReadWriteLock.ReadLock dagChangedReadLock;
+  private final ReentrantReadWriteLock.WriteLock dagChangedWriteLock;
+
+  private DAG dag;
 
   public TaskCommunicatorContextImpl(AppContext appContext,
                                      TaskAttemptListenerImpTezDag taskAttemptListener,
@@ -51,6 +61,10 @@ public class TaskCommunicatorContextImpl implements TaskCommunicatorContext, Ver
     this.context = appContext;
     this.taskAttemptListener = taskAttemptListener;
     this.taskCommunicatorIndex = taskCommunicatorIndex;
+
+    ReentrantReadWriteLock dagChangedLock = new ReentrantReadWriteLock();
+    dagChangedReadLock = dagChangedLock.readLock();
+    dagChangedWriteLock = dagChangedLock.writeLock();
   }
 
   @Override
@@ -111,18 +125,19 @@ public class TaskCommunicatorContextImpl implements TaskCommunicatorContext, Ver
   public void registerForVertexStateUpdates(String vertexName,
                                             @Nullable Set<VertexState> stateSet) {
     Preconditions.checkNotNull(vertexName, "VertexName cannot be null: " + vertexName);
-    context.getCurrentDAG().getStateChangeNotifier().registerForVertexUpdates(vertexName, stateSet, this);
+    getDag().getStateChangeNotifier().registerForVertexUpdates(vertexName, stateSet,
+        this);
   }
 
   @Override
   public String getCurretnDagName() {
-    return context.getCurrentDAG().getName();
+    return getDag().getName();
   }
 
   @Override
   public Iterable<String> getInputVertexNames(String vertexName) {
     Preconditions.checkNotNull(vertexName, "VertexName cannot be null: " + vertexName);
-    Vertex vertex = context.getCurrentDAG().getVertex(vertexName);
+    Vertex vertex = getDag().getVertex(vertexName);
     Set<Vertex> sources = vertex.getInputVertices().keySet();
     return Iterables.transform(sources, new Function<Vertex, String>() {
       @Override
@@ -135,31 +150,32 @@ public class TaskCommunicatorContextImpl implements TaskCommunicatorContext, Ver
   @Override
   public int getVertexTotalTaskCount(String vertexName) {
     Preconditions.checkArgument(vertexName != null, "VertexName must be specified");
-    return context.getCurrentDAG().getVertex(vertexName).getTotalTasks();
+    return getDag().getVertex(vertexName).getTotalTasks();
   }
 
   @Override
   public int getVertexCompletedTaskCount(String vertexName) {
     Preconditions.checkArgument(vertexName != null, "VertexName must be specified");
-    return context.getCurrentDAG().getVertex(vertexName).getCompletedTasks();
+    return getDag().getVertex(vertexName).getCompletedTasks();
   }
 
   @Override
   public int getVertexRunningTaskCount(String vertexName) {
     Preconditions.checkArgument(vertexName != null, "VertexName must be specified");
-    return context.getCurrentDAG().getVertex(vertexName).getRunningTasks();
+    return getDag().getVertex(vertexName).getRunningTasks();
   }
 
   @Override
   public long getFirstAttemptStartTime(String vertexName, int taskIndex) {
     Preconditions.checkArgument(vertexName != null, "VertexName must be specified");
     Preconditions.checkArgument(taskIndex >=0, "TaskIndex must be > 0");
-    return context.getCurrentDAG().getVertex(vertexName).getTask(taskIndex).getFirstAttemptStartTime();
+    return getDag().getVertex(vertexName).getTask(
+        taskIndex).getFirstAttemptStartTime();
   }
 
   @Override
   public long getDagStartTime() {
-    return context.getCurrentDAG().getStartTime();
+    return getDag().getStartTime();
   }
 
   @Override
@@ -171,4 +187,36 @@ public class TaskCommunicatorContextImpl implements TaskCommunicatorContext, Ver
       throw new TezUncheckedException(e);
     }
   }
+
+  private DAG getDag() {
+    dagChangedReadLock.lock();
+    try {
+      if (dag != null) {
+        return dag;
+      } else {
+        return context.getCurrentDAG();
+      }
+    } finally {
+      dagChangedReadLock.unlock();
+    }
+  }
+
+  @InterfaceAudience.Private
+  public void dagCompleteStart(DAG dag) {
+    dagChangedWriteLock.lock();
+    try {
+      this.dag = dag;
+    } finally {
+      dagChangedWriteLock.unlock();
+    }
+  }
+
+  public void dagCompleteEnd() {
+    dagChangedWriteLock.lock();
+    try {
+      this.dag = null;
+    } finally {
+      dagChangedWriteLock.unlock();
+    }
+  }
 }

http://git-wip-us.apache.org/repos/asf/tez/blob/01374671/tez-dag/src/main/java/org/apache/tez/dag/app/TezTaskCommunicatorImpl.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/TezTaskCommunicatorImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/TezTaskCommunicatorImpl.java
index 1417a3b..825a4d2 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/TezTaskCommunicatorImpl.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/TezTaskCommunicatorImpl.java
@@ -258,6 +258,11 @@ public class TezTaskCommunicatorImpl extends TaskCommunicator {
     // Empty. Not registering, or expecting any updates.
   }
 
+  @Override
+  public void dagComplete(String dagName) {
+    // Nothing to do at the moment. Some of the TODOs from TaskAttemptListener apply here.
+  }
+
   protected String getTokenIdentifier() {
     return tokenIdentifier;
   }

http://git-wip-us.apache.org/repos/asf/tez/blob/01374671/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncher.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncher.java b/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncher.java
index 8a8498f..ea07a1d 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncher.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncher.java
@@ -26,7 +26,4 @@ import org.apache.tez.dag.app.rm.NMCommunicatorEvent;
 public interface ContainerLauncher
     extends EventHandler<NMCommunicatorEvent> {
 
-    void dagComplete(DAG dag);
-
-    void dagSubmitted();
 }

http://git-wip-us.apache.org/repos/asf/tez/blob/01374671/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncherImpl.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncherImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncherImpl.java
index 94889a1..a1eb2a7 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncherImpl.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncherImpl.java
@@ -30,7 +30,6 @@ import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicBoolean;
 
 import org.apache.commons.lang.exception.ExceptionUtils;
-import org.apache.tez.dag.app.dag.DAG;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.hadoop.conf.Configuration;
@@ -111,17 +110,6 @@ public class ContainerLauncherImpl extends AbstractService implements
     }
   }
 
-  @Override
-  public void dagComplete(DAG dag) {
-    // Nothing required at the moment. Containers are shared across DAGs
-  }
-
-  @Override
-  public void dagSubmitted() {
-    // Nothing to do right now. Indicates that a new DAG has been submitted and
-    // the context has updated information.
-  }
-
   private static enum ContainerState {
     PREP, FAILED, RUNNING, DONE, KILLED_BEFORE_LAUNCH
   }

http://git-wip-us.apache.org/repos/asf/tez/blob/01374671/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncherRouter.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncherRouter.java b/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncherRouter.java
index dd3571e..db145f4 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncherRouter.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncherRouter.java
@@ -27,6 +27,7 @@ import org.apache.tez.dag.api.TezConstants;
 import org.apache.tez.dag.api.TezUncheckedException;
 import org.apache.tez.dag.app.AppContext;
 import org.apache.tez.dag.app.TaskAttemptListener;
+import org.apache.tez.dag.app.dag.DAG;
 import org.apache.tez.dag.app.rm.NMCommunicatorEvent;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -128,6 +129,15 @@ public class ContainerLauncherRouter extends AbstractService
     }
   }
 
+  public void dagComplete(DAG dag) {
+    // Nothing required at the moment. Containers are shared across DAGs
+  }
+
+  public void dagSubmitted() {
+    // Nothing to do right now. Indicates that a new DAG has been submitted and
+    // the context has updated information.
+  }
+
 
   @Override
   public void handle(NMCommunicatorEvent event) {

http://git-wip-us.apache.org/repos/asf/tez/blob/01374671/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/LocalContainerLauncher.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/LocalContainerLauncher.java b/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/LocalContainerLauncher.java
index 18b2e35..305f8b3 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/LocalContainerLauncher.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/LocalContainerLauncher.java
@@ -44,7 +44,6 @@ import com.google.common.util.concurrent.ListeningExecutorService;
 import com.google.common.util.concurrent.MoreExecutors;
 import com.google.common.util.concurrent.ThreadFactoryBuilder;
 
-import org.apache.tez.dag.app.dag.DAG;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.hadoop.conf.Configuration;
@@ -169,14 +168,6 @@ public class LocalContainerLauncher extends AbstractService implements
     callbackExecutor.shutdownNow();
   }
 
-  @Override
-  public void dagComplete(DAG dag) {
-  }
-
-  @Override
-  public void dagSubmitted() {
-  }
-
   // Thread to monitor the queue of incoming NMCommunicator events
   private class TezSubTaskRunner implements Runnable {
     @Override

http://git-wip-us.apache.org/repos/asf/tez/blob/01374671/tez-dag/src/test/java/org/apache/tez/dag/app/MockDAGAppMaster.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/MockDAGAppMaster.java b/tez-dag/src/test/java/org/apache/tez/dag/app/MockDAGAppMaster.java
index 051497b..845c440 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/MockDAGAppMaster.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/MockDAGAppMaster.java
@@ -34,10 +34,8 @@ import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.concurrent.atomic.AtomicLong;
 
-import org.apache.tez.dag.app.dag.DAG;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
-import com.google.common.annotations.VisibleForTesting;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.security.Credentials;
 import org.apache.hadoop.service.AbstractService;
@@ -54,7 +52,6 @@ import org.apache.tez.common.counters.TezCounters;
 import org.apache.tez.dag.api.TaskHeartbeatRequest;
 import org.apache.tez.dag.api.TaskHeartbeatResponse;
 import org.apache.tez.dag.api.TezConfiguration;
-import org.apache.tez.dag.api.TaskCommunicator;
 import org.apache.tez.dag.api.TezUncheckedException;
 import org.apache.tez.dag.app.launcher.ContainerLauncher;
 import org.apache.tez.dag.app.launcher.ContainerLauncherRouter;
@@ -150,14 +147,6 @@ public class MockDAGAppMaster extends DAGAppMaster {
       this.goFlag = goFlag;
     }
 
-    @Override
-    public void dagComplete(DAG dag) {
-    }
-
-    @Override
-    public void dagSubmitted() {
-    }
-
     public class ContainerData {
       ContainerId cId;
       TezTaskAttemptID taId;

http://git-wip-us.apache.org/repos/asf/tez/blob/01374671/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/rm/TezTestServiceTaskSchedulerService.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/rm/TezTestServiceTaskSchedulerService.java b/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/rm/TezTestServiceTaskSchedulerService.java
index 50dfb24..073cb50 100644
--- a/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/rm/TezTestServiceTaskSchedulerService.java
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/rm/TezTestServiceTaskSchedulerService.java
@@ -158,7 +158,7 @@ public class TezTestServiceTaskSchedulerService extends TaskSchedulerService {
   }
 
   @Override
-  public void resetMatchLocalityForAllHeldContainers() {
+  public void dagComplete() {
   }
 
   @Override

http://git-wip-us.apache.org/repos/asf/tez/blob/01374671/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/taskcomm/TezTestServiceTaskCommunicatorImpl.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/taskcomm/TezTestServiceTaskCommunicatorImpl.java b/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/taskcomm/TezTestServiceTaskCommunicatorImpl.java
index ef983c2..cf28b11 100644
--- a/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/taskcomm/TezTestServiceTaskCommunicatorImpl.java
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/taskcomm/TezTestServiceTaskCommunicatorImpl.java
@@ -150,7 +150,7 @@ public class TezTestServiceTaskCommunicatorImpl extends TezTaskCommunicatorImpl
               t = se.getCause();
             }
             if (t instanceof RemoteException) {
-              RemoteException re = (RemoteException)t;
+              RemoteException re = (RemoteException) t;
               String message = re.toString();
               if (message.contains(RejectedExecutionException.class.getName())) {
                 getTaskCommunicatorContext().taskKilled(taskSpec.getTaskAttemptID(),

http://git-wip-us.apache.org/repos/asf/tez/blob/01374671/tez-ext-service-tests/src/test/java/org/apache/tez/tests/TestExternalTezServices.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/tests/TestExternalTezServices.java b/tez-ext-service-tests/src/test/java/org/apache/tez/tests/TestExternalTezServices.java
index 4d0a610..45c70f1 100644
--- a/tez-ext-service-tests/src/test/java/org/apache/tez/tests/TestExternalTezServices.java
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/tests/TestExternalTezServices.java
@@ -39,6 +39,7 @@ import org.apache.tez.dag.app.taskcomm.TezTestServiceTaskCommunicatorImpl;
 import org.apache.tez.examples.HashJoinExample;
 import org.apache.tez.examples.JoinDataGen;
 import org.apache.tez.examples.JoinValidateConfigured;
+import org.apache.tez.runtime.library.api.TezRuntimeConfiguration;
 import org.apache.tez.runtime.library.processor.SleepProcessor;
 import org.apache.tez.service.MiniTezTestServiceCluster;
 import org.apache.tez.service.impl.ContainerRunnerImpl;
@@ -124,6 +125,7 @@ public class TestExternalTezServices {
     remoteFs.mkdirs(stagingDirPath);
     // This is currently configured to push tasks into the Service, and then use the standard RPC
     confForJobs.set(TezConfiguration.TEZ_AM_STAGING_DIR, stagingDirPath.toString());
+    confForJobs.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_OPTIMIZE_LOCAL_FETCH, false);
 
     confForJobs.setStrings(TezConfiguration.TEZ_AM_TASK_SCHEDULERS,
         TezConstants.TEZ_AM_SERVICE_PLUGINS_NAME_DEFAULT,


[12/43] tez git commit: TEZ-776. Reduce AM mem usage caused by storing TezEvents (bikas)

Posted by ss...@apache.org.
http://git-wip-us.apache.org/repos/asf/tez/blob/05f77fe2/tez-runtime-library/src/main/java/org/apache/tez/dag/library/vertexmanager/ShuffleVertexManager.java
----------------------------------------------------------------------
diff --git a/tez-runtime-library/src/main/java/org/apache/tez/dag/library/vertexmanager/ShuffleVertexManager.java b/tez-runtime-library/src/main/java/org/apache/tez/dag/library/vertexmanager/ShuffleVertexManager.java
index 2ea0299..d177460 100644
--- a/tez-runtime-library/src/main/java/org/apache/tez/dag/library/vertexmanager/ShuffleVertexManager.java
+++ b/tez-runtime-library/src/main/java/org/apache/tez/dag/library/vertexmanager/ShuffleVertexManager.java
@@ -33,9 +33,9 @@ import org.apache.hadoop.classification.InterfaceAudience.Public;
 import org.apache.hadoop.classification.InterfaceStability.Evolving;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.tez.common.TezUtils;
-import org.apache.tez.dag.api.EdgeManagerPlugin;
 import org.apache.tez.dag.api.EdgeManagerPluginContext;
 import org.apache.tez.dag.api.EdgeManagerPluginDescriptor;
+import org.apache.tez.dag.api.EdgeManagerPluginOnDemand;
 import org.apache.tez.dag.api.EdgeProperty;
 import org.apache.tez.dag.api.EdgeProperty.DataMovementType;
 import org.apache.tez.dag.api.InputDescriptor;
@@ -160,12 +160,15 @@ public class ShuffleVertexManager extends VertexManagerPlugin {
     super(context);
   }
 
-  public static class CustomShuffleEdgeManager extends EdgeManagerPlugin {
+  public static class CustomShuffleEdgeManager extends EdgeManagerPluginOnDemand {
     int numSourceTaskOutputs;
     int numDestinationTasks;
     int basePartitionRange;
     int remainderRangeForLastShuffler;
     int numSourceTasks;
+    
+    int[][] sourceIndices;
+    int[][] targetIndices;
 
     public CustomShuffleEdgeManager(EdgeManagerPluginContext context) {
       super(context);
@@ -231,8 +234,106 @@ public class ShuffleVertexManager extends VertexManagerPlugin {
 
       destinationTaskAndInputIndices.put(destinationTaskIndex, Collections.singletonList(targetIndex));
     }
+
+    @Override
+    public EventRouteMetadata routeDataMovementEventToDestination(
+        int sourceTaskIndex, int sourceOutputIndex, int destTaskIndex) throws Exception {
+      int sourceIndex = sourceOutputIndex;
+      int destinationTaskIndex = sourceIndex/basePartitionRange;
+      if (destinationTaskIndex != destTaskIndex) {
+        return null;
+      }
+      int partitionRange = 1;
+      if(destinationTaskIndex < numDestinationTasks-1) {
+        partitionRange = basePartitionRange;
+      } else {
+        partitionRange = remainderRangeForLastShuffler;
+      }
+      
+      // all inputs from a source task are next to each other in original order
+      int targetIndex = 
+          sourceTaskIndex * partitionRange 
+          + sourceIndex % partitionRange;
+      return EventRouteMetadata.create(1, new int[]{targetIndex});
+    }
+    
+    private int[] createIndices(int partitionRange, int taskIndex, int offSetPerTask) {
+      int startIndex = taskIndex * offSetPerTask;
+      int[] indices = new int[partitionRange];
+      for (int currentIndex = 0; currentIndex < partitionRange; ++currentIndex) {
+        indices[currentIndex] = (startIndex + currentIndex);
+      }
+      return indices;      
+    }
+    
+    @Override
+    public void prepareForRouting() throws Exception {
+      // target indices derive from num src tasks
+      int numSourceTasks = getContext().getSourceVertexNumTasks();
+      targetIndices = new int[numSourceTasks][];
+      for (int srcTaskIndex=0; srcTaskIndex<numSourceTasks; ++srcTaskIndex) {
+        targetIndices[srcTaskIndex] = createIndices(basePartitionRange, srcTaskIndex,
+            basePartitionRange);
+      }
+      
+      // source indices derive from num dest tasks (==partitions)
+      int numTargetTasks = getContext().getDestinationVertexNumTasks();
+      sourceIndices = new int[numTargetTasks][];
+      for (int destTaskIndex=0; destTaskIndex<numTargetTasks; ++destTaskIndex) {
+        int partitionRange = basePartitionRange;
+        if (destTaskIndex == (numTargetTasks-1)) {
+          partitionRange = remainderRangeForLastShuffler;
+        }
+        // skip the basePartitionRange per destination task
+        sourceIndices[destTaskIndex] = createIndices(partitionRange, destTaskIndex,
+            basePartitionRange);
+      }
+    }
+
+    private int[] createTargetIndicesForRemainder(int srcTaskIndex) {
+      // for the last task just generate on the fly instead of doubling the memory
+      return createIndices(remainderRangeForLastShuffler, srcTaskIndex,
+          remainderRangeForLastShuffler);
+    }
     
     @Override
+    public @Nullable EventRouteMetadata routeCompositeDataMovementEventToDestination(
+        int sourceTaskIndex, int destinationTaskIndex)
+        throws Exception {
+      int[] targetIndicesToSend;
+      int partitionRange;
+      if(destinationTaskIndex == (numDestinationTasks-1)) {
+        if (remainderRangeForLastShuffler != basePartitionRange) {
+          targetIndicesToSend = createTargetIndicesForRemainder(sourceTaskIndex);
+        } else {
+          targetIndicesToSend = targetIndices[sourceTaskIndex];
+        }
+        partitionRange = remainderRangeForLastShuffler;
+      } else {
+        targetIndicesToSend = targetIndices[sourceTaskIndex];
+        partitionRange = basePartitionRange;
+      }
+
+      return EventRouteMetadata.create(partitionRange, targetIndicesToSend, 
+          sourceIndices[destinationTaskIndex]);
+    }
+
+    @Override
+    public EventRouteMetadata routeInputSourceTaskFailedEventToDestination(
+        int sourceTaskIndex, int destinationTaskIndex) throws Exception {
+      int partitionRange = basePartitionRange;
+      if (destinationTaskIndex == (numDestinationTasks-1)) {
+        partitionRange = remainderRangeForLastShuffler;
+      }
+      int startOffset = sourceTaskIndex * partitionRange;        
+      int[] targetIndices = new int[partitionRange];
+      for (int i=0; i<partitionRange; ++i) {
+        targetIndices[i] = (startOffset + i);
+      }
+      return EventRouteMetadata.create(partitionRange, targetIndices);
+    }
+
+    @Override
     public void routeInputSourceTaskFailedEventToDestination(int sourceTaskIndex, 
         Map<Integer, List<Integer>> destinationTaskAndInputIndices) {
       if (remainderRangeForLastShuffler < basePartitionRange) {
@@ -281,6 +382,18 @@ public class ShuffleVertexManager extends VertexManagerPlugin {
     }
 
     @Override
+    public int routeInputErrorEventToSource(int destinationTaskIndex,
+        int destinationFailedInputIndex) {
+      int partitionRange = 1;
+      if(destinationTaskIndex < numDestinationTasks-1) {
+        partitionRange = basePartitionRange;
+      } else {
+        partitionRange = remainderRangeForLastShuffler;
+      }
+      return destinationFailedInputIndex/partitionRange;
+    }
+
+    @Override
     public int getNumDestinationConsumerTasks(int sourceTaskIndex) {
       return numDestinationTasks;
     }

http://git-wip-us.apache.org/repos/asf/tez/blob/05f77fe2/tez-tests/src/test/java/org/apache/tez/test/TestExceptionPropagation.java
----------------------------------------------------------------------
diff --git a/tez-tests/src/test/java/org/apache/tez/test/TestExceptionPropagation.java b/tez-tests/src/test/java/org/apache/tez/test/TestExceptionPropagation.java
index 70be21b..7ba6028 100644
--- a/tez-tests/src/test/java/org/apache/tez/test/TestExceptionPropagation.java
+++ b/tez-tests/src/test/java/org/apache/tez/test/TestExceptionPropagation.java
@@ -308,7 +308,7 @@ public class TestExceptionPropagation {
     // EdgeManager
     EM_Initialize, EM_GetNumDestinationTaskPhysicalInputs, EM_GetNumSourceTaskPhysicalOutputs,
     EM_RouteDataMovementEventToDestination, EM_GetNumDestinationConsumerTasks,
-    EM_RouteInputErrorEventToSource,
+    EM_RouteInputErrorEventToSource, EM_PrepareForRouting,
     // Not Supported yet
     // EM_RouteInputSourceTaskFailedEventToDestination,
 
@@ -814,6 +814,33 @@ public class TestExceptionPropagation {
       super.routeDataMovementEventToDestination(event, sourceTaskIndex,
           sourceOutputIndex, destinationTaskAndInputIndices);
     }
+    
+    @Override
+    public void prepareForRouting() throws Exception {
+      if (exLocation == ExceptionLocation.EM_PrepareForRouting) {
+        throw new RuntimeException(exLocation.name());
+      }
+      super.prepareForRouting();
+    }
+    
+    @Override
+    public EventRouteMetadata routeDataMovementEventToDestination(
+        int sourceTaskIndex, int sourceOutputIndex, int destinationTaskIndex) throws Exception {
+      if (exLocation == ExceptionLocation.EM_RouteDataMovementEventToDestination) {
+        throw new RuntimeException(exLocation.name());
+      }
+      return super.routeDataMovementEventToDestination(sourceTaskIndex, sourceOutputIndex, destinationTaskIndex);
+    }
+
+    @Override
+    public EventRouteMetadata routeCompositeDataMovementEventToDestination(
+        int sourceTaskIndex, int destinationTaskIndex)
+        throws Exception {
+      if (exLocation == ExceptionLocation.EM_RouteDataMovementEventToDestination) {
+        throw new RuntimeException(exLocation.name());
+      }
+      return super.routeCompositeDataMovementEventToDestination(sourceTaskIndex, destinationTaskIndex);
+    }
 
     @Override
     public int routeInputErrorEventToSource(InputReadErrorEvent event,
@@ -826,6 +853,16 @@ public class TestExceptionPropagation {
     }
 
     @Override
+    public int routeInputErrorEventToSource(int destinationTaskIndex,
+        int destinationFailedInputIndex) {
+      if (exLocation == ExceptionLocation.EM_RouteInputErrorEventToSource) {
+        throw new RuntimeException(exLocation.name());
+      }
+      return super.routeInputErrorEventToSource(destinationTaskIndex,
+          destinationFailedInputIndex);
+    }
+
+    @Override
     public void routeInputSourceTaskFailedEventToDestination(
         int sourceTaskIndex,
         Map<Integer, List<Integer>> destinationTaskAndInputIndices) {