You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tez.apache.org by ss...@apache.org on 2015/05/09 03:42:39 UTC
[01/43] tez git commit: TEZ-2406. Tez UI: Display per-io counter
columns in task and attempt pages under vertex (Sreenath Somarajapuram via
pramachandran)
Repository: tez
Updated Branches:
refs/heads/TEZ-2003 2562c59f0 -> d03e330fa (forced update)
TEZ-2406. Tez UI: Display per-io counter columns in task and attempt pages under vertex (Sreenath Somarajapuram via pramachandran)
Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/12ef073c
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/12ef073c
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/12ef073c
Branch: refs/heads/TEZ-2003
Commit: 12ef073c205ce7b09827a7ecbeea457d589c1c5f
Parents: 5679b28
Author: Prakash Ramachandran <pr...@hortonworks.com>
Authored: Wed May 6 15:36:49 2015 +0530
Committer: Prakash Ramachandran <pr...@hortonworks.com>
Committed: Wed May 6 15:36:49 2015 +0530
----------------------------------------------------------------------
CHANGES.txt | 1 +
.../components/basic-table/search-view.js | 2 +-
.../task_task_attempts_controller.js | 16 +---
.../vertex_task_attempts_controller.js | 16 +---
.../controllers/vertex_tasks_controller.js | 16 +---
.../src/main/webapp/app/scripts/helpers/misc.js | 94 +++++++++++++++++++-
.../scripts/mixins/auto-counter-column-mixin.js | 58 ++++++++++++
.../app/scripts/mixins/column-selector-mixin.js | 85 ++++++++++++++----
.../main/webapp/app/scripts/views/checkbox.js | 29 ++++++
.../app/scripts/views/multi-select-view.js | 72 +++++++++++++++
tez-ui/src/main/webapp/app/styles/colors.less | 1 +
tez-ui/src/main/webapp/app/styles/main.less | 59 ++++++++++--
tez-ui/src/main/webapp/app/styles/shared.less | 4 +
.../main/webapp/app/templates/common/table.hbs | 42 ++++-----
.../app/templates/components/basic-table.hbs | 41 +++++----
.../components/basic-table/header-cell.hbs | 2 +-
.../components/basic-table/search-view.hbs | 2 +-
tez-ui/src/main/webapp/app/templates/dags.hbs | 3 -
.../webapp/app/templates/views/multi-select.hbs | 55 ++++++++++++
19 files changed, 491 insertions(+), 107 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/tez/blob/12ef073c/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 2446f6a..f060a8c 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -15,6 +15,7 @@ INCOMPATIBLE CHANGES
TEZ-1993. Implement a pluggable InputSizeEstimator for grouping fairly
ALL CHANGES:
+ TEZ-2406. Tez UI: Display per-io counter columns in task and attempt pages under vertex
TEZ-2384. Add warning message in the case of prewarn under non-session mode.
TEZ-2415. PMC RDF needs to use asfext:pmc, not asfext:PMC.
TEZ-1752. Inputs / Outputs in the Runtime library should be interruptable.
http://git-wip-us.apache.org/repos/asf/tez/blob/12ef073c/tez-ui/src/main/webapp/app/scripts/components/basic-table/search-view.js
----------------------------------------------------------------------
diff --git a/tez-ui/src/main/webapp/app/scripts/components/basic-table/search-view.js b/tez-ui/src/main/webapp/app/scripts/components/basic-table/search-view.js
index 4dd41a1..36a2d4f 100644
--- a/tez-ui/src/main/webapp/app/scripts/components/basic-table/search-view.js
+++ b/tez-ui/src/main/webapp/app/scripts/components/basic-table/search-view.js
@@ -23,7 +23,7 @@ App.BasicTableComponent.SearchView = Ember.View.extend({
text: '',
_boundText: function () {
- return this.get('text');
+ return this.get('text') || '';
}.property(),
_validRegEx: function () {
http://git-wip-us.apache.org/repos/asf/tez/blob/12ef073c/tez-ui/src/main/webapp/app/scripts/controllers/task_task_attempts_controller.js
----------------------------------------------------------------------
diff --git a/tez-ui/src/main/webapp/app/scripts/controllers/task_task_attempts_controller.js b/tez-ui/src/main/webapp/app/scripts/controllers/task_task_attempts_controller.js
index 558a740..c5c9eea 100644
--- a/tez-ui/src/main/webapp/app/scripts/controllers/task_task_attempts_controller.js
+++ b/tez-ui/src/main/webapp/app/scripts/controllers/task_task_attempts_controller.js
@@ -16,12 +16,13 @@
* limitations under the License.
*/
-App.TaskAttemptsController = App.TablePageController.extend({
+App.TaskAttemptsController = App.TablePageController.extend(App.AutoCounterColumnMixin, {
controllerName: 'TaskAttemptsController',
needs: "task",
entityType: 'taskTaskAttempt',
+ baseEntityType: 'taskAttempt',
filterEntityType: 'task',
filterEntityId: Ember.computed.alias('controllers.task.id'),
@@ -193,18 +194,7 @@ App.TaskAttemptsController = App.TablePageController.extend({
}
}
];
- }.property(),
-
- columnConfigs: function() {
- return this.get('defaultColumnConfigs').concat(
- App.Helpers.misc.normalizeCounterConfigs(
- App.get('Configs.defaultCounters').concat(
- App.get('Configs.tables.entity.taskAttempt') || [],
- App.get('Configs.tables.sharedColumns') || []
- )
- )
- );
- }.property(),
+ }.property('filterEntityId'),
});
http://git-wip-us.apache.org/repos/asf/tez/blob/12ef073c/tez-ui/src/main/webapp/app/scripts/controllers/vertex_task_attempts_controller.js
----------------------------------------------------------------------
diff --git a/tez-ui/src/main/webapp/app/scripts/controllers/vertex_task_attempts_controller.js b/tez-ui/src/main/webapp/app/scripts/controllers/vertex_task_attempts_controller.js
index f395b40..b4ed89a 100644
--- a/tez-ui/src/main/webapp/app/scripts/controllers/vertex_task_attempts_controller.js
+++ b/tez-ui/src/main/webapp/app/scripts/controllers/vertex_task_attempts_controller.js
@@ -16,12 +16,13 @@
* limitations under the License.
*/
-App.VertexTaskAttemptsController = App.TablePageController.extend({
+App.VertexTaskAttemptsController = App.TablePageController.extend(App.AutoCounterColumnMixin, {
controllerName: 'VertexTaskAttemptsController',
needs: "vertex",
entityType: 'vertexTaskAttempt',
+ baseEntityType: 'taskAttempt',
filterEntityType: 'vertex',
filterEntityId: Ember.computed.alias('controllers.vertex.id'),
@@ -196,17 +197,6 @@ App.VertexTaskAttemptsController = App.TablePageController.extend({
}
}
];
- }.property(),
-
- columnConfigs: function() {
- return this.get('defaultColumnConfigs').concat(
- App.Helpers.misc.normalizeCounterConfigs(
- App.get('Configs.defaultCounters').concat(
- App.get('Configs.tables.entity.taskAttempt') || [],
- App.get('Configs.tables.sharedColumns') || []
- )
- )
- );
- }.property(),
+ }.property('filterEntityId'),
});
http://git-wip-us.apache.org/repos/asf/tez/blob/12ef073c/tez-ui/src/main/webapp/app/scripts/controllers/vertex_tasks_controller.js
----------------------------------------------------------------------
diff --git a/tez-ui/src/main/webapp/app/scripts/controllers/vertex_tasks_controller.js b/tez-ui/src/main/webapp/app/scripts/controllers/vertex_tasks_controller.js
index 953ffcd..2cc0518 100644
--- a/tez-ui/src/main/webapp/app/scripts/controllers/vertex_tasks_controller.js
+++ b/tez-ui/src/main/webapp/app/scripts/controllers/vertex_tasks_controller.js
@@ -16,12 +16,13 @@
* limitations under the License.
*/
-App.VertexTasksController = App.TablePageController.extend({
+App.VertexTasksController = App.TablePageController.extend(App.AutoCounterColumnMixin, {
controllerName: 'VertexTasksController',
needs: "vertex",
entityType: 'vertexTask',
+ baseEntityType: 'task',
filterEntityType: 'vertex',
filterEntityId: Ember.computed.alias('controllers.vertex.id'),
@@ -172,17 +173,6 @@ App.VertexTasksController = App.TablePageController.extend({
}
}
];
- }.property('id'),
-
- columnConfigs: function() {
- return this.get('defaultColumnConfigs').concat(
- App.Helpers.misc.normalizeCounterConfigs(
- App.get('Configs.defaultCounters').concat(
- App.get('Configs.tables.entity.task') || [],
- App.get('Configs.tables.sharedColumns') || []
- )
- )
- );
- }.property(),
+ }.property('filterEntityId')
});
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/tez/blob/12ef073c/tez-ui/src/main/webapp/app/scripts/helpers/misc.js
----------------------------------------------------------------------
diff --git a/tez-ui/src/main/webapp/app/scripts/helpers/misc.js b/tez-ui/src/main/webapp/app/scripts/helpers/misc.js
index 6794e95..81a7693 100644
--- a/tez-ui/src/main/webapp/app/scripts/helpers/misc.js
+++ b/tez-ui/src/main/webapp/app/scripts/helpers/misc.js
@@ -109,6 +109,50 @@ App.Helpers.misc = {
return classPath.substr(classPath.lastIndexOf('.') + 1);
},
+ /**
+ * Return a normalized group name for a counter name
+ * @param groupName {String}
+ * @return Normalized name
+ */
+ getCounterGroupDisplayName: function (groupName) {
+ var displayName = App.Helpers.misc.getClassName(groupName), // Remove path
+ ioParts,
+ toText;
+
+ function removeCounterFromEnd(text) {
+ if(text.substr(-7) == 'Counter') {
+ text = text.substr(0, text.length - 7);
+ }
+ return text;
+ }
+
+ displayName = removeCounterFromEnd(displayName);
+
+ // Reformat per-io counters
+ switch(App.Helpers.misc.checkIOCounterGroup(displayName)) {
+ case 'in':
+ ioParts = displayName.split('_INPUT_');
+ toText = 'to %@ Input'.fmt(ioParts[1]);
+ break;
+ case 'out':
+ ioParts = displayName.split('_OUTPUT_');
+ toText = 'to %@ Output'.fmt(ioParts[1]);
+ break;
+ }
+ if(ioParts) {
+ ioParts = ioParts[0].split('_');
+ if(ioParts.length > 1) {
+ displayName = '%@ - %@ %@'.fmt(
+ removeCounterFromEnd(ioParts.shift()),
+ ioParts.join('_'),
+ toText
+ );
+ }
+ }
+
+ return displayName;
+ },
+
/*
* Normalizes counter style configurations
* @param counterConfigs Array
@@ -116,9 +160,14 @@ App.Helpers.misc = {
*/
normalizeCounterConfigs: function (counterConfigs) {
return counterConfigs.map(function (configuration) {
- configuration.headerCellName = configuration.counterName || configuration.counterId;
- configuration.id = '%@/%@'.fmt(configuration.counterGroupName || configuration.groupId,
- configuration.counterName || configuration.counterId),
+ var groupName = configuration.counterGroupName || configuration.groupId,
+ counterName = configuration.counterName || configuration.counterId;
+
+ configuration.headerCellName = '%@ - %@'.fmt(
+ App.Helpers.misc.getCounterGroupDisplayName(groupName),
+ counterName
+ );
+ configuration.id = '%@/%@'.fmt(groupName, counterName),
configuration.getSortValue = App.Helpers.misc.getCounterCellContent;
configuration.getCellContent =
@@ -413,6 +462,45 @@ App.Helpers.misc = {
}
},
+ /**
+ * Returns in/out/empty string based counter group type
+ * @param counterGroupName {String}
+ * @return in/out/empty string
+ */
+ checkIOCounterGroup: function (counterGroupName) {
+ if(counterGroupName == undefined){
+ debugger;
+ }
+ var relationPart = counterGroupName.substr(counterGroupName.indexOf('_') + 1);
+ if(relationPart.match('_INPUT_')) {
+ return 'in';
+ }
+ else if(relationPart.match('_OUTPUT_')) {
+ return 'out';
+ }
+ return '';
+ },
+
+ /**
+ * Return unique values from array based on a property
+ * @param array {Array}
+ * @param property {String}
+ * @return uniqueArray {Array}
+ */
+ getUniqueByProperty: function (array, property) {
+ var propHash = {},
+ uniqueArray = [];
+
+ array.forEach(function (item) {
+ if(item && !propHash[item[property]]) {
+ uniqueArray.push(item);
+ propHash[item[property]] = true;
+ }
+ });
+
+ return uniqueArray;
+ },
+
timelinePathForType: (function () {
var typeToPathMap = {
dag: 'TEZ_DAG_ID',
http://git-wip-us.apache.org/repos/asf/tez/blob/12ef073c/tez-ui/src/main/webapp/app/scripts/mixins/auto-counter-column-mixin.js
----------------------------------------------------------------------
diff --git a/tez-ui/src/main/webapp/app/scripts/mixins/auto-counter-column-mixin.js b/tez-ui/src/main/webapp/app/scripts/mixins/auto-counter-column-mixin.js
new file mode 100644
index 0000000..2c6b531
--- /dev/null
+++ b/tez-ui/src/main/webapp/app/scripts/mixins/auto-counter-column-mixin.js
@@ -0,0 +1,58 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+App.AutoCounterColumnMixin = Em.Mixin.create({
+
+ baseEntityType: null, // Must be set in the controller that uses this Mixin
+
+ columnSelectorMessage: function () {
+ return "<span class='per-io'>Per-IO counter</span> selection wouldn't persist across %@.".fmt(
+ this.get('filterEntityType').pluralize()
+ );
+ }.property('filterEntityType'),
+
+ columnConfigs: function() {
+ var counterConfigs = App.Helpers.misc.normalizeCounterConfigs(
+ App.get('Configs.defaultCounters').concat(
+ App.get('Configs.tables.entity.' + this.get('baseEntityType')) || [],
+ App.get('Configs.tables.sharedColumns') || []
+ )
+ ), dynamicCounterConfigs = [];
+
+ this.get('data').forEach(function (row) {
+ var counterGroups = row.get('counterGroups');
+ if(counterGroups) {
+ counterGroups.forEach(function (group) {
+ group.counters.forEach(function (counter) {
+ dynamicCounterConfigs.push({
+ counterName: counter.counterName,
+ counterGroupName: group.counterGroupName
+ });
+ });
+ });
+ }
+ });
+
+ return this.get('defaultColumnConfigs').concat(
+ App.Helpers.misc.getUniqueByProperty(counterConfigs.concat(
+ App.Helpers.misc.normalizeCounterConfigs(dynamicCounterConfigs)
+ ), 'id')
+ );
+ }.property('data', 'defaultColumnConfigs', 'baseEntityType')
+
+});
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/tez/blob/12ef073c/tez-ui/src/main/webapp/app/scripts/mixins/column-selector-mixin.js
----------------------------------------------------------------------
diff --git a/tez-ui/src/main/webapp/app/scripts/mixins/column-selector-mixin.js b/tez-ui/src/main/webapp/app/scripts/mixins/column-selector-mixin.js
index d3b3bbd..3a76d61 100644
--- a/tez-ui/src/main/webapp/app/scripts/mixins/column-selector-mixin.js
+++ b/tez-ui/src/main/webapp/app/scripts/mixins/column-selector-mixin.js
@@ -38,8 +38,10 @@ App.ColumnSelectorMixin = Em.Mixin.create({
_storeKey: '',
visibleColumnIds: {},
columnConfigs: [],
+ selectOptions: [],
columnSelectorTitle: 'Column Selector',
+ columnSelectorMessage: '',
init: function(){
var visibleColumnIds;
@@ -59,7 +61,7 @@ App.ColumnSelectorMixin = Em.Mixin.create({
this._super();
this.set('visibleColumnIds', visibleColumnIds);
- },
+ }.observes('defaultColumnConfigs'), //To reset on entity change
columns: function() {
var visibleColumnConfigs = this.get('columnConfigs').filter(function (column) {
@@ -67,23 +69,76 @@ App.ColumnSelectorMixin = Em.Mixin.create({
}, this);
return App.Helpers.misc.createColumnDescription(visibleColumnConfigs);
- }.property('visibleColumnIds'),
+ }.property('visibleColumnIds', 'columnConfigs'),
+
+ _getSelectOptions: function () {
+ var group = null,
+ highlight = false,
+ visibleColumnIds = this.get('visibleColumnIds');
+
+ return this.get('columnConfigs').map(function (config) {
+ var css = '';
+
+ highlight = highlight ^ (config.counterGroupName != group),
+ group = config.counterGroupName;
+
+ if(highlight) {
+ css += ' highlight';
+ }
+ if(group && App.Helpers.misc.checkIOCounterGroup(group)) {
+ css += ' per-io';
+ }
+
+ return Em.Object.create({
+ id: config.id,
+ displayText: config.headerCellName,
+ css: css,
+ selected: visibleColumnIds[config.id]
+ });
+ });
+ },
actions: {
selectColumns: function () {
- var that = this;
-
- App.Dialogs.displayMultiSelect(this.get('columnSelectorTitle'), this.get('columnConfigs'), this.visibleColumnIds, {
- displayText: 'headerCellName'
- }).then(function (data) {
- if(isObjectsDifferent(data, that.visibleColumnIds)) {
- try {
- localStorage.setItem(that._storeKey , JSON.stringify(data));
- }catch(e){}
- that.set('visibleColumnIds', data);
- }
- });
+ this.set('selectOptions', this._getSelectOptions());
+
+ Bootstrap.ModalManager.open(
+ 'columnSelector',
+ this.get('columnSelectorTitle'),
+ App.MultiSelectView.extend({
+ options: this.get('selectOptions'),
+ message: this.get('columnSelectorMessage')
+ }),
+ [Ember.Object.create({
+ title: 'Ok',
+ dismiss: 'modal',
+ clicked: 'selectionChange'
+ })],
+ this
+ );
+ },
+
+ selectionChange: function () {
+ var visibleColumnIds = this.get('selectOptions').reduce(function (obj, option) {
+ if(option.get('selected')) {
+ obj[option.get('id')] = true;
+ }
+ return obj;
+ }, {}),
+ selectionToSave = this.get('selectOptions').reduce(function (obj, option) {
+ var id = option.id;
+ if(!id.match('_INPUT_') && !id.match('_OUTPUT_') && visibleColumnIds[id]) {
+ obj[id] = true;
+ }
+ return obj;
+ }, {});
+
+ if(isObjectsDifferent(visibleColumnIds, this.get('visibleColumnIds'))) {
+ try {
+ localStorage.setItem(this._storeKey , JSON.stringify(selectionToSave));
+ }catch(e){}
+ this.set('visibleColumnIds', visibleColumnIds);
+ }
}
}
-
});
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/tez/blob/12ef073c/tez-ui/src/main/webapp/app/scripts/views/checkbox.js
----------------------------------------------------------------------
diff --git a/tez-ui/src/main/webapp/app/scripts/views/checkbox.js b/tez-ui/src/main/webapp/app/scripts/views/checkbox.js
new file mode 100644
index 0000000..da2de14
--- /dev/null
+++ b/tez-ui/src/main/webapp/app/scripts/views/checkbox.js
@@ -0,0 +1,29 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+App.Checkbox = Em.Checkbox.extend({
+ change: function() {
+ var value = this.get('checked'),
+ target = this.get('target') || this.get('context');
+
+ if(target) {
+ Em.run.later(target.send.bind(target, this.get('action'), value), 100);
+ }
+ return true;
+ }
+});
http://git-wip-us.apache.org/repos/asf/tez/blob/12ef073c/tez-ui/src/main/webapp/app/scripts/views/multi-select-view.js
----------------------------------------------------------------------
diff --git a/tez-ui/src/main/webapp/app/scripts/views/multi-select-view.js b/tez-ui/src/main/webapp/app/scripts/views/multi-select-view.js
new file mode 100644
index 0000000..ace7b94
--- /dev/null
+++ b/tez-ui/src/main/webapp/app/scripts/views/multi-select-view.js
@@ -0,0 +1,72 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+App.MultiSelectView = Ember.View.extend({
+ templateName: 'views/multi-select',
+ classNames: ['multi-select'],
+
+ selectAll: false,
+ searchRegex: '',
+
+ options: null, //Must be set by sub-classes or instances
+
+ _validRegEx: function () {
+ var regExText = this.get('searchRegex');
+ regExText = regExText.substr(regExText.indexOf(':') + 1);
+ try {
+ new RegExp(regExText, 'im');
+ }
+ catch(e) {
+ return false;
+ }
+ return true;
+ }.property('searchRegex'),
+
+ visibleOptions: function () {
+ var options = this.get('options'),
+ regExText = this.get('searchRegex'),
+ regEx;
+
+ if (Em.isEmpty(regExText) || !this.get('_validRegEx')) {
+ return options;
+ }
+
+ regEx = new RegExp(regExText, 'i');
+ return options.filter(function (option) {
+ return regEx.test(option.get('displayText'));
+ });
+ }.property('options', 'searchRegex'),
+
+ _selectObserver: function () {
+ var selectedCount = 0;
+ this.get('visibleOptions').forEach(function (option) {
+ if(option.get('selected')) {
+ selectedCount++;
+ }
+ });
+ this.set('selectAll', selectedCount > 0 && selectedCount == this.get('visibleOptions.length'));
+ }.observes('visibleOptions.@each.selected'),
+
+ actions: {
+ selectAll: function (checked) {
+ this.get('visibleOptions').forEach(function (option) {
+ option.set('selected', checked);
+ });
+ }
+ }
+});
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/tez/blob/12ef073c/tez-ui/src/main/webapp/app/styles/colors.less
----------------------------------------------------------------------
diff --git a/tez-ui/src/main/webapp/app/styles/colors.less b/tez-ui/src/main/webapp/app/styles/colors.less
index aa0d96a..a426893 100644
--- a/tez-ui/src/main/webapp/app/styles/colors.less
+++ b/tez-ui/src/main/webapp/app/styles/colors.less
@@ -33,6 +33,7 @@
@text-color: #666666;
@text-red: red;
@text-light: #BBBBBB;
+@text-green: green;
@top-nav-bg-color-from: #d5d5d5;
@top-nav-bg-color-to: #f0f0f0;
http://git-wip-us.apache.org/repos/asf/tez/blob/12ef073c/tez-ui/src/main/webapp/app/styles/main.less
----------------------------------------------------------------------
diff --git a/tez-ui/src/main/webapp/app/styles/main.less b/tez-ui/src/main/webapp/app/styles/main.less
index 5da7e06..ad3f132 100644
--- a/tez-ui/src/main/webapp/app/styles/main.less
+++ b/tez-ui/src/main/webapp/app/styles/main.less
@@ -657,6 +657,10 @@ body, html {
overflow: hidden;
transform: translateZ(0);
+
+ i {
+ position: static;
+ }
}
.horizontal-half {
@@ -715,6 +719,11 @@ body, html {
overflow: auto;
+ .data-availability-message {
+ font-size: 24px;
+ text-align: center;
+ }
+
.table-body {
.noise-background;
white-space: nowrap;
@@ -788,11 +797,6 @@ body, html {
padding-right: 15px;
- -webkit-user-select: none;
- -moz-user-select: none;
- -ms-user-select: none;
- user-select: none;
-
background-color: @bg-grey;
border-bottom: 1px solid @border-color;
}
@@ -849,3 +853,48 @@ body, html {
margin-left: -1px;
margin-top: -1px;
}
+
+.multi-select {
+ .message {
+ text-align: right;
+ font-size: 10px;
+ }
+
+ .selection-list {
+ border: 1px solid @border-color;
+
+ .highlight {
+ background-color: @bg-lite;
+ }
+ .select-option, .search-option {
+ border-top: 1px dotted @border-color;
+ padding: 5px;
+
+ .checkbox {
+ margin-right: 10px;
+ float: left;
+ vertical-align: middle;
+ }
+ }
+ .search-option {
+ border: none;
+
+ .form-group {
+ .inline-block;
+ .align-top;
+
+ width: 470px;
+ margin: 4px 0px 0px 20px;
+ }
+ }
+ }
+}
+
+.modal-body {
+ max-height:500px;
+ overflow-y: auto;
+}
+
+.per-io {
+ color: @text-green;
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/tez/blob/12ef073c/tez-ui/src/main/webapp/app/styles/shared.less
----------------------------------------------------------------------
diff --git a/tez-ui/src/main/webapp/app/styles/shared.less b/tez-ui/src/main/webapp/app/styles/shared.less
index 7cece16..a3cdf2a 100644
--- a/tez-ui/src/main/webapp/app/styles/shared.less
+++ b/tez-ui/src/main/webapp/app/styles/shared.less
@@ -49,6 +49,10 @@
white-space: nowrap;
}
+.no-border {
+ border: none !important;
+}
+
.align-top {
vertical-align: top;
}
http://git-wip-us.apache.org/repos/asf/tez/blob/12ef073c/tez-ui/src/main/webapp/app/templates/common/table.hbs
----------------------------------------------------------------------
diff --git a/tez-ui/src/main/webapp/app/templates/common/table.hbs b/tez-ui/src/main/webapp/app/templates/common/table.hbs
index 631d08d..462089d 100644
--- a/tez-ui/src/main/webapp/app/templates/common/table.hbs
+++ b/tez-ui/src/main/webapp/app/templates/common/table.hbs
@@ -17,35 +17,31 @@
}}
{{#unless loading}}
- {{#if data.length}}
- {{load-time-component
- isRefreshable=isRefreshable
- time=data.content.0.timeStamp
- refresh='refresh'
- }}
+ {{load-time-component
+ isRefreshable=isRefreshable
+ time=data.content.0.timeStamp
+ refresh='refresh'
+ }}
- {{basic-table-component
- columns=columns
- rows=data.content
+ {{basic-table-component
+ columns=columns
+ rows=data.content
- extraHeaderItem=App.ExtraTableButtonsView
- statusMessage=statusMessage
+ extraHeaderItem=App.ExtraTableButtonsView
+ statusMessage=statusMessage
- enableSearch=true
- enablePagination=true
- enableSort=true
+ enableSearch=true
+ enablePagination=true
+ enableSort=true
- pageNumBinding='pageNum'
- rowCountBinding='rowCount'
+ pageNumBinding='pageNum'
+ rowCountBinding='rowCount'
- searchTextBinding='searchText'
+ searchTextBinding='searchText'
- sortColumnIdBinding='sortColumnId'
- sortOrderBinding='sortOrder'
- }}
- {{else}}
- <h1>No records available!</n1>
- {{/if}}
+ sortColumnIdBinding='sortColumnId'
+ sortOrderBinding='sortOrder'
+ }}
{{else}}
{{partial 'partials/loading-spinner'}}
<div class="text-align-center">
http://git-wip-us.apache.org/repos/asf/tez/blob/12ef073c/tez-ui/src/main/webapp/app/templates/components/basic-table.hbs
----------------------------------------------------------------------
diff --git a/tez-ui/src/main/webapp/app/templates/components/basic-table.hbs b/tez-ui/src/main/webapp/app/templates/components/basic-table.hbs
index 175a3b9..d1e48b2 100644
--- a/tez-ui/src/main/webapp/app/templates/components/basic-table.hbs
+++ b/tez-ui/src/main/webapp/app/templates/components/basic-table.hbs
@@ -21,7 +21,10 @@
<div class='table-header'>
<div class="horizontal-half align-top">
{{#if enableSearch}}
- {{view App.BasicTableComponent.SearchView text=searchText}}
+ {{view App.BasicTableComponent.SearchView
+ text=searchText
+ placeholder="RegEx or Column1, Column2... :RegEx"
+ }}
{{/if}}
<div class="table-message">
{{#if _statusMessage}}
@@ -40,21 +43,27 @@
</div>
{{/if}}
<div class='table-body-container'>
- <div class='table-body'>
- {{#each column in _columns}}
- <div {{bind-attr
- style=column.customStyle
- class=":table-column _view.contentIndex::first-item"
- }}>
- {{view column.headerCellView}}
- {{#each row in _rows}}
- <div class='table-cell {{unbound firstItemCSS _view}}'>
- {{view column.cellView row=row}}
- </div>
- {{/each}}
- </div>
- {{/each}}
- </div>
+ {{#unless _columns.length}}
+ <div class="data-availability-message">No columns available!</div>
+ {{else}}{{#unless _rows.length}}
+ <div class="data-availability-message">No records available!</div>
+ {{else}}
+ <div class='table-body'>
+ {{#each column in _columns}}
+ <div {{bind-attr
+ style=column.customStyle
+ class=":table-column _view.contentIndex::first-item"
+ }}>
+ {{view column.headerCellView}}
+ {{#each row in _rows}}
+ <div class='table-cell {{unbound firstItemCSS _view}}'>
+ {{view column.cellView row=row}}
+ </div>
+ {{/each}}
+ </div>
+ {{/each}}
+ </div>
+ {{/unless}}{{/unless}}
</div>
<div class='table-footer'>
<div class="horizontal-half align-top">
http://git-wip-us.apache.org/repos/asf/tez/blob/12ef073c/tez-ui/src/main/webapp/app/templates/components/basic-table/header-cell.hbs
----------------------------------------------------------------------
diff --git a/tez-ui/src/main/webapp/app/templates/components/basic-table/header-cell.hbs b/tez-ui/src/main/webapp/app/templates/components/basic-table/header-cell.hbs
index c975d32..394752c 100644
--- a/tez-ui/src/main/webapp/app/templates/components/basic-table/header-cell.hbs
+++ b/tez-ui/src/main/webapp/app/templates/components/basic-table/header-cell.hbs
@@ -16,7 +16,7 @@
* limitations under the License.
}}
-<div class='table-header-cell'>
+<div class='table-header-cell' title='{{unbound column.headerCellName}}'>
<div class='cell-content'>
{{unbound column.headerCellName}}
</div>
http://git-wip-us.apache.org/repos/asf/tez/blob/12ef073c/tez-ui/src/main/webapp/app/templates/components/basic-table/search-view.hbs
----------------------------------------------------------------------
diff --git a/tez-ui/src/main/webapp/app/templates/components/basic-table/search-view.hbs b/tez-ui/src/main/webapp/app/templates/components/basic-table/search-view.hbs
index 949fc24..4795bbb 100644
--- a/tez-ui/src/main/webapp/app/templates/components/basic-table/search-view.hbs
+++ b/tez-ui/src/main/webapp/app/templates/components/basic-table/search-view.hbs
@@ -19,7 +19,7 @@
<div {{bind-attr class=":input-group view._validRegEx::has-error"}}>
{{input
class="form-control"
- placeholder="RegEx or Column1, Column2... :RegEx"
+ placeholder=view.placeholder
action="search"
value=view._boundText
targetObject=view
http://git-wip-us.apache.org/repos/asf/tez/blob/12ef073c/tez-ui/src/main/webapp/app/templates/dags.hbs
----------------------------------------------------------------------
diff --git a/tez-ui/src/main/webapp/app/templates/dags.hbs b/tez-ui/src/main/webapp/app/templates/dags.hbs
index 727e28a..0f272c5 100644
--- a/tez-ui/src/main/webapp/app/templates/dags.hbs
+++ b/tez-ui/src/main/webapp/app/templates/dags.hbs
@@ -89,9 +89,6 @@
statusMessage=statusMessage
}}
- {{#unless sortedContent.length}}
- <h1 class="no-margin">No records available!</h1>
- {{/unless}}
{{else}}
{{partial 'partials/loading-spinner'}}
<div class="text-align-center">
http://git-wip-us.apache.org/repos/asf/tez/blob/12ef073c/tez-ui/src/main/webapp/app/templates/views/multi-select.hbs
----------------------------------------------------------------------
diff --git a/tez-ui/src/main/webapp/app/templates/views/multi-select.hbs b/tez-ui/src/main/webapp/app/templates/views/multi-select.hbs
new file mode 100644
index 0000000..ebab317
--- /dev/null
+++ b/tez-ui/src/main/webapp/app/templates/views/multi-select.hbs
@@ -0,0 +1,55 @@
+{{!
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+}}
+
+<div class="message">
+ {{{view.message}}}
+</div>
+<div class="selection-list">
+ <div class="search-option highlight">
+ <div class="inline-block">
+ Select All<br/>
+ {{view App.Checkbox
+ classNames='inline-display checkbox'
+ checked=view.selectAll
+ action='selectAll'
+ target=view
+ }}
+ </div>
+ <div {{bind-attr class=":form-group view._validRegEx::has-error"}}>
+ {{input
+ class="form-control"
+ placeholder="Filter options"
+ value=view.searchRegex
+ }}
+ </div>
+ </div>
+ {{#if view.visibleOptions.length}}
+ {{#each option in view.visibleOptions}}
+ <div class="select-option {{unbound option.css}}"}}>
+ {{input
+ type="checkbox"
+ classNames='inline-display checkbox'
+ checked=option.selected
+ }}
+ {{option.displayText}}
+ </div>
+ {{/each}}
+ {{else}}
+ <h4> No options available...</h4>
+ {{/if}}
+</div>
\ No newline at end of file
[43/43] tez git commit: TEZ-2420. TaskRunner returning before executing the task. (sseth)
Posted by ss...@apache.org.
TEZ-2420. TaskRunner returning before executing the task. (sseth)
Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/d03e330f
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/d03e330f
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/d03e330f
Branch: refs/heads/TEZ-2003
Commit: d03e330fa05d67512f826a8507e3cf8564ea3fa5
Parents: ea972ac
Author: Siddharth Seth <ss...@apache.org>
Authored: Wed May 6 17:14:56 2015 -0700
Committer: Siddharth Seth <ss...@apache.org>
Committed: Fri May 8 14:46:11 2015 -0700
----------------------------------------------------------------------
TEZ-2003-CHANGES.txt | 1 +
.../src/main/java/org/apache/tez/runtime/task/TezTaskRunner.java | 1 +
2 files changed, 2 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/tez/blob/d03e330f/TEZ-2003-CHANGES.txt
----------------------------------------------------------------------
diff --git a/TEZ-2003-CHANGES.txt b/TEZ-2003-CHANGES.txt
index f8a71e8..9b2339f 100644
--- a/TEZ-2003-CHANGES.txt
+++ b/TEZ-2003-CHANGES.txt
@@ -21,5 +21,6 @@ ALL CHANGES:
TEZ-2381. Fixes after rebase 04/28.
TEZ-2388. Send dag identifier as part of the fetcher request string.
TEZ-2414. LogicalIOProcessorRuntimeTask, RuntimeTask, TezTaskRunner should handle interrupts & carry out necessary cleanups.
+ TEZ-2420. TaskRunner returning before executing the task.
INCOMPATIBLE CHANGES:
http://git-wip-us.apache.org/repos/asf/tez/blob/d03e330f/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TezTaskRunner.java
----------------------------------------------------------------------
diff --git a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TezTaskRunner.java b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TezTaskRunner.java
index 7238d5e..dd4620a 100644
--- a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TezTaskRunner.java
+++ b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TezTaskRunner.java
@@ -105,6 +105,7 @@ public class TezTaskRunner implements TezUmbilical, ErrorReporter {
Throwable failureCause = null;
if (!Thread.currentThread().isInterrupted()) {
taskFuture = executor.submit(callable);
+ } else {
return isShutdownRequested();
}
try {
[27/43] tez git commit: TEZ-2138. Fix minor bugs in adding default scheduler, getting launchers. (sseth)
Posted by ss...@apache.org.
TEZ-2138. Fix minor bugs in adding default scheduler, getting launchers. (sseth)
Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/39295ca2
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/39295ca2
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/39295ca2
Branch: refs/heads/TEZ-2003
Commit: 39295ca2203f1c42cdbb4086df54fca92654d904
Parents: 67e1643
Author: Siddharth Seth <ss...@apache.org>
Authored: Mon Feb 23 20:53:24 2015 -0800
Committer: Siddharth Seth <ss...@apache.org>
Committed: Fri May 8 14:41:30 2015 -0700
----------------------------------------------------------------------
TEZ-2003-CHANGES.txt | 1 +
.../src/main/java/org/apache/tez/dag/app/DAGAppMaster.java | 2 +-
.../java/org/apache/tez/dag/app/dag/impl/VertexImpl.java | 4 ++++
.../org/apache/tez/dag/app/rm/TaskSchedulerEventHandler.java | 1 +
.../dag/app/launcher/TezTestServiceContainerLauncher.java | 8 +++++---
5 files changed, 12 insertions(+), 4 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/tez/blob/39295ca2/TEZ-2003-CHANGES.txt
----------------------------------------------------------------------
diff --git a/TEZ-2003-CHANGES.txt b/TEZ-2003-CHANGES.txt
index 4b0a12b..4377f57 100644
--- a/TEZ-2003-CHANGES.txt
+++ b/TEZ-2003-CHANGES.txt
@@ -7,5 +7,6 @@ ALL CHANGES:
TEZ-2123. Fix component managers to use pluggable components. (Enable hybrid mode)
TEZ-2125. Create a task communicator for local mode. Allow tasks to run in the AM.
TEZ-2131. Add additional tests for tasks running in the AM.
+ TEZ-2138. Fix minor bugs in adding default scheduler, getting launchers.
INCOMPATIBLE CHANGES:
http://git-wip-us.apache.org/repos/asf/tez/blob/39295ca2/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java b/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java
index 701eca8..1ea369e 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java
@@ -1507,7 +1507,7 @@ public class DAGAppMaster extends AbstractService {
@Override
public Integer getContainerLauncherIdentifier(String name) {
- return taskCommunicators.get(name);
+ return containerLaunchers.get(name);
}
@Override
http://git-wip-us.apache.org/repos/asf/tez/blob/39295ca2/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexImpl.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexImpl.java
index 18d3368..0385e64 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexImpl.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexImpl.java
@@ -976,6 +976,10 @@ public class VertexImpl implements org.apache.tez.dag.app.dag.Vertex, EventHandl
.get(TezConfiguration.TEZ_AM_VERTEX_TASK_COMMUNICATOR_NAME, tezDefaultComponentName);
String containerLauncherName = vertexConf
.get(TezConfiguration.TEZ_AM_VERTEX_CONTAINER_LAUNCHER_NAME, tezDefaultComponentName);
+
+ LOG.info("Vertex: " + logIdentifier + " configured with TaskScheduler=" + taskSchedulerName +
+ ", ContainerLauncher=" + containerLauncherName + ", TaskComm=" + taskCommName);
+
taskSchedulerIdentifier = appContext.getTaskScheduerIdentifier(taskSchedulerName);
taskCommunicatorIdentifier = appContext.getTaskCommunicatorIdentifier(taskCommName);
containerLauncherIdentifier = appContext.getContainerLauncherIdentifier(containerLauncherName);
http://git-wip-us.apache.org/repos/asf/tez/blob/39295ca2/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerEventHandler.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerEventHandler.java b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerEventHandler.java
index 5a8e9fe..8e5fc71 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerEventHandler.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerEventHandler.java
@@ -174,6 +174,7 @@ public class TaskSchedulerEventHandler extends AbstractService
for (int i = 0 ; i < schedulerClasses.length ; i++) { // Copy over the rest.
this.taskSchedulerClasses[i] = schedulerClasses[i];
}
+ this.taskSchedulerClasses[foundYarnTaskSchedulerIndex] = TezConstants.TEZ_AM_SERVICE_PLUGINS_NAME_DEFAULT;
} else {
this.taskSchedulerClasses = schedulerClasses;
}
http://git-wip-us.apache.org/repos/asf/tez/blob/39295ca2/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/launcher/TezTestServiceContainerLauncher.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/launcher/TezTestServiceContainerLauncher.java b/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/launcher/TezTestServiceContainerLauncher.java
index 27356bc..cb6308c 100644
--- a/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/launcher/TezTestServiceContainerLauncher.java
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/launcher/TezTestServiceContainerLauncher.java
@@ -23,6 +23,7 @@ import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.service.AbstractService;
import org.apache.hadoop.yarn.api.ApplicationConstants;
+import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.util.Clock;
import org.apache.tez.dag.app.AppContext;
@@ -54,6 +55,7 @@ public class TezTestServiceContainerLauncher extends AbstractService implements
private final int servicePort;
private final TezTestServiceCommunicator communicator;
private final Clock clock;
+ private final ApplicationAttemptId appAttemptId;
// Configuration passed in here to set up final parameters
@@ -70,6 +72,7 @@ public class TezTestServiceContainerLauncher extends AbstractService implements
this.communicator = new TezTestServiceCommunicator(numThreads);
this.context = appContext;
this.tokenIdentifier = context.getApplicationID().toString();
+ this.appAttemptId = appContext.getApplicationAttemptId();
this.tal = tal;
}
@@ -128,9 +131,8 @@ public class TezTestServiceContainerLauncher extends AbstractService implements
RunContainerRequestProto.Builder builder = RunContainerRequestProto.newBuilder();
InetSocketAddress address = tal.getTaskCommunicator(event.getTaskCommId()).getAddress();
builder.setAmHost(address.getHostName()).setAmPort(address.getPort());
- builder.setAppAttemptNumber(event.getContainer().getId().getApplicationAttemptId().getAttemptId());
- builder.setApplicationIdString(
- event.getContainer().getId().getApplicationAttemptId().getApplicationId().toString());
+ builder.setAppAttemptNumber(appAttemptId.getAttemptId());
+ builder.setApplicationIdString(appAttemptId.getApplicationId().toString());
builder.setTokenIdentifier(tokenIdentifier);
builder.setContainerIdString(event.getContainer().getId().toString());
builder.setCredentialsBinary(
[17/43] tez git commit: TEZ-2426. Ensure the eventRouter thread completes before switching to a new task and thread safety fixes in IPOContexts. (sseth)
Posted by ss...@apache.org.
TEZ-2426. Ensure the eventRouter thread completes before switching to a new task and thread safety fixes in IPOContexts. (sseth)
Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/ce69aa1e
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/ce69aa1e
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/ce69aa1e
Branch: refs/heads/TEZ-2003
Commit: ce69aa1e2ca3320d33c833a96a158f94bfd73f52
Parents: 6e6ad70
Author: Siddharth Seth <ss...@apache.org>
Authored: Fri May 8 11:08:14 2015 -0700
Committer: Siddharth Seth <ss...@apache.org>
Committed: Fri May 8 11:08:14 2015 -0700
----------------------------------------------------------------------
CHANGES.txt | 1 +
.../runtime/LogicalIOProcessorRuntimeTask.java | 110 +++++++++++++------
.../runtime/api/impl/TezInputContextImpl.java | 11 +-
.../runtime/api/impl/TezOutputContextImpl.java | 2 +-
.../api/impl/TezProcessorContextImpl.java | 4 +-
.../runtime/api/impl/TezTaskContextImpl.java | 9 +-
.../apache/tez/runtime/task/TaskReporter.java | 47 +++++---
.../TestLogicalIOProcessorRuntimeTask.java | 48 +++++---
8 files changed, 160 insertions(+), 72 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/tez/blob/ce69aa1e/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 185e1b0..efb19b2 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -20,6 +20,7 @@ INCOMPATIBLE CHANGES
Default max limit increased. Should not affect existing users.
ALL CHANGES:
+ TEZ-2426. Ensure the eventRouter thread completes before switching to a new task and thread safety fixes in IPOContexts.
TEZ-2412. Should kill vertex in DAGImpl#VertexRerunWhileCommitting
TEZ-2410. VertexGroupCommitFinishedEvent & VertexCommitStartedEvent is not logged correctly
TEZ-776. Reduce AM mem usage caused by storing TezEvents
http://git-wip-us.apache.org/repos/asf/tez/blob/ce69aa1e/tez-runtime-internals/src/main/java/org/apache/tez/runtime/LogicalIOProcessorRuntimeTask.java
----------------------------------------------------------------------
diff --git a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/LogicalIOProcessorRuntimeTask.java b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/LogicalIOProcessorRuntimeTask.java
index f465d3c..1cfe538 100644
--- a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/LogicalIOProcessorRuntimeTask.java
+++ b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/LogicalIOProcessorRuntimeTask.java
@@ -30,6 +30,7 @@ import java.util.Map;
import java.util.Set;
import java.util.concurrent.CompletionService;
import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorCompletionService;
import java.util.concurrent.ExecutorService;
@@ -96,45 +97,46 @@ public class LogicalIOProcessorRuntimeTask extends RuntimeTask {
private static final Logger LOG = LoggerFactory
.getLogger(LogicalIOProcessorRuntimeTask.class);
+ @VisibleForTesting // All fields non private for testing.
private final String[] localDirs;
/** Responsible for maintaining order of Inputs */
- private final List<InputSpec> inputSpecs;
- private final Map<String, LogicalInput> inputsMap;
- private final Map<String, InputContext> inputContextMap;
+ final List<InputSpec> inputSpecs;
+ final ConcurrentMap<String, LogicalInput> inputsMap;
+ final ConcurrentMap<String, InputContext> inputContextMap;
/** Responsible for maintaining order of Outputs */
- private final List<OutputSpec> outputSpecs;
- private final Map<String, LogicalOutput> outputsMap;
- private final Map<String, OutputContext> outputContextMap;
+ final List<OutputSpec> outputSpecs;
+ final ConcurrentMap<String, LogicalOutput> outputsMap;
+ final ConcurrentMap<String, OutputContext> outputContextMap;
- private final List<GroupInputSpec> groupInputSpecs;
- private ConcurrentHashMap<String, MergedLogicalInput> groupInputsMap;
+ final List<GroupInputSpec> groupInputSpecs;
+ ConcurrentHashMap<String, MergedLogicalInput> groupInputsMap;
- private final ProcessorDescriptor processorDescriptor;
- private AbstractLogicalIOProcessor processor;
- private ProcessorContext processorContext;
+ final ProcessorDescriptor processorDescriptor;
+ AbstractLogicalIOProcessor processor;
+ ProcessorContext processorContext;
private final MemoryDistributor initialMemoryDistributor;
/** Maps which will be provided to the processor run method */
- private final LinkedHashMap<String, LogicalInput> runInputMap;
- private final LinkedHashMap<String, LogicalOutput> runOutputMap;
+ final LinkedHashMap<String, LogicalInput> runInputMap;
+ final LinkedHashMap<String, LogicalOutput> runOutputMap;
private final Map<String, ByteBuffer> serviceConsumerMetadata;
private final Map<String, String> envMap;
- private final ExecutorService initializerExecutor;
+ final ExecutorService initializerExecutor;
private final CompletionService<Void> initializerCompletionService;
private final Multimap<String, String> startedInputsMap;
- private LinkedBlockingQueue<TezEvent> eventsToBeProcessed;
- private Thread eventRouterThread = null;
+ LinkedBlockingQueue<TezEvent> eventsToBeProcessed;
+ Thread eventRouterThread = null;
private final int appAttemptNumber;
- private final InputReadyTracker inputReadyTracker;
+ private volatile InputReadyTracker inputReadyTracker;
- private final ObjectRegistry objectRegistry;
+ private volatile ObjectRegistry objectRegistry;
private final ExecutionContext ExecutionContext;
private final long memAvailable;
@@ -143,6 +145,7 @@ public class LogicalIOProcessorRuntimeTask extends RuntimeTask {
Map<String, ByteBuffer> serviceConsumerMetadata, Map<String, String> envMap,
Multimap<String, String> startedInputsMap, ObjectRegistry objectRegistry,
String pid, ExecutionContext ExecutionContext, long memAvailable) throws IOException {
+ // Note: If adding any fields here, make sure they're cleaned up in the cleanupContext method.
// TODO Remove jobToken from here post TEZ-421
super(taskSpec, tezConf, tezUmbilical, pid);
LOG.info("Initializing LogicalIOProcessorRuntimeTask with TaskSpec: "
@@ -361,6 +364,14 @@ public class LogicalIOProcessorRuntimeTask extends RuntimeTask {
setTaskDone();
if (eventRouterThread != null) {
eventRouterThread.interrupt();
+ LOG.info("Joining on EventRouter");
+ try {
+ eventRouterThread.join();
+ } catch (InterruptedException e) {
+ LOG.info("Ignoring interrupt while waiting for the router thread to die");
+ Thread.currentThread().interrupt();
+ }
+ eventRouterThread = null;
}
}
}
@@ -694,14 +705,6 @@ public class LogicalIOProcessorRuntimeTask extends RuntimeTask {
eventRouterThread.start();
}
- private void cleanupInputOutputs() {
- if (groupInputsMap != null) {
- groupInputsMap.clear();
- }
- inputsMap.clear();
- outputsMap.clear();
- }
-
private void closeContexts() throws IOException {
closeContext(inputContextMap);
closeContext(outputContextMap);
@@ -725,19 +728,62 @@ public class LogicalIOProcessorRuntimeTask extends RuntimeTask {
}
}
- public synchronized void cleanup() {
+ public void cleanup() throws InterruptedException {
+ LOG.info("Final Counters : " + getCounters().toShortString());
+ setTaskDone();
+ if (eventRouterThread != null) {
+ eventRouterThread.interrupt();
+ LOG.info("Joining on EventRouter");
+ try {
+ eventRouterThread.join();
+ } catch (InterruptedException e) {
+ LOG.info("Ignoring interrupt while waiting for the router thread to die");
+ Thread.currentThread().interrupt();
+ }
+ eventRouterThread = null;
+ }
try {
- cleanupInputOutputs();
closeContexts();
+ // Cleanup references which may be held by misbehaved tasks.
+ cleanupStructures();
} catch (IOException e) {
LOG.info("Error while cleaning up contexts ", e);
}
+ }
- LOG.info("Final Counters : " + getCounters().toShortString());
- setTaskDone();
- if (eventRouterThread != null) {
- eventRouterThread.interrupt();
+ private void cleanupStructures() {
+ if (initializerExecutor != null && !initializerExecutor.isShutdown()) {
+ initializerExecutor.shutdownNow();
+ }
+ inputsMap.clear();
+ outputsMap.clear();
+
+ inputSpecs.clear();
+ outputSpecs.clear();
+
+ inputsMap.clear();
+ outputsMap.clear();
+
+ inputContextMap.clear();
+ outputContextMap.clear();
+
+ if (groupInputSpecs != null) {
+ groupInputSpecs.clear();
}
+ if (groupInputsMap != null) {
+ groupInputsMap.clear();
+ groupInputsMap = null;
+ }
+
+ processor = null;
+ processorContext = null;
+
+ runInputMap.clear();
+ runOutputMap.clear();
+
+ eventsToBeProcessed.clear();
+ inputReadyTracker = null;
+ objectRegistry = null;
}
@Private
http://git-wip-us.apache.org/repos/asf/tez/blob/ce69aa1e/tez-runtime-internals/src/main/java/org/apache/tez/runtime/api/impl/TezInputContextImpl.java
----------------------------------------------------------------------
diff --git a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/api/impl/TezInputContextImpl.java b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/api/impl/TezInputContextImpl.java
index 101aeb9..8d6466a 100644
--- a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/api/impl/TezInputContextImpl.java
+++ b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/api/impl/TezInputContextImpl.java
@@ -55,12 +55,12 @@ public class TezInputContextImpl extends TezTaskContextImpl
private static final Logger LOG = LoggerFactory.getLogger(TezInputContextImpl.class);
- private UserPayload userPayload;
+ private volatile UserPayload userPayload;
private final String sourceVertexName;
private final EventMetaData sourceInfo;
private final int inputIndex;
private final Map<String, LogicalInput> inputs;
- private InputReadyTracker inputReadyTracker;
+ private volatile InputReadyTracker inputReadyTracker;
private final InputStatisticsReporterImpl statsReporter;
class InputStatisticsReporterImpl implements InputStatisticsReporter {
@@ -159,7 +159,11 @@ public class TezInputContextImpl extends TezTaskContextImpl
@Override
public void inputIsReady() {
- inputReadyTracker.setInputIsReady(inputs.get(sourceVertexName));
+ if (inputReadyTracker != null) {
+ inputReadyTracker.setInputIsReady(inputs.get(sourceVertexName));
+ } else {
+ LOG.warn("Ignoring Input Ready notification since the Task has already been closed");
+ }
}
@Override
@@ -172,7 +176,6 @@ public class TezInputContextImpl extends TezTaskContextImpl
super.close();
this.userPayload = null;
this.inputReadyTracker = null;
- inputs.clear();
LOG.info("Cleared TezInputContextImpl related information");
}
}
http://git-wip-us.apache.org/repos/asf/tez/blob/ce69aa1e/tez-runtime-internals/src/main/java/org/apache/tez/runtime/api/impl/TezOutputContextImpl.java
----------------------------------------------------------------------
diff --git a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/api/impl/TezOutputContextImpl.java b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/api/impl/TezOutputContextImpl.java
index b46cfd2..71e96db 100644
--- a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/api/impl/TezOutputContextImpl.java
+++ b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/api/impl/TezOutputContextImpl.java
@@ -53,7 +53,7 @@ public class TezOutputContextImpl extends TezTaskContextImpl
private static final Logger LOG = LoggerFactory.getLogger(TezOutputContextImpl.class);
- private UserPayload userPayload;
+ private volatile UserPayload userPayload;
private final String destinationVertexName;
private final EventMetaData sourceInfo;
private final int outputIndex;
http://git-wip-us.apache.org/repos/asf/tez/blob/ce69aa1e/tez-runtime-internals/src/main/java/org/apache/tez/runtime/api/impl/TezProcessorContextImpl.java
----------------------------------------------------------------------
diff --git a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/api/impl/TezProcessorContextImpl.java b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/api/impl/TezProcessorContextImpl.java
index d6b3ec5..a191ae8 100644
--- a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/api/impl/TezProcessorContextImpl.java
+++ b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/api/impl/TezProcessorContextImpl.java
@@ -53,8 +53,8 @@ public class TezProcessorContextImpl extends TezTaskContextImpl implements Proce
private static final Logger LOG = LoggerFactory.getLogger(TezProcessorContextImpl.class);
- private UserPayload userPayload;
- private InputReadyTracker inputReadyTracker;
+ private volatile UserPayload userPayload;
+ private volatile InputReadyTracker inputReadyTracker;
private final EventMetaData sourceInfo;
public TezProcessorContextImpl(Configuration conf, String[] workDirs, int appAttemptNumber,
http://git-wip-us.apache.org/repos/asf/tez/blob/ce69aa1e/tez-runtime-internals/src/main/java/org/apache/tez/runtime/api/impl/TezTaskContextImpl.java
----------------------------------------------------------------------
diff --git a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/api/impl/TezTaskContextImpl.java b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/api/impl/TezTaskContextImpl.java
index 170741a..5f04c80 100644
--- a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/api/impl/TezTaskContextImpl.java
+++ b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/api/impl/TezTaskContextImpl.java
@@ -54,15 +54,15 @@ public abstract class TezTaskContextImpl implements TaskContext, Closeable {
private final TezCounters counters;
private String[] workDirs;
private String uniqueIdentifier;
- protected LogicalIOProcessorRuntimeTask runtimeTask;
+ protected final LogicalIOProcessorRuntimeTask runtimeTask;
protected final TezUmbilical tezUmbilical;
private final Map<String, ByteBuffer> serviceConsumerMetadata;
private final int appAttemptNumber;
private final Map<String, String> auxServiceEnv;
- protected MemoryDistributor initialMemoryDistributor;
+ protected volatile MemoryDistributor initialMemoryDistributor;
protected final EntityDescriptor<?> descriptor;
private final String dagName;
- private ObjectRegistry objectRegistry;
+ private volatile ObjectRegistry objectRegistry;
private final int vertexParallelism;
private final ExecutionContext ExecutionContext;
private final long memAvailable;
@@ -225,7 +225,8 @@ public abstract class TezTaskContextImpl implements TaskContext, Closeable {
@Override
public void close() throws IOException {
- this.runtimeTask = null;
+ Preconditions.checkState(runtimeTask.isTaskDone(),
+ "Runtime task must be complete before calling cleanup");
this.objectRegistry = null;
this.initialMemoryDistributor = null;
}
http://git-wip-us.apache.org/repos/asf/tez/blob/ce69aa1e/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TaskReporter.java
----------------------------------------------------------------------
diff --git a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TaskReporter.java b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TaskReporter.java
index 3d1d1a2..8b9db16 100644
--- a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TaskReporter.java
+++ b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TaskReporter.java
@@ -27,6 +27,7 @@ import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.locks.Condition;
@@ -112,6 +113,7 @@ public class TaskReporter {
public synchronized void unregisterTask(TezTaskAttemptID taskAttemptID) {
currentCallable.markComplete();
currentCallable = null;
+ // KKK Make sure the callable completes before proceeding
}
public void shutdown() {
@@ -125,7 +127,7 @@ public class TaskReporter {
private static final float LOG_COUNTER_BACKOFF = 1.3f;
private final RuntimeTask task;
- private EventMetaData updateEventMetadata;
+ private final EventMetaData updateEventMetadata;
private final TezTaskUmbilicalProtocol umbilical;
@@ -136,6 +138,9 @@ public class TaskReporter {
private final AtomicLong requestCounter;
+ private final AtomicBoolean finalEventQueued = new AtomicBoolean(false);
+ private final AtomicBoolean askedToDie = new AtomicBoolean(false);
+
private LinkedBlockingQueue<TezEvent> eventsToSend = new LinkedBlockingQueue<TezEvent>();
private final ReentrantLock lock = new ReentrantLock();
@@ -199,6 +204,9 @@ public class TaskReporter {
}
int pendingEventCount = eventsToSend.size();
if (pendingEventCount > 0) {
+ // This is OK because the pending events will be sent via the succeeded/failed messages.
+ // TaskDone is set before taskSucceeded / taskFailed are sent out - which is what causes the
+ // thread to exit.
LOG.warn("Exiting TaskReporter thread with pending queue size=" + pendingEventCount);
}
return true;
@@ -256,6 +264,7 @@ public class TaskReporter {
if (response.shouldDie()) {
LOG.info("Received should die response from AM");
+ askedToDie.set(true);
return new ResponseWrapper(true, 1);
}
if (response.getLastRequestId() != requestId) {
@@ -268,7 +277,7 @@ public class TaskReporter {
int numEventsReceived = 0;
if (task.isTaskDone() || task.hadFatalError()) {
if (response.getEvents() != null && !response.getEvents().isEmpty()) {
- LOG.warn("Current task already complete, Ignoring all event in"
+ LOG.info("Current task already complete, Ignoring all event in"
+ " heartbeat response, eventCount=" + response.getEvents().size());
}
} else {
@@ -315,10 +324,16 @@ public class TaskReporter {
* indicates an exception somewhere in the AM.
*/
private boolean taskSucceeded(TezTaskAttemptID taskAttemptID) throws IOException, TezException {
- TezEvent statusUpdateEvent = new TezEvent(getStatusUpdateEvent(true), updateEventMetadata);
- TezEvent taskCompletedEvent = new TezEvent(new TaskAttemptCompletedEvent(),
- updateEventMetadata);
- return !heartbeat(Lists.newArrayList(statusUpdateEvent, taskCompletedEvent)).shouldDie;
+ // Ensure only one final event is ever sent.
+ if (!finalEventQueued.getAndSet(true)) {
+ TezEvent statusUpdateEvent = new TezEvent(getStatusUpdateEvent(true), updateEventMetadata);
+ TezEvent taskCompletedEvent = new TezEvent(new TaskAttemptCompletedEvent(),
+ updateEventMetadata);
+ return !heartbeat(Lists.newArrayList(statusUpdateEvent, taskCompletedEvent)).shouldDie;
+ } else {
+ LOG.warn("A final task state event has already been sent. Not sending again");
+ return askedToDie.get();
+ }
}
@VisibleForTesting
@@ -351,15 +366,21 @@ public class TaskReporter {
*/
private boolean taskFailed(TezTaskAttemptID taskAttemptID, Throwable t, String diagnostics,
EventMetaData srcMeta) throws IOException, TezException {
- TezEvent statusUpdateEvent = new TezEvent(getStatusUpdateEvent(true), updateEventMetadata);
- if (diagnostics == null) {
- diagnostics = ExceptionUtils.getStackTrace(t);
+ // Ensure only one final event is ever sent.
+ if (!finalEventQueued.getAndSet(true)) {
+ TezEvent statusUpdateEvent = new TezEvent(getStatusUpdateEvent(true), updateEventMetadata);
+ if (diagnostics == null) {
+ diagnostics = ExceptionUtils.getStackTrace(t);
+ } else {
+ diagnostics = diagnostics + ":" + ExceptionUtils.getStackTrace(t);
+ }
+ TezEvent taskAttemptFailedEvent = new TezEvent(new TaskAttemptFailedEvent(diagnostics),
+ srcMeta == null ? updateEventMetadata : srcMeta);
+ return !heartbeat(Lists.newArrayList(statusUpdateEvent, taskAttemptFailedEvent)).shouldDie;
} else {
- diagnostics = diagnostics + ":" + ExceptionUtils.getStackTrace(t);
+ LOG.warn("A final task state event has already been sent. Not sending again");
+ return askedToDie.get();
}
- TezEvent taskAttemptFailedEvent = new TezEvent(new TaskAttemptFailedEvent(diagnostics),
- srcMeta == null ? updateEventMetadata : srcMeta);
- return !heartbeat(Lists.newArrayList(statusUpdateEvent, taskAttemptFailedEvent)).shouldDie;
}
private void addEvents(TezTaskAttemptID taskAttemptID, Collection<TezEvent> events) {
http://git-wip-us.apache.org/repos/asf/tez/blob/ce69aa1e/tez-runtime-internals/src/test/java/org/apache/tez/runtime/TestLogicalIOProcessorRuntimeTask.java
----------------------------------------------------------------------
diff --git a/tez-runtime-internals/src/test/java/org/apache/tez/runtime/TestLogicalIOProcessorRuntimeTask.java b/tez-runtime-internals/src/test/java/org/apache/tez/runtime/TestLogicalIOProcessorRuntimeTask.java
index df932cf..b337bc7 100644
--- a/tez-runtime-internals/src/test/java/org/apache/tez/runtime/TestLogicalIOProcessorRuntimeTask.java
+++ b/tez-runtime-internals/src/test/java/org/apache/tez/runtime/TestLogicalIOProcessorRuntimeTask.java
@@ -19,6 +19,7 @@
package org.apache.tez.runtime;
import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import static org.mockito.Mockito.mock;
@@ -26,6 +27,7 @@ import static org.mockito.Mockito.mock;
import java.nio.ByteBuffer;
import java.util.Collections;
import java.util.HashMap;
+import java.util.LinkedList;
import java.util.List;
import java.util.Map;
@@ -33,6 +35,7 @@ import org.apache.tez.dag.api.InputDescriptor;
import org.apache.tez.dag.api.OutputDescriptor;
import org.apache.tez.dag.api.ProcessorDescriptor;
import org.apache.tez.dag.api.TezConfiguration;
+import org.apache.tez.dag.api.UserPayload;
import org.apache.tez.dag.records.TezDAGID;
import org.apache.tez.dag.records.TezTaskAttemptID;
import org.apache.tez.dag.records.TezTaskID;
@@ -133,29 +136,43 @@ public class TestLogicalIOProcessorRuntimeTask {
}
- private void cleanupAndTest(LogicalIOProcessorRuntimeTask lio) {
+ private void cleanupAndTest(LogicalIOProcessorRuntimeTask lio) throws InterruptedException {
+
+ ProcessorContext procContext = lio.getProcessorContext();
+ List<InputContext> inputContexts = new LinkedList<InputContext>();
+ inputContexts.addAll(lio.getInputContexts());
+ List<OutputContext> outputContexts = new LinkedList<OutputContext>();
+ outputContexts.addAll(lio.getOutputContexts());
lio.cleanup();
- assertTrue(lio.getProcessorContext().getUserPayload() == null);
- assertTrue(lio.getProcessorContext().getObjectRegistry() == null);
+ assertTrue(procContext.getUserPayload() == null);
+ assertTrue(procContext.getObjectRegistry() == null);
- try {
- lio.getProcessorContext().waitForAnyInputReady(Collections.<Input>emptyList());
- fail("Processor context should have been already cleanup");
- } catch (Throwable t) {
- assertTrue(t instanceof NullPointerException);
+ for (InputContext inputContext : inputContexts) {
+ assertTrue(inputContext.getUserPayload() == null);
+ assertTrue(inputContext.getObjectRegistry() == null);
}
- try {
- lio.getProcessorContext().requestInitialMemory(0, null);
- fail("Processor context should have been already cleanup");
- } catch (Throwable t) {
- assertTrue(t instanceof NullPointerException);
+ for (OutputContext outputContext : outputContexts) {
+ assertTrue(outputContext.getUserPayload() == null);
+ assertTrue(outputContext.getObjectRegistry() == null);
}
- assertTrue(lio.getInputContexts().size() == 0);
- assertTrue(lio.getOutputContexts().size() == 0);
+ assertEquals(0, lio.inputSpecs.size());
+ assertEquals(0, lio.inputsMap.size());
+ assertEquals(0, lio.inputContextMap.size());
+ assertEquals(0, lio.outputSpecs.size());
+ assertEquals(0, lio.outputsMap.size());
+ assertEquals(0, lio.outputContextMap.size());
+ assertTrue(lio.groupInputSpecs == null || lio.groupInputSpecs.size() == 0);
+ assertNull(lio.groupInputsMap);
+ assertNull(lio.processor);
+ assertNull(lio.processorContext);
+ assertEquals(0, lio.runInputMap.size());
+ assertEquals(0, lio.runOutputMap.size());
+ assertEquals(0, lio.eventsToBeProcessed.size());
+ assertNull(lio.eventRouterThread);
}
private TaskSpec createTaskSpec(TezTaskAttemptID taskAttemptID,
@@ -248,7 +265,6 @@ public class TestLogicalIOProcessorRuntimeTask {
public void start() throws Exception {
startCount++;
this.vertexParallelism = getContext().getVertexParallelism();
- System.err.println("In started");
}
@Override
[11/43] tez git commit: TEZ-2423. Tez UI: Remove Attempt Index column
from task->attempts page (Sreenath Somarajapuram via pramachandran)
Posted by ss...@apache.org.
TEZ-2423. Tez UI: Remove Attempt Index column from task->attempts page (Sreenath Somarajapuram via pramachandran)
Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/a382324c
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/a382324c
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/a382324c
Branch: refs/heads/TEZ-2003
Commit: a382324c24a30ef7b632ca1f003a90873ac802e3
Parents: 70cd396
Author: Prakash Ramachandran <pr...@hortonworks.com>
Authored: Thu May 7 17:52:00 2015 +0530
Committer: Prakash Ramachandran <pr...@hortonworks.com>
Committed: Thu May 7 17:52:00 2015 +0530
----------------------------------------------------------------------
CHANGES.txt | 1 +
.../controllers/task_task_attempts_controller.js | 15 ---------------
2 files changed, 1 insertion(+), 15 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/tez/blob/a382324c/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index c3d48b6..8de61b0 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -20,6 +20,7 @@ INCOMPATIBLE CHANGES
Default max limit increased. Should not affect existing users.
ALL CHANGES:
+ TEZ-2423. Tez UI: Remove Attempt Index column from task->attempts page
TEZ-2416. Tez UI: Make tooltips display faster.
TEZ-2404. Handle DataMovementEvent before its TaskAttemptCompletedEvent
TEZ-2424. Bump up max counter group name length limit to account for per_io counters.
http://git-wip-us.apache.org/repos/asf/tez/blob/a382324c/tez-ui/src/main/webapp/app/scripts/controllers/task_task_attempts_controller.js
----------------------------------------------------------------------
diff --git a/tez-ui/src/main/webapp/app/scripts/controllers/task_task_attempts_controller.js b/tez-ui/src/main/webapp/app/scripts/controllers/task_task_attempts_controller.js
index c5c9eea..d211479 100644
--- a/tez-ui/src/main/webapp/app/scripts/controllers/task_task_attempts_controller.js
+++ b/tez-ui/src/main/webapp/app/scripts/controllers/task_task_attempts_controller.js
@@ -62,21 +62,6 @@ App.TaskAttemptsController = App.TablePageController.extend(App.AutoCounterColum
var that = this;
return [
{
- id: 'id',
- headerCellName: 'Attempt Index',
- templateName: 'components/basic-table/linked-cell',
- contentPath: 'id',
- getCellContent: function (row) {
- var id = row.get('id'),
- idPrefix = 'attempt_%@_'.fmt(row.get('dagID').substr(4));
- return {
- linkTo: 'taskAttempt',
- entityId: id,
- displayText: id.indexOf(idPrefix) == 0 ? id.substr(idPrefix.length) : id
- };
- }
- },
- {
id: 'attemptNo',
headerCellName: 'Attempt No',
templateName: 'components/basic-table/linked-cell',
[33/43] tez git commit: TEZ-2241. Miscellaneous fixes after last
rebase. (sseth)
Posted by ss...@apache.org.
TEZ-2241. Miscellaneous fixes after last rebase. (sseth)
Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/5a46aa59
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/5a46aa59
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/5a46aa59
Branch: refs/heads/TEZ-2003
Commit: 5a46aa598a3455ae48354863f91ebb40ee793157
Parents: e1ab191
Author: Siddharth Seth <ss...@apache.org>
Authored: Thu Mar 26 17:57:55 2015 -0700
Committer: Siddharth Seth <ss...@apache.org>
Committed: Fri May 8 14:41:30 2015 -0700
----------------------------------------------------------------------
TEZ-2003-CHANGES.txt | 1 +
.../org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java | 2 ++
.../org/apache/tez/dag/app/TezLocalTaskCommunicatorImpl.java | 6 +++---
.../java/org/apache/tez/dag/app/TezTaskCommunicatorImpl.java | 6 +++---
.../apache/tez/dag/app/launcher/ContainerLauncherRouter.java | 6 +++---
.../test/java/org/apache/tez/dag/app/MockDAGAppMaster.java | 2 +-
tez-ext-service-tests/pom.xml | 4 ++--
.../tez/dag/app/launcher/TezTestServiceContainerLauncher.java | 6 +++---
.../dag/app/launcher/TezTestServiceNoOpContainerLauncher.java | 6 +++---
.../tez/dag/app/rm/TezTestServiceTaskSchedulerService.java | 7 ++++---
.../dag/app/taskcomm/TezTestServiceTaskCommunicatorImpl.java | 7 ++++---
.../org/apache/tez/service/MiniTezTestServiceCluster.java | 6 +++---
.../java/org/apache/tez/service/impl/ContainerRunnerImpl.java | 5 +++--
.../test/java/org/apache/tez/service/impl/TezTestService.java | 4 ++--
.../tez/service/impl/TezTestServiceProtocolServerImpl.java | 6 +++---
.../org/apache/tez/shufflehandler/FadvisedChunkedFile.java | 6 +++---
.../org/apache/tez/shufflehandler/FadvisedFileRegion.java | 6 +++---
.../test/java/org/apache/tez/shufflehandler/IndexCache.java | 6 +++---
.../java/org/apache/tez/shufflehandler/ShuffleHandler.java | 6 +++---
.../java/org/apache/tez/tests/TestExternalTezServices.java | 6 +++---
20 files changed, 55 insertions(+), 49 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/tez/blob/5a46aa59/TEZ-2003-CHANGES.txt
----------------------------------------------------------------------
diff --git a/TEZ-2003-CHANGES.txt b/TEZ-2003-CHANGES.txt
index 774a685..d51686d 100644
--- a/TEZ-2003-CHANGES.txt
+++ b/TEZ-2003-CHANGES.txt
@@ -11,5 +11,6 @@ ALL CHANGES:
TEZ-2139. Update tez version to 0.7.0-TEZ-2003-SNAPSHOT.
TEZ-2175. Task priority should be available to the TaskCommunicator plugin.
TEZ-2187. Allow TaskCommunicators to report failed / killed attempts.
+ TEZ-2241. Miscellaneous fixes after last reabse.
INCOMPATIBLE CHANGES:
http://git-wip-us.apache.org/repos/asf/tez/blob/5a46aa59/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java b/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
index 94f6cae..0ee448b 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
@@ -67,6 +67,8 @@ import org.apache.tez.dag.records.TezTaskAttemptID;
import org.apache.tez.dag.records.TezVertexID;
import org.apache.tez.runtime.api.impl.TezEvent;
import org.apache.tez.common.security.JobTokenSecretManager;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
@SuppressWarnings("unchecked")
http://git-wip-us.apache.org/repos/asf/tez/blob/5a46aa59/tez-dag/src/main/java/org/apache/tez/dag/app/TezLocalTaskCommunicatorImpl.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/TezLocalTaskCommunicatorImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/TezLocalTaskCommunicatorImpl.java
index 3704cc4..47688d1 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/TezLocalTaskCommunicatorImpl.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/TezLocalTaskCommunicatorImpl.java
@@ -18,14 +18,14 @@ import java.net.InetAddress;
import java.net.InetSocketAddress;
import java.net.UnknownHostException;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
import org.apache.tez.dag.api.TaskCommunicatorContext;
import org.apache.tez.dag.api.TezUncheckedException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
public class TezLocalTaskCommunicatorImpl extends TezTaskCommunicatorImpl {
- private static final Log LOG = LogFactory.getLog(TezLocalTaskCommunicatorImpl.class);
+ private static final Logger LOG = LoggerFactory.getLogger(TezLocalTaskCommunicatorImpl.class);
public TezLocalTaskCommunicatorImpl(
TaskCommunicatorContext taskCommunicatorContext) {
http://git-wip-us.apache.org/repos/asf/tez/blob/5a46aa59/tez-dag/src/main/java/org/apache/tez/dag/app/TezTaskCommunicatorImpl.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/TezTaskCommunicatorImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/TezTaskCommunicatorImpl.java
index f288748..9d0c031 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/TezTaskCommunicatorImpl.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/TezTaskCommunicatorImpl.java
@@ -23,8 +23,6 @@ import java.util.concurrent.ConcurrentMap;
import com.google.common.base.Preconditions;
import com.google.common.collect.Maps;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
@@ -55,11 +53,13 @@ import org.apache.tez.dag.records.TezTaskAttemptID;
import org.apache.tez.runtime.api.impl.TaskSpec;
import org.apache.tez.runtime.api.impl.TezHeartbeatRequest;
import org.apache.tez.runtime.api.impl.TezHeartbeatResponse;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
@InterfaceAudience.Private
public class TezTaskCommunicatorImpl extends TaskCommunicator {
- private static final Log LOG = LogFactory.getLog(TezTaskCommunicatorImpl.class);
+ private static final Logger LOG = LoggerFactory.getLogger(TezTaskCommunicatorImpl.class);
private static final ContainerTask TASK_FOR_INVALID_JVM = new ContainerTask(
null, true, null, null, false);
http://git-wip-us.apache.org/repos/asf/tez/blob/5a46aa59/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncherRouter.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncherRouter.java b/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncherRouter.java
index 70b0cbc..dd3571e 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncherRouter.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncherRouter.java
@@ -19,8 +19,6 @@ import java.lang.reflect.InvocationTargetException;
import java.net.UnknownHostException;
import com.google.common.annotations.VisibleForTesting;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.service.AbstractService;
import org.apache.hadoop.yarn.event.EventHandler;
@@ -30,11 +28,13 @@ import org.apache.tez.dag.api.TezUncheckedException;
import org.apache.tez.dag.app.AppContext;
import org.apache.tez.dag.app.TaskAttemptListener;
import org.apache.tez.dag.app.rm.NMCommunicatorEvent;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
public class ContainerLauncherRouter extends AbstractService
implements EventHandler<NMCommunicatorEvent> {
- static final Log LOG = LogFactory.getLog(ContainerLauncherImpl.class);
+ static final Logger LOG = LoggerFactory.getLogger(ContainerLauncherImpl.class);
private final ContainerLauncher containerLaunchers[];
http://git-wip-us.apache.org/repos/asf/tez/blob/5a46aa59/tez-dag/src/test/java/org/apache/tez/dag/app/MockDAGAppMaster.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/MockDAGAppMaster.java b/tez-dag/src/test/java/org/apache/tez/dag/app/MockDAGAppMaster.java
index 4f014a4..051497b 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/MockDAGAppMaster.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/MockDAGAppMaster.java
@@ -340,7 +340,7 @@ public class MockDAGAppMaster extends DAGAppMaster {
long startTime = System.nanoTime();
long startCpuTime = threadMxBean.getCurrentThreadCpuTime();
TaskHeartbeatResponse response = taListener.heartbeat(request);
- if (response.shouldDie()) {
+ if (response.isShouldDie()) {
cData.remove();
} else {
cData.nextFromEventId += response.getNextFromEventId();
http://git-wip-us.apache.org/repos/asf/tez/blob/5a46aa59/tez-ext-service-tests/pom.xml
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/pom.xml b/tez-ext-service-tests/pom.xml
index 1113341..907e129 100644
--- a/tez-ext-service-tests/pom.xml
+++ b/tez-ext-service-tests/pom.xml
@@ -28,8 +28,8 @@
<dependencies>
<dependency>
- <groupId>log4j</groupId>
- <artifactId>log4j</artifactId>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-log4j12</artifactId>
</dependency>
<dependency>
<groupId>com.google.guava</groupId>
http://git-wip-us.apache.org/repos/asf/tez/blob/5a46aa59/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/launcher/TezTestServiceContainerLauncher.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/launcher/TezTestServiceContainerLauncher.java b/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/launcher/TezTestServiceContainerLauncher.java
index cb6308c..9d22196 100644
--- a/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/launcher/TezTestServiceContainerLauncher.java
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/launcher/TezTestServiceContainerLauncher.java
@@ -18,8 +18,6 @@ import java.net.InetSocketAddress;
import com.google.common.base.Preconditions;
import com.google.protobuf.ByteString;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.service.AbstractService;
import org.apache.hadoop.yarn.api.ApplicationConstants;
@@ -40,6 +38,8 @@ import org.apache.tez.dag.history.events.ContainerLaunchedEvent;
import org.apache.tez.service.TezTestServiceConfConstants;
import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos;
import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos.RunContainerRequestProto;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
public class TezTestServiceContainerLauncher extends AbstractService implements ContainerLauncher {
@@ -47,7 +47,7 @@ public class TezTestServiceContainerLauncher extends AbstractService implements
// TODO May need multiple connections per target machine, depending upon how synchronization is handled in the RPC layer
- static final Log LOG = LogFactory.getLog(TezTestServiceContainerLauncher.class);
+ static final Logger LOG = LoggerFactory.getLogger(TezTestServiceContainerLauncher.class);
private final AppContext context;
private final String tokenIdentifier;
http://git-wip-us.apache.org/repos/asf/tez/blob/5a46aa59/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/launcher/TezTestServiceNoOpContainerLauncher.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/launcher/TezTestServiceNoOpContainerLauncher.java b/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/launcher/TezTestServiceNoOpContainerLauncher.java
index 8c8e486..977d0d3 100644
--- a/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/launcher/TezTestServiceNoOpContainerLauncher.java
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/launcher/TezTestServiceNoOpContainerLauncher.java
@@ -14,8 +14,6 @@
package org.apache.tez.dag.app.launcher;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.service.AbstractService;
import org.apache.hadoop.yarn.util.Clock;
@@ -28,10 +26,12 @@ import org.apache.tez.dag.app.rm.container.AMContainerEventLaunched;
import org.apache.tez.dag.app.rm.container.AMContainerEventType;
import org.apache.tez.dag.history.DAGHistoryEvent;
import org.apache.tez.dag.history.events.ContainerLaunchedEvent;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
public class TezTestServiceNoOpContainerLauncher extends AbstractService implements ContainerLauncher {
- static final Log LOG = LogFactory.getLog(TezTestServiceNoOpContainerLauncher.class);
+ static final Logger LOG = LoggerFactory.getLogger(TezTestServiceNoOpContainerLauncher.class);
private final AppContext context;
private final Clock clock;
http://git-wip-us.apache.org/repos/asf/tez/blob/5a46aa59/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/rm/TezTestServiceTaskSchedulerService.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/rm/TezTestServiceTaskSchedulerService.java b/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/rm/TezTestServiceTaskSchedulerService.java
index 872d592..50dfb24 100644
--- a/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/rm/TezTestServiceTaskSchedulerService.java
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/rm/TezTestServiceTaskSchedulerService.java
@@ -28,8 +28,6 @@ import java.util.concurrent.atomic.AtomicInteger;
import com.google.common.base.Preconditions;
import com.google.common.primitives.Ints;
import com.google.common.util.concurrent.ThreadFactoryBuilder;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.ApplicationId;
@@ -41,11 +39,14 @@ import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.tez.dag.api.TaskAttemptEndReason;
import org.apache.tez.dag.app.AppContext;
import org.apache.tez.service.TezTestServiceConfConstants;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
public class TezTestServiceTaskSchedulerService extends TaskSchedulerService {
- private static final Log LOG = LogFactory.getLog(TezTestServiceTaskSchedulerService.class);
+ private static final Logger
+ LOG = LoggerFactory.getLogger(TezTestServiceTaskSchedulerService.class);
private final ExecutorService appCallbackExecutor;
private final TaskSchedulerAppCallback appClientDelegate;
http://git-wip-us.apache.org/repos/asf/tez/blob/5a46aa59/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/taskcomm/TezTestServiceTaskCommunicatorImpl.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/taskcomm/TezTestServiceTaskCommunicatorImpl.java b/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/taskcomm/TezTestServiceTaskCommunicatorImpl.java
index e3385a2..ef983c2 100644
--- a/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/taskcomm/TezTestServiceTaskCommunicatorImpl.java
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/taskcomm/TezTestServiceTaskCommunicatorImpl.java
@@ -23,8 +23,6 @@ import java.util.concurrent.RejectedExecutionException;
import com.google.protobuf.ByteString;
import com.google.protobuf.ServiceException;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.ipc.RemoteException;
@@ -41,11 +39,14 @@ import org.apache.tez.runtime.api.impl.TaskSpec;
import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos.SubmitWorkRequestProto;
import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos.SubmitWorkResponseProto;
import org.apache.tez.util.ProtoConverters;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
public class TezTestServiceTaskCommunicatorImpl extends TezTaskCommunicatorImpl {
- private static final Log LOG = LogFactory.getLog(TezTestServiceTaskCommunicatorImpl.class);
+ private static final Logger
+ LOG = LoggerFactory.getLogger(TezTestServiceTaskCommunicatorImpl.class);
private final TezTestServiceCommunicator communicator;
private final SubmitWorkRequestProto BASE_SUBMIT_WORK_REQUEST;
http://git-wip-us.apache.org/repos/asf/tez/blob/5a46aa59/tez-ext-service-tests/src/test/java/org/apache/tez/service/MiniTezTestServiceCluster.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/service/MiniTezTestServiceCluster.java b/tez-ext-service-tests/src/test/java/org/apache/tez/service/MiniTezTestServiceCluster.java
index 0ac0b33..c5ff02d 100644
--- a/tez-ext-service-tests/src/test/java/org/apache/tez/service/MiniTezTestServiceCluster.java
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/service/MiniTezTestServiceCluster.java
@@ -19,8 +19,6 @@ import java.io.IOException;
import java.net.InetSocketAddress;
import com.google.common.base.Preconditions;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileContext;
import org.apache.hadoop.fs.Path;
@@ -28,10 +26,12 @@ import org.apache.hadoop.service.AbstractService;
import org.apache.hadoop.util.Shell;
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
import org.apache.tez.service.impl.TezTestService;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
public class MiniTezTestServiceCluster extends AbstractService {
- private static final Log LOG = LogFactory.getLog(MiniTezTestServiceCluster.class);
+ private static final Logger LOG = LoggerFactory.getLogger(MiniTezTestServiceCluster.class);
private final File testWorkDir;
private final long availableMemory;
http://git-wip-us.apache.org/repos/asf/tez/blob/5a46aa59/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/ContainerRunnerImpl.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/ContainerRunnerImpl.java b/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/ContainerRunnerImpl.java
index 379d952..e7c02c8 100644
--- a/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/ContainerRunnerImpl.java
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/ContainerRunnerImpl.java
@@ -52,7 +52,6 @@ import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.service.AbstractService;
import org.apache.hadoop.yarn.api.ApplicationConstants;
import org.apache.hadoop.yarn.util.AuxiliaryServiceHelper;
-import org.apache.log4j.Logger;
import org.apache.tez.common.TezCommonUtils;
import org.apache.tez.common.TezTaskUmbilicalProtocol;
import org.apache.tez.common.security.JobTokenIdentifier;
@@ -75,10 +74,12 @@ import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos.RunContainer
import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos.SubmitWorkRequestProto;
import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos.TaskSpecProto;
import org.apache.tez.util.ProtoConverters;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
public class ContainerRunnerImpl extends AbstractService implements ContainerRunner {
- private static final Logger LOG = Logger.getLogger(ContainerRunnerImpl.class);
+ private static final Logger LOG = LoggerFactory.getLogger(ContainerRunnerImpl.class);
public static final String DAG_NAME_INSTRUMENTED_FAILURES = "InstrumentedFailures";
http://git-wip-us.apache.org/repos/asf/tez/blob/5a46aa59/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/TezTestService.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/TezTestService.java b/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/TezTestService.java
index 855f1b0..322be00 100644
--- a/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/TezTestService.java
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/TezTestService.java
@@ -23,16 +23,16 @@ import com.google.common.base.Preconditions;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.service.AbstractService;
import org.apache.hadoop.util.StringUtils;
-import org.apache.log4j.Logger;
import org.apache.tez.dag.api.TezException;
import org.apache.tez.service.ContainerRunner;
import org.apache.tez.shufflehandler.ShuffleHandler;
import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos;
import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos.RunContainerRequestProto;
+import org.slf4j.LoggerFactory;
public class TezTestService extends AbstractService implements ContainerRunner {
- private static final Logger LOG = Logger.getLogger(TezTestService.class);
+ private static final org.slf4j.Logger LOG = LoggerFactory.getLogger(TezTestService.class);
private final Configuration shuffleHandlerConf;
private final int numExecutors;
http://git-wip-us.apache.org/repos/asf/tez/blob/5a46aa59/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/TezTestServiceProtocolServerImpl.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/TezTestServiceProtocolServerImpl.java b/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/TezTestServiceProtocolServerImpl.java
index 39d7156..b5d3f83 100644
--- a/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/TezTestServiceProtocolServerImpl.java
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/TezTestServiceProtocolServerImpl.java
@@ -22,8 +22,6 @@ import com.google.common.annotations.VisibleForTesting;
import com.google.protobuf.BlockingService;
import com.google.protobuf.RpcController;
import com.google.protobuf.ServiceException;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.ipc.ProtobufRpcEngine;
@@ -38,11 +36,13 @@ import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos.RunContainer
import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos.RunContainerResponseProto;
import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos.SubmitWorkRequestProto;
import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos.SubmitWorkResponseProto;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
public class TezTestServiceProtocolServerImpl extends AbstractService
implements TezTestServiceProtocolBlockingPB {
- private static final Log LOG = LogFactory.getLog(TezTestServiceProtocolServerImpl.class);
+ private static final Logger LOG = LoggerFactory.getLogger(TezTestServiceProtocolServerImpl.class);
private final ContainerRunner containerRunner;
private RPC.Server server;
http://git-wip-us.apache.org/repos/asf/tez/blob/5a46aa59/tez-ext-service-tests/src/test/java/org/apache/tez/shufflehandler/FadvisedChunkedFile.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/shufflehandler/FadvisedChunkedFile.java b/tez-ext-service-tests/src/test/java/org/apache/tez/shufflehandler/FadvisedChunkedFile.java
index 65588fe..294add6 100644
--- a/tez-ext-service-tests/src/test/java/org/apache/tez/shufflehandler/FadvisedChunkedFile.java
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/shufflehandler/FadvisedChunkedFile.java
@@ -18,16 +18,16 @@ import java.io.FileDescriptor;
import java.io.IOException;
import java.io.RandomAccessFile;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.io.ReadaheadPool;
import org.apache.hadoop.io.ReadaheadPool.ReadaheadRequest;
import org.apache.hadoop.io.nativeio.NativeIO;
import org.jboss.netty.handler.stream.ChunkedFile;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
public class FadvisedChunkedFile extends ChunkedFile {
- private static final Log LOG = LogFactory.getLog(FadvisedChunkedFile.class);
+ private static final Logger LOG = LoggerFactory.getLogger(FadvisedChunkedFile.class);
private final boolean manageOsCache;
private final int readaheadLength;
http://git-wip-us.apache.org/repos/asf/tez/blob/5a46aa59/tez-ext-service-tests/src/test/java/org/apache/tez/shufflehandler/FadvisedFileRegion.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/shufflehandler/FadvisedFileRegion.java b/tez-ext-service-tests/src/test/java/org/apache/tez/shufflehandler/FadvisedFileRegion.java
index bdffe52..e5392d3 100644
--- a/tez-ext-service-tests/src/test/java/org/apache/tez/shufflehandler/FadvisedFileRegion.java
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/shufflehandler/FadvisedFileRegion.java
@@ -22,16 +22,16 @@ import java.nio.channels.FileChannel;
import java.nio.channels.WritableByteChannel;
import com.google.common.annotations.VisibleForTesting;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.io.ReadaheadPool;
import org.apache.hadoop.io.ReadaheadPool.ReadaheadRequest;
import org.apache.hadoop.io.nativeio.NativeIO;
import org.jboss.netty.channel.DefaultFileRegion;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
public class FadvisedFileRegion extends DefaultFileRegion {
- private static final Log LOG = LogFactory.getLog(FadvisedFileRegion.class);
+ private static final Logger LOG = LoggerFactory.getLogger(FadvisedFileRegion.class);
private final boolean manageOsCache;
private final int readaheadLength;
http://git-wip-us.apache.org/repos/asf/tez/blob/5a46aa59/tez-ext-service-tests/src/test/java/org/apache/tez/shufflehandler/IndexCache.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/shufflehandler/IndexCache.java b/tez-ext-service-tests/src/test/java/org/apache/tez/shufflehandler/IndexCache.java
index 9a51ca0..5a45917 100644
--- a/tez-ext-service-tests/src/test/java/org/apache/tez/shufflehandler/IndexCache.java
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/shufflehandler/IndexCache.java
@@ -22,20 +22,20 @@ import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.atomic.AtomicInteger;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.tez.runtime.library.common.Constants;
import org.apache.tez.runtime.library.common.sort.impl.TezIndexRecord;
import org.apache.tez.runtime.library.common.sort.impl.TezSpillRecord;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
class IndexCache {
private final Configuration conf;
private final int totalMemoryAllowed;
private AtomicInteger totalMemoryUsed = new AtomicInteger();
- private static final Log LOG = LogFactory.getLog(IndexCache.class);
+ private static final Logger LOG = LoggerFactory.getLogger(IndexCache.class);
private final ConcurrentHashMap<String,IndexInformation> cache =
new ConcurrentHashMap<String,IndexInformation>();
http://git-wip-us.apache.org/repos/asf/tez/blob/5a46aa59/tez-ext-service-tests/src/test/java/org/apache/tez/shufflehandler/ShuffleHandler.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/shufflehandler/ShuffleHandler.java b/tez-ext-service-tests/src/test/java/org/apache/tez/shufflehandler/ShuffleHandler.java
index cc82d74..8cbb8c7 100644
--- a/tez-ext-service-tests/src/test/java/org/apache/tez/shufflehandler/ShuffleHandler.java
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/shufflehandler/ShuffleHandler.java
@@ -51,8 +51,6 @@ import java.util.regex.Pattern;
import com.google.common.base.Charsets;
import com.google.common.base.Preconditions;
import com.google.common.util.concurrent.ThreadFactoryBuilder;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.LocalDirAllocator;
import org.apache.hadoop.fs.Path;
@@ -105,10 +103,12 @@ import org.jboss.netty.handler.codec.http.QueryStringDecoder;
import org.jboss.netty.handler.ssl.SslHandler;
import org.jboss.netty.handler.stream.ChunkedWriteHandler;
import org.jboss.netty.util.CharsetUtil;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
public class ShuffleHandler {
- private static final Log LOG = LogFactory.getLog(ShuffleHandler.class);
+ private static final Logger LOG = LoggerFactory.getLogger(ShuffleHandler.class);
public static final String SHUFFLE_HANDLER_LOCAL_DIRS = "tez.shuffle.handler.local-dirs";
http://git-wip-us.apache.org/repos/asf/tez/blob/5a46aa59/tez-ext-service-tests/src/test/java/org/apache/tez/tests/TestExternalTezServices.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/tests/TestExternalTezServices.java b/tez-ext-service-tests/src/test/java/org/apache/tez/tests/TestExternalTezServices.java
index b6a166d..4d0a610 100644
--- a/tez-ext-service-tests/src/test/java/org/apache/tez/tests/TestExternalTezServices.java
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/tests/TestExternalTezServices.java
@@ -20,8 +20,6 @@ import java.io.IOException;
import java.util.Map;
import com.google.common.collect.Maps;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@@ -48,10 +46,12 @@ import org.apache.tez.test.MiniTezCluster;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
public class TestExternalTezServices {
- private static final Log LOG = LogFactory.getLog(TestExternalTezServices.class);
+ private static final Logger LOG = LoggerFactory.getLogger(TestExternalTezServices.class);
private static final String EXT_PUSH_ENTITY_NAME = "ExtServiceTestPush";
[07/43] tez git commit: TEZ-2424. Bump up max counter group name
length limit to account for per_io counters. (hitesh)
Posted by ss...@apache.org.
TEZ-2424. Bump up max counter group name length limit to account for per_io counters. (hitesh)
Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/78ca7af6
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/78ca7af6
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/78ca7af6
Branch: refs/heads/TEZ-2003
Commit: 78ca7af63987e6052d8756f9ab827cb10b26dbcf
Parents: 5530863
Author: Hitesh Shah <hi...@apache.org>
Authored: Wed May 6 15:00:48 2015 -0700
Committer: Hitesh Shah <hi...@apache.org>
Committed: Wed May 6 15:00:48 2015 -0700
----------------------------------------------------------------------
CHANGES.txt | 5 ++++-
.../src/main/java/org/apache/tez/dag/api/TezConfiguration.java | 2 +-
2 files changed, 5 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/tez/blob/78ca7af6/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index c865f12..73f8fda 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -14,9 +14,12 @@ Release 0.7.0: Unreleased
INCOMPATIBLE CHANGES
TEZ-2176. Move all logging to slf4j. (commons-logging jar no longer part of Tez tar)
- TEZ-1993. Implement a pluggable InputSizeEstimator for grouping fairly
+ TEZ-1993. Implement a pluggable InputSizeEstimator for grouping fairly.
+ TEZ-2424. Bump up max counter group name length limit to account for per_io counters.
+ Default max limit increased. Should not affect existing users.
ALL CHANGES:
+ TEZ-2424. Bump up max counter group name length limit to account for per_io counters.
TEZ-2417. Tez UI: Counters are blank in the Attempts page if all attempts failed
TEZ-2366. Pig tez MiniTezCluster unit tests fail intermittently after TEZ-2333
TEZ-2406. Tez UI: Display per-io counter columns in task and attempt pages under vertex
http://git-wip-us.apache.org/repos/asf/tez/blob/78ca7af6/tez-api/src/main/java/org/apache/tez/dag/api/TezConfiguration.java
----------------------------------------------------------------------
diff --git a/tez-api/src/main/java/org/apache/tez/dag/api/TezConfiguration.java b/tez-api/src/main/java/org/apache/tez/dag/api/TezConfiguration.java
index f64172e..e4170e9 100644
--- a/tez-api/src/main/java/org/apache/tez/dag/api/TezConfiguration.java
+++ b/tez-api/src/main/java/org/apache/tez/dag/api/TezConfiguration.java
@@ -372,7 +372,7 @@ public class TezConfiguration extends Configuration {
@ConfigurationScope(Scope.AM)
public static final String TEZ_COUNTERS_GROUP_NAME_MAX_LENGTH =
TEZ_PREFIX + "counters.group-name.max-length";
- public static final int TEZ_COUNTERS_GROUP_NAME_MAX_LENGTH_DEFAULT = 128;
+ public static final int TEZ_COUNTERS_GROUP_NAME_MAX_LENGTH_DEFAULT = 256;
@Unstable
/**
[37/43] tez git commit: TEZ-2302. Allow TaskCommunicators to
subscribe for Vertex updates. (sseth)
Posted by ss...@apache.org.
TEZ-2302. Allow TaskCommunicators to subscribe for Vertex updates. (sseth)
Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/e7c3cc72
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/e7c3cc72
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/e7c3cc72
Branch: refs/heads/TEZ-2003
Commit: e7c3cc72ec6c7c680adb2be7c2594a77409426d4
Parents: 5239a45
Author: Siddharth Seth <ss...@apache.org>
Authored: Thu Apr 9 13:33:48 2015 -0700
Committer: Siddharth Seth <ss...@apache.org>
Committed: Fri May 8 14:41:31 2015 -0700
----------------------------------------------------------------------
TEZ-2003-CHANGES.txt | 1 +
.../apache/tez/dag/api/TaskCommunicator.java | 20 +++
.../tez/dag/api/TaskCommunicatorContext.java | 14 ++-
.../dag/app/TaskAttemptListenerImpTezDag.java | 52 +++-----
.../dag/app/TaskCommunicatorContextImpl.java | 124 +++++++++++++++++++
.../tez/dag/app/TezTaskCommunicatorImpl.java | 6 +
.../java/org/apache/tez/dag/app/dag/DAG.java | 2 +
.../apache/tez/dag/app/dag/impl/DAGImpl.java | 5 +
8 files changed, 188 insertions(+), 36 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/tez/blob/e7c3cc72/TEZ-2003-CHANGES.txt
----------------------------------------------------------------------
diff --git a/TEZ-2003-CHANGES.txt b/TEZ-2003-CHANGES.txt
index 9d6b220..ca5225e 100644
--- a/TEZ-2003-CHANGES.txt
+++ b/TEZ-2003-CHANGES.txt
@@ -15,5 +15,6 @@ ALL CHANGES:
TEZ-2283. Fixes after rebase 04/07.
TEZ-2284. Separate TaskReporter into an interface.
TEZ-2285. Allow TaskCommunicators to indicate task/container liveness.
+ TEZ-2302. Allow TaskCommunicators to subscribe for Vertex updates.
INCOMPATIBLE CHANGES:
http://git-wip-us.apache.org/repos/asf/tez/blob/e7c3cc72/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicator.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicator.java b/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicator.java
index 945091e..a2cd858 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicator.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicator.java
@@ -22,6 +22,7 @@ import org.apache.hadoop.security.Credentials;
import org.apache.hadoop.service.AbstractService;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.LocalResource;
+import org.apache.tez.dag.api.event.VertexStateUpdate;
import org.apache.tez.dag.records.TezTaskAttemptID;
import org.apache.tez.runtime.api.impl.TaskSpec;
@@ -54,4 +55,23 @@ public abstract class TaskCommunicator extends AbstractService {
public abstract InetSocketAddress getAddress();
// TODO Eventually. Add methods here to support preemption of tasks.
+
+ /**
+ * Receive notifications on vertex state changes.
+ * <p/>
+ * State changes will be received based on the registration via {@link
+ * org.apache.tez.dag.api.TaskCommunicatorContext#registerForVertexStateUpdates(String,
+ * java.util.Set)}. Notifications will be received for all registered state changes, and not just
+ * for the latest state update. They will be in the order in which the state changes occurred. <p/>
+ *
+ * Extensive processing should not be performed via this method call. Instead this should just be
+ * used as a notification mechanism.
+ * <br>This method may be invoked concurrently with other invocations into the TaskCommunicator and
+ * multi-threading/concurrency implications must be considered.
+ * @param stateUpdate an event indicating the name of the vertex, and its updated state.
+ * Additional information may be available for specific events. Look at the
+ * type hierarchy for {@link org.apache.tez.dag.api.event.VertexStateUpdate}
+ * @throws Exception
+ */
+ public abstract void onVertexStateUpdated(VertexStateUpdate stateUpdate) throws Exception;
}
http://git-wip-us.apache.org/repos/asf/tez/blob/e7c3cc72/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicatorContext.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicatorContext.java b/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicatorContext.java
index 0c3bac3..19caed9 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicatorContext.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicatorContext.java
@@ -16,10 +16,12 @@ package org.apache.tez.dag.api;
import javax.annotation.Nullable;
import java.io.IOException;
+import java.util.Set;
import org.apache.hadoop.security.Credentials;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.tez.dag.api.event.VertexState;
import org.apache.tez.dag.records.TezTaskAttemptID;
@@ -48,7 +50,7 @@ public interface TaskCommunicatorContext {
void containerAlive(ContainerId containerId);
// TODO TEZ-2003 Move to vertex, taskIndex, version. Rename to taskAttempt*
- void taskStartedRemotely(TezTaskAttemptID taskAttemptID, ContainerId containerId);
+ void taskStartedRemotely(TezTaskAttemptID taskAttemptId, ContainerId containerId);
// TODO TEZ-2003 Move to vertex, taskIndex, version. Rename to taskAttempt*
void taskKilled(TezTaskAttemptID taskAttemptId, TaskAttemptEndReason taskAttemptEndReason, @Nullable String diagnostics);
@@ -56,6 +58,16 @@ public interface TaskCommunicatorContext {
// TODO TEZ-2003 Move to vertex, taskIndex, version. Rename to taskAttempt*
void taskFailed(TezTaskAttemptID taskAttemptId, TaskAttemptEndReason taskAttemptEndReason, @Nullable String diagnostics);
+ /**
+ * Register to get notifications on updates to the specified vertex. Notifications will be sent
+ * via {@link org.apache.tez.dag.api.TaskCommunicator#onVertexStateUpdated(org.apache.tez.dag.api.event.VertexStateUpdate)} <p/>
+ *
+ * This method can only be invoked once. Duplicate invocations will result in an error.
+ *
+ * @param vertexName the vertex name for which notifications are required.
+ * @param stateSet the set of states for which notifications are required. null implies all
+ */
+ void registerForVertexStateUpdates(String vertexName, @Nullable Set<VertexState> stateSet);
// TODO TEZ-2003 API. Should a method exist for task succeeded.
// TODO Eventually Add methods to report availability stats to the scheduler.
http://git-wip-us.apache.org/repos/asf/tez/blob/e7c3cc72/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java b/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
index cd39cc8..aaf9cca 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
@@ -17,6 +17,7 @@
package org.apache.tez.dag.app;
+import javax.annotation.Nullable;
import java.io.IOException;
import java.lang.reflect.Constructor;
import java.lang.reflect.InvocationTargetException;
@@ -26,6 +27,7 @@ import java.net.UnknownHostException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
+import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
@@ -35,13 +37,13 @@ import org.apache.tez.dag.app.dag.event.TaskAttemptEventStatusUpdate;
import org.apache.tez.dag.app.dag.event.TaskAttemptEventType;
import org.apache.tez.runtime.api.events.TaskStatusUpdateEvent;
import org.apache.tez.runtime.api.impl.EventType;
+import com.google.common.base.Preconditions;
+import org.apache.tez.dag.api.event.VertexState;
+import org.apache.tez.dag.api.event.VertexStateUpdate;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import com.google.common.annotations.VisibleForTesting;
import org.apache.hadoop.classification.InterfaceAudience;
-import org.apache.hadoop.security.Credentials;
import org.apache.hadoop.service.AbstractService;
-import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.NodeId;
import org.apache.tez.common.ReflectionUtils;
import org.apache.tez.common.TezUtilsInternal;
@@ -67,14 +69,12 @@ import org.apache.tez.dag.records.TezTaskAttemptID;
import org.apache.tez.dag.records.TezVertexID;
import org.apache.tez.runtime.api.impl.TezEvent;
import org.apache.tez.common.security.JobTokenSecretManager;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
@SuppressWarnings("unchecked")
@InterfaceAudience.Private
public class TaskAttemptListenerImpTezDag extends AbstractService implements
- TaskAttemptListener, TaskCommunicatorContext {
+ TaskAttemptListener {
private static final Logger LOG = LoggerFactory
.getLogger(TaskAttemptListenerImpTezDag.class);
@@ -124,7 +124,7 @@ public class TaskAttemptListenerImpTezDag extends AbstractService implements
}
this.taskCommunicators = new TaskCommunicator[taskCommunicatorClassIdentifiers.length];
for (int i = 0 ; i < taskCommunicatorClassIdentifiers.length ; i++) {
- taskCommunicators[i] = createTaskCommunicator(taskCommunicatorClassIdentifiers[i]);
+ taskCommunicators[i] = createTaskCommunicator(taskCommunicatorClassIdentifiers[i], i);
}
// TODO TEZ-2118 Start using taskCommunicator indices properly
}
@@ -145,13 +145,13 @@ public class TaskAttemptListenerImpTezDag extends AbstractService implements
}
}
- private TaskCommunicator createTaskCommunicator(String taskCommClassIdentifier) {
+ private TaskCommunicator createTaskCommunicator(String taskCommClassIdentifier, int taskCommIndex) {
if (taskCommClassIdentifier.equals(TezConstants.TEZ_AM_SERVICE_PLUGINS_NAME_DEFAULT)) {
LOG.info("Using Default Task Communicator");
- return new TezTaskCommunicatorImpl(this);
+ return new TezTaskCommunicatorImpl(new TaskCommunicatorContextImpl(context, this, taskCommIndex));
} else if (taskCommClassIdentifier.equals(TezConstants.TEZ_AM_SERVICE_PLUGINS_LOCAL_MODE_NAME_DEFAULT)) {
LOG.info("Using Default Local Task Communicator");
- return new TezLocalTaskCommunicatorImpl(this);
+ return new TezLocalTaskCommunicatorImpl(new TaskCommunicatorContextImpl(context, this, taskCommIndex));
} else {
LOG.info("Using TaskCommunicator: " + taskCommClassIdentifier);
Class<? extends TaskCommunicator> taskCommClazz = (Class<? extends TaskCommunicator>) ReflectionUtils
@@ -159,7 +159,7 @@ public class TaskAttemptListenerImpTezDag extends AbstractService implements
try {
Constructor<? extends TaskCommunicator> ctor = taskCommClazz.getConstructor(TaskCommunicatorContext.class);
ctor.setAccessible(true);
- return ctor.newInstance(this);
+ return ctor.newInstance(new TaskCommunicatorContextImpl(context, this, taskCommIndex));
} catch (NoSuchMethodException e) {
throw new TezUncheckedException(e);
} catch (InvocationTargetException e) {
@@ -171,18 +171,6 @@ public class TaskAttemptListenerImpTezDag extends AbstractService implements
}
}
}
-
- @Override
- public ApplicationAttemptId getApplicationAttemptId() {
- return context.getApplicationAttemptId();
- }
-
- @Override
- public Credentials getCredentials() {
- return context.getAppCredentials();
- }
-
- @Override
public TaskHeartbeatResponse heartbeat(TaskHeartbeatRequest request)
throws IOException, TezException {
ContainerId containerId = ConverterUtils.toContainerId(request
@@ -252,30 +240,20 @@ public class TaskAttemptListenerImpTezDag extends AbstractService implements
}
return new TaskHeartbeatResponse(false, outEvents);
}
-
- @Override
- public boolean isKnownContainer(ContainerId containerId) {
- return context.getAllContainers().get(containerId) != null;
- }
-
- @Override
public void taskAlive(TezTaskAttemptID taskAttemptId) {
taskHeartbeatHandler.pinged(taskAttemptId);
}
- @Override
public void containerAlive(ContainerId containerId) {
pingContainerHeartbeatHandler(containerId);
}
- @Override
public void taskStartedRemotely(TezTaskAttemptID taskAttemptID, ContainerId containerId) {
context.getEventHandler()
.handle(new TaskAttemptEventStartedRemotely(taskAttemptID, containerId, null));
pingContainerHeartbeatHandler(containerId);
}
- @Override
public void taskKilled(TezTaskAttemptID taskAttemptId, TaskAttemptEndReason taskAttemptEndReason,
String diagnostics) {
// Regular flow via TaskAttempt will take care of un-registering from the heartbeat handler,
@@ -288,7 +266,6 @@ public class TaskAttemptListenerImpTezDag extends AbstractService implements
taskAttemptEndReason)));
}
- @Override
public void taskFailed(TezTaskAttemptID taskAttemptId, TaskAttemptEndReason taskAttemptEndReason,
String diagnostics) {
// Regular flow via TaskAttempt will take care of un-registering from the heartbeat handler,
@@ -301,6 +278,11 @@ public class TaskAttemptListenerImpTezDag extends AbstractService implements
taskAttemptEndReason)));
}
+ public void vertexStateUpdateNotificationReceived(VertexStateUpdate event, int taskCommIndex) throws
+ Exception {
+ taskCommunicators[taskCommIndex].onVertexStateUpdated(event);
+ }
+
/**
* Child checking whether it can commit.
@@ -310,7 +292,7 @@ public class TaskAttemptListenerImpTezDag extends AbstractService implements
* {@link Task#canCommit(TezTaskAttemptID)} This is a legacy from the
* centralized commit protocol handling by the JobTracker.
*/
- @Override
+// @Override
public boolean canCommit(TezTaskAttemptID taskAttemptId) throws IOException {
LOG.info("Commit go/no-go request from " + taskAttemptId.toString());
// An attempt is asking if it can commit its output. This can be decided
http://git-wip-us.apache.org/repos/asf/tez/blob/e7c3cc72/tez-dag/src/main/java/org/apache/tez/dag/app/TaskCommunicatorContextImpl.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/TaskCommunicatorContextImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/TaskCommunicatorContextImpl.java
new file mode 100644
index 0000000..3714c3c
--- /dev/null
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/TaskCommunicatorContextImpl.java
@@ -0,0 +1,124 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.dag.app;
+
+import javax.annotation.Nullable;
+import java.io.IOException;
+import java.util.Set;
+
+import com.google.common.base.Preconditions;
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.security.Credentials;
+import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.tez.dag.api.TaskAttemptEndReason;
+import org.apache.tez.dag.api.TaskCommunicatorContext;
+import org.apache.tez.dag.api.TaskHeartbeatRequest;
+import org.apache.tez.dag.api.TaskHeartbeatResponse;
+import org.apache.tez.dag.api.TezException;
+import org.apache.tez.dag.api.TezUncheckedException;
+import org.apache.tez.dag.api.event.VertexState;
+import org.apache.tez.dag.api.event.VertexStateUpdate;
+import org.apache.tez.dag.app.dag.VertexStateUpdateListener;
+import org.apache.tez.dag.records.TezTaskAttemptID;
+
+@InterfaceAudience.Private
+public class TaskCommunicatorContextImpl implements TaskCommunicatorContext, VertexStateUpdateListener {
+
+
+ private final AppContext context;
+ private final TaskAttemptListenerImpTezDag taskAttemptListener;
+ private final int taskCommunicatorIndex;
+
+ public TaskCommunicatorContextImpl(AppContext appContext,
+ TaskAttemptListenerImpTezDag taskAttemptListener,
+ int taskCommunicatorIndex) {
+ this.context = appContext;
+ this.taskAttemptListener = taskAttemptListener;
+ this.taskCommunicatorIndex = taskCommunicatorIndex;
+ }
+
+ @Override
+ public ApplicationAttemptId getApplicationAttemptId() {
+ return context.getApplicationAttemptId();
+ }
+
+ @Override
+ public Credentials getCredentials() {
+ return context.getAppCredentials();
+ }
+
+ @Override
+ public boolean canCommit(TezTaskAttemptID taskAttemptId) throws IOException {
+ return taskAttemptListener.canCommit(taskAttemptId);
+ }
+
+ @Override
+ public TaskHeartbeatResponse heartbeat(TaskHeartbeatRequest request) throws IOException,
+ TezException {
+ return taskAttemptListener.heartbeat(request);
+ }
+
+ @Override
+ public boolean isKnownContainer(ContainerId containerId) {
+ return context.getAllContainers().get(containerId) != null;
+ }
+
+ @Override
+ public void taskAlive(TezTaskAttemptID taskAttemptId) {
+ taskAttemptListener.taskAlive(taskAttemptId);
+ }
+
+ @Override
+ public void containerAlive(ContainerId containerId) {
+ taskAttemptListener.containerAlive(containerId);
+ }
+
+ @Override
+ public void taskStartedRemotely(TezTaskAttemptID taskAttemptId, ContainerId containerId) {
+ taskAttemptListener.taskStartedRemotely(taskAttemptId, containerId);
+ }
+
+ @Override
+ public void taskKilled(TezTaskAttemptID taskAttemptId, TaskAttemptEndReason taskAttemptEndReason,
+ @Nullable String diagnostics) {
+ taskAttemptListener.taskKilled(taskAttemptId, taskAttemptEndReason, diagnostics);
+ }
+
+ @Override
+ public void taskFailed(TezTaskAttemptID taskAttemptId, TaskAttemptEndReason taskAttemptEndReason,
+ @Nullable String diagnostics) {
+ taskAttemptListener.taskFailed(taskAttemptId, taskAttemptEndReason, diagnostics);
+
+ }
+
+ @Override
+ public void registerForVertexStateUpdates(String vertexName,
+ @Nullable Set<VertexState> stateSet) {
+ Preconditions.checkNotNull(vertexName, "VertexName cannot be null: " + vertexName);
+ context.getCurrentDAG().getStateChangeNotifier().registerForVertexUpdates(vertexName, stateSet, this);
+ }
+
+
+ @Override
+ public void onStateUpdated(VertexStateUpdate event) {
+ try {
+ taskAttemptListener.vertexStateUpdateNotificationReceived(event, taskCommunicatorIndex);
+ } catch (Exception e) {
+ // TODO TEZ-2003 This needs to be propagated to the DAG as a user error.
+ throw new TezUncheckedException(e);
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/tez/blob/e7c3cc72/tez-dag/src/main/java/org/apache/tez/dag/app/TezTaskCommunicatorImpl.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/TezTaskCommunicatorImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/TezTaskCommunicatorImpl.java
index ef4f764..1417a3b 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/TezTaskCommunicatorImpl.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/TezTaskCommunicatorImpl.java
@@ -48,6 +48,7 @@ import org.apache.tez.dag.api.TaskHeartbeatResponse;
import org.apache.tez.dag.api.TezConfiguration;
import org.apache.tez.dag.api.TezException;
import org.apache.tez.dag.api.TezUncheckedException;
+import org.apache.tez.dag.api.event.VertexStateUpdate;
import org.apache.tez.dag.app.security.authorize.TezAMPolicyProvider;
import org.apache.tez.dag.records.TezTaskAttemptID;
import org.apache.tez.runtime.api.impl.TaskSpec;
@@ -252,6 +253,11 @@ public class TezTaskCommunicatorImpl extends TaskCommunicator {
return address;
}
+ @Override
+ public void onVertexStateUpdated(VertexStateUpdate stateUpdate) throws Exception {
+ // Empty. Not registering, or expecting any updates.
+ }
+
protected String getTokenIdentifier() {
return tokenIdentifier;
}
http://git-wip-us.apache.org/repos/asf/tez/blob/e7c3cc72/tez-dag/src/main/java/org/apache/tez/dag/app/dag/DAG.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/DAG.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/DAG.java
index 4c3426a..6d6872b 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/DAG.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/DAG.java
@@ -94,4 +94,6 @@ public interface DAG {
Map<String, TezVertexID> getVertexNameIDMapping();
+ StateChangeNotifier getStateChangeNotifier();
+
}
http://git-wip-us.apache.org/repos/asf/tez/blob/e7c3cc72/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/DAGImpl.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/DAGImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/DAGImpl.java
index 0a87241..4d10711 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/DAGImpl.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/DAGImpl.java
@@ -696,6 +696,11 @@ public class DAGImpl implements org.apache.tez.dag.app.dag.DAG,
}
@Override
+ public StateChangeNotifier getStateChangeNotifier() {
+ return entityUpdateTracker;
+ }
+
+ @Override
public TezCounters getAllCounters() {
readLock.lock();
[04/43] tez git commit: TEZ-2396. pig-tez-tfile-parser pom is hard
coded to depend on 0.6.0-SNAPSHOT version. (Rajesh Balamohan via hitesh)
Posted by ss...@apache.org.
TEZ-2396. pig-tez-tfile-parser pom is hard coded to depend on 0.6.0-SNAPSHOT version. (Rajesh Balamohan via hitesh)
Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/16bbc58c
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/16bbc58c
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/16bbc58c
Branch: refs/heads/TEZ-2003
Commit: 16bbc58ca70c7878696521530dc2c6f454b27019
Parents: da3fecd
Author: Hitesh Shah <hi...@apache.org>
Authored: Wed May 6 11:50:53 2015 -0700
Committer: Hitesh Shah <hi...@apache.org>
Committed: Wed May 6 11:50:53 2015 -0700
----------------------------------------------------------------------
BUILDING.txt | 9 ++
CHANGES.txt | 1 +
pom.xml | 7 ++
tez-tools/pom.xml | 50 +++++++++
tez-tools/tez-tfile-parser/README.txt | 2 +-
tez-tools/tez-tfile-parser/pom.xml | 159 +++++++----------------------
6 files changed, 104 insertions(+), 124 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/tez/blob/16bbc58c/BUILDING.txt
----------------------------------------------------------------------
diff --git a/BUILDING.txt b/BUILDING.txt
index 29b92ac..5247b9d 100644
--- a/BUILDING.txt
+++ b/BUILDING.txt
@@ -157,3 +157,12 @@ For example, to generate the state machine graphviz file for DAGImpl, run:
To generate the diagram, you can use a Graphviz application or something like:
$ dot -Tpng -o Tez.png Tez.gv'
+
+----------------------------------------------------------------------------------
+Building contrib tools under tez-tools :
+
+Use -Ptools to build various contrib tools present under tez-tools. For example, run:
+
+ $ mvn package -Ptools
+
+----------------------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/tez/blob/16bbc58c/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index f95e626..d3aa8a9 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -176,6 +176,7 @@ Release 0.6.1: Unreleased
INCOMPATIBLE CHANGES
ALL CHANGES:
+ TEZ-2396. pig-tez-tfile-parser pom is hard coded to depend on 0.6.0-SNAPSHOT version.
TEZ-2237. Valid events should be sent out when an Output is not started.
TEZ-1988. Tez UI: does not work when using file:// in a browser
TEZ-2390. tez-tools swimlane tool fails to parse large jobs >8K containers
http://git-wip-us.apache.org/repos/asf/tez/blob/16bbc58c/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 419138b..ce4fa13 100644
--- a/pom.xml
+++ b/pom.xml
@@ -39,6 +39,7 @@
<clover.license>${user.home}/clover.license</clover.license>
<hadoop.version>2.6.0</hadoop.version>
<jetty.version>6.1.26</jetty.version>
+ <pig.version>0.13.0</pig.version>
<distMgmtSnapshotsId>apache.snapshots.https</distMgmtSnapshotsId>
<distMgmtSnapshotsName>Apache Development Snapshot Repository</distMgmtSnapshotsName>
<distMgmtSnapshotsUrl>https://repository.apache.org/content/repositories/snapshots</distMgmtSnapshotsUrl>
@@ -169,6 +170,11 @@
<type>test-jar</type>
</dependency>
<dependency>
+ <groupId>org.apache.pig</groupId>
+ <artifactId>pig</artifactId>
+ <version>${pig.version}</version>
+ </dependency>
+ <dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
<version>1.7.5</version>
@@ -631,6 +637,7 @@
<module>tez-dag</module>
<module>tez-ui</module>
<module>tez-plugins</module>
+ <module>tez-tools</module>
<module>tez-dist</module>
<module>docs</module>
</modules>
http://git-wip-us.apache.org/repos/asf/tez/blob/16bbc58c/tez-tools/pom.xml
----------------------------------------------------------------------
diff --git a/tez-tools/pom.xml b/tez-tools/pom.xml
new file mode 100644
index 0000000..bf8fdf8
--- /dev/null
+++ b/tez-tools/pom.xml
@@ -0,0 +1,50 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+ <parent>
+ <groupId>org.apache.tez</groupId>
+ <artifactId>tez</artifactId>
+ <version>0.8.0-SNAPSHOT</version>
+ </parent>
+ <artifactId>tez-tools</artifactId>
+ <packaging>pom</packaging>
+
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.rat</groupId>
+ <artifactId>apache-rat-plugin</artifactId>
+ </plugin>
+ </plugins>
+ </build>
+
+ <profiles>
+ <profile>
+ <id>tools</id>
+ <activation>
+ <activeByDefault>false</activeByDefault>
+ </activation>
+ <modules>
+ <module>tez-tfile-parser</module>
+ </modules>
+ </profile>
+ </profiles>
+
+</project>
http://git-wip-us.apache.org/repos/asf/tez/blob/16bbc58c/tez-tools/tez-tfile-parser/README.txt
----------------------------------------------------------------------
diff --git a/tez-tools/tez-tfile-parser/README.txt b/tez-tools/tez-tfile-parser/README.txt
index 09514ed..5b18767 100644
--- a/tez-tools/tez-tfile-parser/README.txt
+++ b/tez-tools/tez-tfile-parser/README.txt
@@ -3,7 +3,7 @@ This is a simple Pig loader to parse TFiles and provide line by line format (tup
Build/Install:
==============
-1. "mvn clean package" should create "tfile-parser-1.0-SNAPSHOT.jar" would be created in ./target directory
+1. "mvn clean package" should create "tfile-parser-x.y.z-SNAPSHOT.jar" in the ./target directory
Running pig with tez:
====================
http://git-wip-us.apache.org/repos/asf/tez/blob/16bbc58c/tez-tools/tez-tfile-parser/pom.xml
----------------------------------------------------------------------
diff --git a/tez-tools/tez-tfile-parser/pom.xml b/tez-tools/tez-tfile-parser/pom.xml
index 64191ac..aea1299 100644
--- a/tez-tools/tez-tfile-parser/pom.xml
+++ b/tez-tools/tez-tfile-parser/pom.xml
@@ -15,131 +15,44 @@
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
- <modelVersion>4.0.0</modelVersion>
+ <modelVersion>4.0.0</modelVersion>
- <groupId>pig-tez-tfile-parser</groupId>
- <artifactId>tfile-parser</artifactId>
- <version>1.0-SNAPSHOT</version>
+ <parent>
+ <groupId>org.apache.tez</groupId>
+ <artifactId>tez-tools</artifactId>
+ <version>0.8.0-SNAPSHOT</version>
+ </parent>
+ <artifactId>tez-tfile-parser</artifactId>
- <properties>
- <tez-version>0.8.0-SNAPSHOT</tez-version>
- <hadoop-version>2.6.0</hadoop-version>
- </properties>
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-deploy-plugin</artifactId>
+ <configuration>
+ <skip>true</skip>
+ </configuration>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.rat</groupId>
+ <artifactId>apache-rat-plugin</artifactId>
+ </plugin>
+ </plugins>
+ </build>
- <dependencies>
- <dependency>
- <groupId>org.apache.pig</groupId>
- <artifactId>pig</artifactId>
- <version>0.13.0</version>
- </dependency>
- <dependency>
- <groupId>org.apache.tez</groupId>
- <artifactId>tez-api</artifactId>
- <version>${tez-version}</version>
- </dependency>
- <dependency>
- <groupId>org.apache.tez</groupId>
- <artifactId>tez-common</artifactId>
- <version>${tez-version}</version>
- </dependency>
- <dependency>
- <groupId>org.apache.tez</groupId>
- <artifactId>tez-runtime-library</artifactId>
- <version>${tez-version}</version>
- </dependency>
- <dependency>
- <groupId>org.apache.tez</groupId>
- <artifactId>tez-mapreduce</artifactId>
- <version>${tez-version}</version>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-common</artifactId>
- <version>${hadoop-version}</version>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-yarn-api</artifactId>
- <version>${hadoop-version}</version>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-hdfs</artifactId>
- <version>${hadoop-version}</version>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-yarn-common</artifactId>
- <version>${hadoop-version}</version>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-yarn-client</artifactId>
- <version>${hadoop-version}</version>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-yarn-server-nodemanager</artifactId>
- <version>${hadoop-version}</version>
- </dependency>
- <dependency>
- <groupId>commons-cli</groupId>
- <artifactId>commons-cli</artifactId>
- <version>1.1</version>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.mockito</groupId>
- <artifactId>mockito-core</artifactId>
- <version>1.8.5</version>
- <scope>test</scope>
- </dependency>
- <dependency>
- <groupId>junit</groupId>
- <artifactId>junit</artifactId>
- <version>4.7</version>
- <scope>test</scope>
- </dependency>
- </dependencies>
- <repositories>
- <repository>
- <id>apache.snapshots</id>
- <name>Apache Snapshot Repository</name>
- <url>http://repository.apache.org/snapshots</url>
- <releases>
- <enabled>false</enabled>
- </releases>
- <snapshots>
- <enabled>true</enabled>
- </snapshots>
- </repository>
- </repositories>
- <build>
- <plugins>
- <plugin>
- <artifactId>maven-compiler-plugin</artifactId>
- <configuration>
- <source>1.6</source>
- <target>1.6</target>
- </configuration>
- </plugin>
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-dependency-plugin</artifactId>
- <executions>
- <execution>
- <id>copy-dependencies</id>
- <phase>package</phase>
- <goals>
- <goal>copy-dependencies</goal>
- </goals>
- <configuration>
- <outputDirectory>${project.build.directory}/lib</outputDirectory>
- </configuration>
- </execution>
- </executions>
- </plugin>
- </plugins>
- </build>
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.pig</groupId>
+ <artifactId>pig</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-common</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-mapreduce-client-common</artifactId>
+ </dependency>
+ </dependencies>
</project>
[26/43] tez git commit: TEZ-2123. Fix component managers to use
pluggable components. Enable hybrid mode. (sseth)
Posted by ss...@apache.org.
TEZ-2123. Fix component managers to use pluggable components. Enable
hybrid mode. (sseth)
Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/c47951ab
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/c47951ab
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/c47951ab
Branch: refs/heads/TEZ-2003
Commit: c47951abda6c02b2373f78a6d56553e03523017c
Parents: cee4809
Author: Siddharth Seth <ss...@apache.org>
Authored: Fri Feb 20 11:59:03 2015 -0800
Committer: Siddharth Seth <ss...@apache.org>
Committed: Fri May 8 14:41:29 2015 -0700
----------------------------------------------------------------------
TEZ-2003-CHANGES.txt | 1 +
.../org/apache/tez/dag/app/DAGAppMaster.java | 4 +-
.../apache/tez/dag/app/TaskAttemptListener.java | 12 +-
.../dag/app/TaskAttemptListenerImpTezDag.java | 27 ++--
.../tez/dag/app/dag/impl/TaskAttemptImpl.java | 4 +-
.../TezRootInputInitializerContextImpl.java | 2 +-
.../apache/tez/dag/app/dag/impl/VertexImpl.java | 2 +-
.../tez/dag/app/dag/impl/VertexManager.java | 2 +-
.../app/launcher/ContainerLauncherRouter.java | 2 +-
.../app/launcher/LocalContainerLauncher.java | 10 +-
.../rm/AMSchedulerEventDeallocateContainer.java | 7 +-
.../rm/AMSchedulerEventNodeBlacklistUpdate.java | 8 +-
.../tez/dag/app/rm/AMSchedulerEventTAEnded.java | 10 +-
.../dag/app/rm/LocalTaskSchedulerService.java | 19 ++-
.../tez/dag/app/rm/NMCommunicatorEvent.java | 12 +-
.../rm/NMCommunicatorLaunchRequestEvent.java | 11 +-
.../app/rm/NMCommunicatorStopRequestEvent.java | 4 +-
.../dag/app/rm/TaskSchedulerEventHandler.java | 151 ++++++++++++-----
.../tez/dag/app/rm/container/AMContainer.java | 3 +
.../AMContainerEventLaunchRequest.java | 15 +-
.../dag/app/rm/container/AMContainerImpl.java | 39 +++--
.../dag/app/rm/container/AMContainerMap.java | 4 +-
.../apache/tez/dag/app/rm/node/AMNodeImpl.java | 6 +-
.../apache/tez/dag/app/MockDAGAppMaster.java | 2 +-
.../app/TestTaskAttemptListenerImplTezDag.java | 31 ++--
.../tez/dag/app/dag/impl/TestTaskAttempt.java | 69 ++++----
.../tez/dag/app/dag/impl/TestVertexImpl.java | 8 +-
.../tez/dag/app/rm/TestContainerReuse.java | 34 ++--
.../tez/dag/app/rm/TestLocalTaskScheduler.java | 2 +-
.../app/rm/TestLocalTaskSchedulerService.java | 18 ++-
.../app/rm/TestTaskSchedulerEventHandler.java | 11 +-
.../dag/app/rm/TestTaskSchedulerHelpers.java | 2 +-
.../dag/app/rm/container/TestAMContainer.java | 108 +++++++------
.../app/rm/container/TestAMContainerMap.java | 6 +-
.../org/apache/tez/examples/JoinValidate.java | 30 +++-
.../TezTestServiceContainerLauncher.java | 5 +-
.../rm/TezTestServiceTaskSchedulerService.java | 100 ++----------
.../tez/examples/JoinValidateConfigured.java | 53 ++++++
.../tez/tests/TestExternalTezServices.java | 160 ++++++++++++++-----
39 files changed, 638 insertions(+), 356 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/tez/blob/c47951ab/TEZ-2003-CHANGES.txt
----------------------------------------------------------------------
diff --git a/TEZ-2003-CHANGES.txt b/TEZ-2003-CHANGES.txt
index 4bfe08f..1a2264c 100644
--- a/TEZ-2003-CHANGES.txt
+++ b/TEZ-2003-CHANGES.txt
@@ -4,5 +4,6 @@ ALL CHANGES:
TEZ-2090. Add tests for jobs running in external services.
TEZ-2117. Add a manager for ContainerLaunchers running in the AM.
TEZ-2122. Setup pluggable components at AM/Vertex level.
+ TEZ-2123. Fix component managers to use pluggable components. (Enable hybrid mode)
INCOMPATIBLE CHANGES:
http://git-wip-us.apache.org/repos/asf/tez/blob/c47951ab/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java b/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java
index 6814cda..89b6506 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java
@@ -513,7 +513,7 @@ public class DAGAppMaster extends AbstractService {
this.taskSchedulerEventHandler = new TaskSchedulerEventHandler(context,
clientRpcServer, dispatcher.getEventHandler(), containerSignatureMatcher, webUIService,
- taskSchedulerClassIdentifiers);
+ taskSchedulerClassIdentifiers, isLocal);
addIfService(taskSchedulerEventHandler, true);
if (enableWebUIService()) {
@@ -2283,6 +2283,7 @@ public class DAGAppMaster extends AbstractService {
// Tez default classnames are populated as TezConfiguration.TEZ_AM_SERVICE_PLUGINS_DEFAULT
private String[] parsePlugins(BiMap<String, Integer> pluginMap, String[] pluginStrings,
String context) {
+ // TODO TEZ-2003 Duplicate error checking - ideally in the client itself. Depends on the final API.
Preconditions.checkState(pluginStrings != null && pluginStrings.length > 0,
"Plugin strings should not be null or empty: " + context);
@@ -2320,6 +2321,7 @@ public class DAGAppMaster extends AbstractService {
}
pluginMap.put(identifierString, index);
classNames[index] = className;
+ index++;
}
return classNames;
}
http://git-wip-us.apache.org/repos/asf/tez/blob/c47951ab/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListener.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListener.java b/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListener.java
index 9caa7cf..e4dad27 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListener.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListener.java
@@ -22,6 +22,7 @@ import java.net.InetSocketAddress;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.tez.dag.app.dag.DAG;
+import org.apache.tez.dag.api.TaskCommunicator;
import org.apache.tez.dag.app.rm.container.AMContainerTask;
import org.apache.tez.dag.records.TezTaskAttemptID;
/**
@@ -29,18 +30,17 @@ import org.apache.tez.dag.records.TezTaskAttemptID;
*/
public interface TaskAttemptListener {
- InetSocketAddress getAddress();
+ void registerRunningContainer(ContainerId containerId, int taskCommId);
- void registerRunningContainer(ContainerId containerId);
-
- void registerTaskAttempt(AMContainerTask amContainerTask, ContainerId containerId);
+ void registerTaskAttempt(AMContainerTask amContainerTask, ContainerId containerId, int taskCommId);
- void unregisterRunningContainer(ContainerId containerId);
+ void unregisterRunningContainer(ContainerId containerId, int taskCommId);
- void unregisterTaskAttempt(TezTaskAttemptID attemptID);
+ void unregisterTaskAttempt(TezTaskAttemptID attemptID, int taskCommId);
void dagComplete(DAG dag);
void dagSubmitted();
+ TaskCommunicator getTaskCommunicator(int taskCommIndex);
}
http://git-wip-us.apache.org/repos/asf/tez/blob/c47951ab/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java b/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
index 2f6dcf5..d21b7d0 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
@@ -273,11 +273,6 @@ public class TaskAttemptListenerImpTezDag extends AbstractService implements
return task.canCommit(taskAttemptId);
}
- @Override
- public InetSocketAddress getAddress() {
- return taskCommunicators[0].getAddress();
- }
-
// The TaskAttemptListener register / unregister methods in this class are not thread safe.
// The Tez framework should not invoke these methods from multiple threads.
@Override
@@ -297,7 +292,7 @@ public class TaskAttemptListenerImpTezDag extends AbstractService implements
}
@Override
- public void registerRunningContainer(ContainerId containerId) {
+ public void registerRunningContainer(ContainerId containerId, int taskCommId) {
if (LOG.isDebugEnabled()) {
LOG.debug("ContainerId: " + containerId + " registered with TaskAttemptListener");
}
@@ -307,11 +302,12 @@ public class TaskAttemptListenerImpTezDag extends AbstractService implements
"Multiple registrations for containerId: " + containerId);
}
NodeId nodeId = context.getAllContainers().get(containerId).getContainer().getNodeId();
- taskCommunicators[0].registerRunningContainer(containerId, nodeId.getHost(), nodeId.getPort());
+ taskCommunicators[taskCommId].registerRunningContainer(containerId, nodeId.getHost(),
+ nodeId.getPort());
}
@Override
- public void unregisterRunningContainer(ContainerId containerId) {
+ public void unregisterRunningContainer(ContainerId containerId, int taskCommId) {
if (LOG.isDebugEnabled()) {
LOG.debug("Unregistering Container from TaskAttemptListener: " + containerId);
}
@@ -319,12 +315,12 @@ public class TaskAttemptListenerImpTezDag extends AbstractService implements
if (containerInfo.taskAttemptId != null) {
registeredAttempts.remove(containerInfo.taskAttemptId);
}
- taskCommunicators[0].registerContainerEnd(containerId);
+ taskCommunicators[taskCommId].registerContainerEnd(containerId);
}
@Override
public void registerTaskAttempt(AMContainerTask amContainerTask,
- ContainerId containerId) {
+ ContainerId containerId, int taskCommId) {
ContainerInfo containerInfo = registeredContainers.get(containerId);
if (containerInfo == null) {
throw new TezUncheckedException("Registering task attempt: "
@@ -354,13 +350,13 @@ public class TaskAttemptListenerImpTezDag extends AbstractService implements
+ amContainerTask.getTask().getTaskAttemptID() + " to container: " + containerId
+ " when already assigned to: " + containerIdFromMap);
}
- taskCommunicators[0].registerRunningTaskAttempt(containerId, amContainerTask.getTask(),
+ taskCommunicators[taskCommId].registerRunningTaskAttempt(containerId, amContainerTask.getTask(),
amContainerTask.getAdditionalResources(), amContainerTask.getCredentials(),
amContainerTask.haveCredentialsChanged());
}
@Override
- public void unregisterTaskAttempt(TezTaskAttemptID attemptId) {
+ public void unregisterTaskAttempt(TezTaskAttemptID attemptId, int taskCommId) {
ContainerId containerId = registeredAttempts.remove(attemptId);
if (containerId == null) {
LOG.warn("Unregister task attempt: " + attemptId + " from unknown container");
@@ -374,7 +370,12 @@ public class TaskAttemptListenerImpTezDag extends AbstractService implements
}
// Explicitly putting in a new entry so that synchronization is not required on the existing element in the map.
registeredContainers.put(containerId, NULL_CONTAINER_INFO);
- taskCommunicators[0].unregisterRunningTaskAttempt(attemptId);
+ taskCommunicators[taskCommId].unregisterRunningTaskAttempt(attemptId);
+ }
+
+ @Override
+ public TaskCommunicator getTaskCommunicator(int taskCommIndex) {
+ return taskCommunicators[taskCommIndex];
}
private void pingContainerHeartbeatHandler(ContainerId containerId) {
http://git-wip-us.apache.org/repos/asf/tez/blob/c47951ab/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TaskAttemptImpl.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TaskAttemptImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TaskAttemptImpl.java
index c18dc00..c80571d 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TaskAttemptImpl.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TaskAttemptImpl.java
@@ -1218,7 +1218,7 @@ public class TaskAttemptImpl implements TaskAttempt,
// Inform the scheduler
if (sendSchedulerEvent()) {
ta.sendEvent(new AMSchedulerEventTAEnded(ta, ta.containerId, helper
- .getTaskAttemptState()));
+ .getTaskAttemptState(), ta.getVertex().getTaskSchedulerIdentifier()));
}
}
}
@@ -1300,7 +1300,7 @@ public class TaskAttemptImpl implements TaskAttempt,
// Inform the Scheduler.
ta.sendEvent(new AMSchedulerEventTAEnded(ta, ta.containerId,
- TaskAttemptState.SUCCEEDED));
+ TaskAttemptState.SUCCEEDED, ta.getVertex().getTaskSchedulerIdentifier()));
// Inform the task.
ta.sendEvent(new TaskEventTAUpdate(ta.attemptId,
http://git-wip-us.apache.org/repos/asf/tez/blob/c47951ab/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TezRootInputInitializerContextImpl.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TezRootInputInitializerContextImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TezRootInputInitializerContextImpl.java
index d4ef4d5..4ca4024 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TezRootInputInitializerContextImpl.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TezRootInputInitializerContextImpl.java
@@ -96,7 +96,7 @@ public class TezRootInputInitializerContextImpl implements
@Override
public Resource getTotalAvailableResource() {
- return appContext.getTaskScheduler().getTotalResources();
+ return appContext.getTaskScheduler().getTotalResources(vertex.getTaskSchedulerIdentifier());
}
@Override
http://git-wip-us.apache.org/repos/asf/tez/blob/c47951ab/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexImpl.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexImpl.java
index 097cf3d..18d3368 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexImpl.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexImpl.java
@@ -4396,7 +4396,7 @@ public class VertexImpl implements org.apache.tez.dag.app.dag.Vertex, EventHandl
eventHandler, getTotalTasks(),
appContext.getTaskScheduler().getNumClusterNodes(),
getTaskResource(),
- appContext.getTaskScheduler().getTotalResources());
+ appContext.getTaskScheduler().getTotalResources(taskSchedulerIdentifier));
List<RootInputLeafOutput<InputDescriptor, InputInitializerDescriptor>>
inputList = Lists.newArrayListWithCapacity(inputsWithInitializers.size());
for (String inputName : inputsWithInitializers) {
http://git-wip-us.apache.org/repos/asf/tez/blob/c47951ab/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexManager.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexManager.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexManager.java
index 945d9ba..1300fc0 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexManager.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexManager.java
@@ -286,7 +286,7 @@ public class VertexManager {
@Override
public synchronized Resource getTotalAvailableResource() {
checkAndThrowIfDone();
- return appContext.getTaskScheduler().getTotalResources();
+ return appContext.getTaskScheduler().getTotalResources(managedVertex.getTaskSchedulerIdentifier());
}
@Override
http://git-wip-us.apache.org/repos/asf/tez/blob/c47951ab/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncherRouter.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncherRouter.java b/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncherRouter.java
index 621e4a8..4f9b5bf 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncherRouter.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncherRouter.java
@@ -124,6 +124,6 @@ public class ContainerLauncherRouter extends AbstractService
@Override
public void handle(NMCommunicatorEvent event) {
- containerLaunchers[0].handle(event);
+ containerLaunchers[event.getLauncherId()].handle(event);
}
}
http://git-wip-us.apache.org/repos/asf/tez/blob/c47951ab/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/LocalContainerLauncher.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/LocalContainerLauncher.java b/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/LocalContainerLauncher.java
index e9ba9d7..9a38732 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/LocalContainerLauncher.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/LocalContainerLauncher.java
@@ -59,7 +59,6 @@ import org.apache.tez.dag.api.TezException;
import org.apache.tez.dag.api.TezUncheckedException;
import org.apache.tez.dag.app.AppContext;
import org.apache.tez.dag.app.TaskAttemptListener;
-import org.apache.tez.dag.app.TaskAttemptListenerImpTezDag;
import org.apache.tez.dag.app.TezTaskCommunicatorImpl;
import org.apache.tez.dag.app.rm.NMCommunicatorEvent;
import org.apache.tez.dag.app.rm.NMCommunicatorLaunchRequestEvent;
@@ -88,9 +87,9 @@ public class LocalContainerLauncher extends AbstractService implements
private static final Logger LOG = LoggerFactory.getLogger(LocalContainerLauncher.class);
private final AppContext context;
- private final TezTaskUmbilicalProtocol taskUmbilicalProtocol;
private final AtomicBoolean serviceStopped = new AtomicBoolean(false);
private final String workingDirectory;
+ private final TaskAttemptListener tal;
private final Map<String, String> localEnv = new HashMap<String, String>();
private final ExecutionContext executionContext;
private int numExecutors;
@@ -116,9 +115,8 @@ public class LocalContainerLauncher extends AbstractService implements
String workingDirectory) throws UnknownHostException {
super(LocalContainerLauncher.class.getName());
this.context = context;
- TaskAttemptListenerImpTezDag taListener = (TaskAttemptListenerImpTezDag)taskAttemptListener;
- TezTaskCommunicatorImpl taskComm = (TezTaskCommunicatorImpl) taListener.getTaskCommunicator();
- this.taskUmbilicalProtocol = taskComm.getUmbilical();
+ this.tal = taskAttemptListener;
+
this.workingDirectory = workingDirectory;
AuxiliaryServiceHelper.setServiceDataIntoEnv(
ShuffleUtils.SHUFFLE_HANDLER_SERVICE_ID, ByteBuffer.allocate(4).putInt(0), localEnv);
@@ -219,7 +217,7 @@ public class LocalContainerLauncher extends AbstractService implements
tezChild =
createTezChild(context.getAMConf(), event.getContainerId(), tokenIdentifier,
context.getApplicationAttemptId().getAttemptId(), context.getLocalDirs(),
- taskUmbilicalProtocol,
+ ((TezTaskCommunicatorImpl)tal.getTaskCommunicator(event.getTaskCommId())).getUmbilical(),
TezCommonUtils.parseCredentialsBytes(event.getContainerLaunchContext().getTokens().array()));
} catch (InterruptedException e) {
handleLaunchFailed(e, event.getContainerId());
http://git-wip-us.apache.org/repos/asf/tez/blob/c47951ab/tez-dag/src/main/java/org/apache/tez/dag/app/rm/AMSchedulerEventDeallocateContainer.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/AMSchedulerEventDeallocateContainer.java b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/AMSchedulerEventDeallocateContainer.java
index 1b51920..5270aa2 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/AMSchedulerEventDeallocateContainer.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/AMSchedulerEventDeallocateContainer.java
@@ -23,15 +23,20 @@ import org.apache.hadoop.yarn.api.records.ContainerId;
public class AMSchedulerEventDeallocateContainer extends AMSchedulerEvent {
private final ContainerId containerId;
+ private final int schedulerId;
- public AMSchedulerEventDeallocateContainer(ContainerId containerId) {
+ public AMSchedulerEventDeallocateContainer(ContainerId containerId, int schedulerId) {
super(AMSchedulerEventType.S_CONTAINER_DEALLOCATE);
this.containerId = containerId;
+ this.schedulerId = schedulerId;
}
public ContainerId getContainerId() {
return this.containerId;
}
+ public int getSchedulerId() {
+ return schedulerId;
+ }
}
http://git-wip-us.apache.org/repos/asf/tez/blob/c47951ab/tez-dag/src/main/java/org/apache/tez/dag/app/rm/AMSchedulerEventNodeBlacklistUpdate.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/AMSchedulerEventNodeBlacklistUpdate.java b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/AMSchedulerEventNodeBlacklistUpdate.java
index ed7ebc3..679705a 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/AMSchedulerEventNodeBlacklistUpdate.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/AMSchedulerEventNodeBlacklistUpdate.java
@@ -23,14 +23,20 @@ import org.apache.hadoop.yarn.api.records.NodeId;
public class AMSchedulerEventNodeBlacklistUpdate extends AMSchedulerEvent {
private final NodeId nodeId;
+ private final int schedulerId;
- public AMSchedulerEventNodeBlacklistUpdate(NodeId nodeId, boolean add) {
+ public AMSchedulerEventNodeBlacklistUpdate(NodeId nodeId, boolean add, int schedulerId) {
super((add ? AMSchedulerEventType.S_NODE_BLACKLISTED
: AMSchedulerEventType.S_NODE_UNBLACKLISTED));
this.nodeId = nodeId;
+ this.schedulerId = schedulerId;
}
public NodeId getNodeId() {
return this.nodeId;
}
+
+ public int getSchedulerId() {
+ return schedulerId;
+ }
}
http://git-wip-us.apache.org/repos/asf/tez/blob/c47951ab/tez-dag/src/main/java/org/apache/tez/dag/app/rm/AMSchedulerEventTAEnded.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/AMSchedulerEventTAEnded.java b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/AMSchedulerEventTAEnded.java
index 90e76b7..2ace642 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/AMSchedulerEventTAEnded.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/AMSchedulerEventTAEnded.java
@@ -26,14 +26,16 @@ public class AMSchedulerEventTAEnded extends AMSchedulerEvent {
private final TaskAttempt attempt;
private final ContainerId containerId;
- private TaskAttemptState state;
+ private final TaskAttemptState state;
+ private final int schedulerId;
public AMSchedulerEventTAEnded(TaskAttempt attempt, ContainerId containerId,
- TaskAttemptState state) {
+ TaskAttemptState state, int schedulerId) {
super(AMSchedulerEventType.S_TA_ENDED);
this.attempt = attempt;
this.containerId = containerId;
this.state = state;
+ this.schedulerId = schedulerId;
}
public TezTaskAttemptID getAttemptID() {
@@ -51,4 +53,8 @@ public class AMSchedulerEventTAEnded extends AMSchedulerEvent {
public ContainerId getUsedContainerId() {
return this.containerId;
}
+
+ public int getSchedulerId() {
+ return schedulerId;
+ }
}
http://git-wip-us.apache.org/repos/asf/tez/blob/c47951ab/tez-dag/src/main/java/org/apache/tez/dag/app/rm/LocalTaskSchedulerService.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/LocalTaskSchedulerService.java b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/LocalTaskSchedulerService.java
index 51d8b9d..72a074f 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/LocalTaskSchedulerService.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/LocalTaskSchedulerService.java
@@ -34,6 +34,7 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
+import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.NodeId;
@@ -63,10 +64,11 @@ public class LocalTaskSchedulerService extends TaskSchedulerService {
final int appHostPort;
final String appTrackingUrl;
final AppContext appContext;
+ final long customContainerAppId;
public LocalTaskSchedulerService(TaskSchedulerAppCallback appClient,
ContainerSignatureMatcher containerSignatureMatcher, String appHostName,
- int appHostPort, String appTrackingUrl, AppContext appContext) {
+ int appHostPort, String appTrackingUrl, long customContainerAppId, AppContext appContext) {
super(LocalTaskSchedulerService.class.getName());
this.realAppClient = appClient;
this.appCallbackExecutor = createAppCallbackExecutorService();
@@ -78,6 +80,7 @@ public class LocalTaskSchedulerService extends TaskSchedulerService {
this.appContext = appContext;
taskRequestQueue = new PriorityBlockingQueue<TaskRequest>();
taskAllocations = new LinkedHashMap<Object, Container>();
+ this.customContainerAppId = customContainerAppId;
}
private ExecutorService createAppCallbackExecutorService() {
@@ -164,7 +167,7 @@ public class LocalTaskSchedulerService extends TaskSchedulerService {
protected AsyncDelegateRequestHandler createRequestHandler(Configuration conf) {
return new AsyncDelegateRequestHandler(taskRequestQueue,
- new LocalContainerFactory(appContext),
+ new LocalContainerFactory(appContext, customContainerAppId),
taskAllocations,
appClientDelegate,
conf);
@@ -195,17 +198,19 @@ public class LocalTaskSchedulerService extends TaskSchedulerService {
}
static class LocalContainerFactory {
- final AppContext appContext;
AtomicInteger nextId;
+ final ApplicationAttemptId customAppAttemptId;
- public LocalContainerFactory(AppContext appContext) {
- this.appContext = appContext;
+ public LocalContainerFactory(AppContext appContext, long appIdLong) {
this.nextId = new AtomicInteger(1);
+ ApplicationId appId = ApplicationId
+ .newInstance(appIdLong, appContext.getApplicationAttemptId().getApplicationId().getId());
+ this.customAppAttemptId = ApplicationAttemptId
+ .newInstance(appId, appContext.getApplicationAttemptId().getAttemptId());
}
public Container createContainer(Resource capability, Priority priority) {
- ApplicationAttemptId appAttemptId = appContext.getApplicationAttemptId();
- ContainerId containerId = ContainerId.newInstance(appAttemptId, nextId.getAndIncrement());
+ ContainerId containerId = ContainerId.newInstance(customAppAttemptId, nextId.getAndIncrement());
NodeId nodeId = NodeId.newInstance("127.0.0.1", 0);
String nodeHttpAddress = "127.0.0.1:0";
http://git-wip-us.apache.org/repos/asf/tez/blob/c47951ab/tez-dag/src/main/java/org/apache/tez/dag/app/rm/NMCommunicatorEvent.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/NMCommunicatorEvent.java b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/NMCommunicatorEvent.java
index 8bdeb28..f86894f 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/NMCommunicatorEvent.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/NMCommunicatorEvent.java
@@ -28,13 +28,15 @@ public class NMCommunicatorEvent extends AbstractEvent<NMCommunicatorEventType>
private final ContainerId containerId;
private final NodeId nodeId;
private final Token containerToken;
+ private final int launcherId;
public NMCommunicatorEvent(ContainerId containerId, NodeId nodeId,
- Token containerToken, NMCommunicatorEventType type) {
+ Token containerToken, NMCommunicatorEventType type, int launcherId) {
super(type);
this.containerId = containerId;
this.nodeId = nodeId;
this.containerToken = containerToken;
+ this.launcherId = launcherId;
}
public ContainerId getContainerId() {
@@ -48,10 +50,14 @@ public class NMCommunicatorEvent extends AbstractEvent<NMCommunicatorEventType>
public Token getContainerToken() {
return this.containerToken;
}
-
+
+ public int getLauncherId() {
+ return launcherId;
+ }
+
public String toSrting() {
return super.toString() + " for container " + containerId + ", nodeId: "
- + nodeId;
+ + nodeId + ", launcherId: " + launcherId;
}
@Override
http://git-wip-us.apache.org/repos/asf/tez/blob/c47951ab/tez-dag/src/main/java/org/apache/tez/dag/app/rm/NMCommunicatorLaunchRequestEvent.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/NMCommunicatorLaunchRequestEvent.java b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/NMCommunicatorLaunchRequestEvent.java
index c3b12c0..a38345c 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/NMCommunicatorLaunchRequestEvent.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/NMCommunicatorLaunchRequestEvent.java
@@ -25,13 +25,16 @@ public class NMCommunicatorLaunchRequestEvent extends NMCommunicatorEvent {
private final ContainerLaunchContext clc;
private final Container container;
+ // The task communicator index for the specific container being launched.
+ private final int taskCommId;
public NMCommunicatorLaunchRequestEvent(ContainerLaunchContext clc,
- Container container) {
+ Container container, int launcherId, int taskCommId) {
super(container.getId(), container.getNodeId(), container
- .getContainerToken(), NMCommunicatorEventType.CONTAINER_LAUNCH_REQUEST);
+ .getContainerToken(), NMCommunicatorEventType.CONTAINER_LAUNCH_REQUEST, launcherId);
this.clc = clc;
this.container = container;
+ this.taskCommId = taskCommId;
}
public ContainerLaunchContext getContainerLaunchContext() {
@@ -42,6 +45,10 @@ public class NMCommunicatorLaunchRequestEvent extends NMCommunicatorEvent {
return container;
}
+ public int getTaskCommId() {
+ return taskCommId;
+ }
+
@Override
public boolean equals(Object o) {
if (this == o) {
http://git-wip-us.apache.org/repos/asf/tez/blob/c47951ab/tez-dag/src/main/java/org/apache/tez/dag/app/rm/NMCommunicatorStopRequestEvent.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/NMCommunicatorStopRequestEvent.java b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/NMCommunicatorStopRequestEvent.java
index 277d1e7..c9b5c44 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/NMCommunicatorStopRequestEvent.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/NMCommunicatorStopRequestEvent.java
@@ -25,9 +25,9 @@ import org.apache.hadoop.yarn.api.records.Token;
public class NMCommunicatorStopRequestEvent extends NMCommunicatorEvent {
public NMCommunicatorStopRequestEvent(ContainerId containerId, NodeId nodeId,
- Token containerToken) {
+ Token containerToken, int launcherId) {
super(containerId, nodeId, containerToken,
- NMCommunicatorEventType.CONTAINER_STOP_REQUEST);
+ NMCommunicatorEventType.CONTAINER_STOP_REQUEST, launcherId);
}
}
http://git-wip-us.apache.org/repos/asf/tez/blob/c47951ab/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerEventHandler.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerEventHandler.java b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerEventHandler.java
index 72389e7..5a0ace8 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerEventHandler.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerEventHandler.java
@@ -108,9 +108,22 @@ public class TaskSchedulerEventHandler extends AbstractService
private final String[] taskSchedulerClasses;
protected final TaskSchedulerService []taskSchedulers;
+ private final boolean isPureLocalMode;
+ // If running in non local-only mode, the YARN task scheduler will always run to take care of
+ // registration with YARN and heartbeats to YARN.
+ // Splitting registration and heartbeats is not straightforward due to the taskScheduler being
+ // tied to a ContainerRequestType.
+ private final int yarnTaskSchedulerIndex;
+ // Custom AppIds to avoid container conflicts if there are multiple sources
+ private final long SCHEDULER_APP_ID_BASE = 111101111;
+ private final long SCHEDULER_APP_ID_INCREMENT = 111111111;
+
BlockingQueue<AMSchedulerEvent> eventQueue
= new LinkedBlockingQueue<AMSchedulerEvent>();
+ // Not tracking container / task to schedulerId. Instead relying on everything flowing through
+ // the system and being propagated back via events.
+
/**
*
* @param appContext
@@ -125,7 +138,7 @@ public class TaskSchedulerEventHandler extends AbstractService
public TaskSchedulerEventHandler(AppContext appContext,
DAGClientServer clientService, EventHandler eventHandler,
ContainerSignatureMatcher containerSignatureMatcher, WebUIService webUI,
- String [] schedulerClasses) {
+ String [] schedulerClasses, boolean isPureLocalMode) {
super(TaskSchedulerEventHandler.class.getName());
this.appContext = appContext;
this.eventHandler = eventHandler;
@@ -133,13 +146,39 @@ public class TaskSchedulerEventHandler extends AbstractService
this.containerSignatureMatcher = containerSignatureMatcher;
this.webUI = webUI;
this.historyUrl = getHistoryUrl();
+ this.isPureLocalMode = isPureLocalMode;
if (this.webUI != null) {
this.webUI.setHistoryUrl(this.historyUrl);
}
- if (schedulerClasses == null || schedulerClasses.length == 0) {
- this.taskSchedulerClasses = new String[] {TezConstants.TEZ_AM_SERVICE_PLUGINS_NAME_DEFAULT};
+
+ // Override everything for pure local mode
+ if (isPureLocalMode) {
+ this.taskSchedulerClasses = new String[] {TezConstants.TEZ_AM_SERVICE_PLUGINS_LOCAL_MODE_NAME_DEFAULT};
+ this.yarnTaskSchedulerIndex = -1;
} else {
- this.taskSchedulerClasses = schedulerClasses;
+ if (schedulerClasses == null || schedulerClasses.length ==0) {
+ this.taskSchedulerClasses = new String[] {TezConstants.TEZ_AM_SERVICE_PLUGINS_NAME_DEFAULT};
+ this.yarnTaskSchedulerIndex = 0;
+ } else {
+ // Ensure the YarnScheduler will be set up and note its index. This will be responsible for heartbeats and YARN registration.
+ int foundYarnTaskSchedulerIndex = -1;
+ for (int i = 0 ; i < schedulerClasses.length ; i++) {
+ if (schedulerClasses[i].equals(TezConstants.TEZ_AM_SERVICE_PLUGINS_NAME_DEFAULT)) {
+ foundYarnTaskSchedulerIndex = i;
+ break;
+ }
+ }
+ if (foundYarnTaskSchedulerIndex == -1) { // Not found. Add at the end.
+ this.taskSchedulerClasses = new String[schedulerClasses.length+1];
+ foundYarnTaskSchedulerIndex = this.taskSchedulerClasses.length -1;
+ for (int i = 0 ; i < schedulerClasses.length ; i++) { // Copy over the rest.
+ this.taskSchedulerClasses[i] = schedulerClasses[i];
+ }
+ } else {
+ this.taskSchedulerClasses = schedulerClasses;
+ }
+ this.yarnTaskSchedulerIndex = foundYarnTaskSchedulerIndex;
+ }
}
taskSchedulers = new TaskSchedulerService[this.taskSchedulerClasses.length];
}
@@ -157,12 +196,12 @@ public class TaskSchedulerEventHandler extends AbstractService
return cachedNodeCount;
}
- public Resource getAvailableResources() {
- return taskSchedulers[0].getAvailableResources();
+ public Resource getAvailableResources(int schedulerId) {
+ return taskSchedulers[schedulerId].getAvailableResources();
}
- public Resource getTotalResources() {
- return taskSchedulers[0].getTotalResources();
+ public Resource getTotalResources(int schedulerId) {
+ return taskSchedulers[schedulerId].getTotalResources();
}
public synchronized void handleEvent(AMSchedulerEvent sEvent) {
@@ -176,7 +215,7 @@ public class TaskSchedulerEventHandler extends AbstractService
switch(event.getState()) {
case FAILED:
case KILLED:
- handleTAUnsuccessfulEnd((AMSchedulerEventTAEnded) sEvent);
+ handleTAUnsuccessfulEnd(event);
break;
case SUCCEEDED:
handleTASucceeded(event);
@@ -228,9 +267,9 @@ public class TaskSchedulerEventHandler extends AbstractService
private void handleNodeBlacklistUpdate(AMSchedulerEventNodeBlacklistUpdate event) {
if (event.getType() == AMSchedulerEventType.S_NODE_BLACKLISTED) {
- taskSchedulers[0].blacklistNode(event.getNodeId());
+ taskSchedulers[event.getSchedulerId()].blacklistNode(event.getNodeId());
} else if (event.getType() == AMSchedulerEventType.S_NODE_UNBLACKLISTED) {
- taskSchedulers[0].unblacklistNode(event.getNodeId());
+ taskSchedulers[event.getSchedulerId()].unblacklistNode(event.getNodeId());
} else {
throw new TezUncheckedException("Invalid event type: " + event.getType());
}
@@ -242,14 +281,14 @@ public class TaskSchedulerEventHandler extends AbstractService
// TODO what happens to the task that was connected to this container?
// current assumption is that it will eventually call handleTaStopRequest
//TaskAttempt taskAttempt = (TaskAttempt)
- taskSchedulers[0].deallocateContainer(containerId);
+ taskSchedulers[event.getSchedulerId()].deallocateContainer(containerId);
// TODO does this container need to be stopped via C_STOP_REQUEST
sendEvent(new AMContainerEventStopRequest(containerId));
}
private void handleTAUnsuccessfulEnd(AMSchedulerEventTAEnded event) {
TaskAttempt attempt = event.getAttempt();
- boolean wasContainerAllocated = taskSchedulers[0].deallocateTask(attempt, false);
+ boolean wasContainerAllocated = taskSchedulers[event.getSchedulerId()].deallocateTask(attempt, false);
// use stored value of container id in case the scheduler has removed this
// assignment because the task has been deallocated earlier.
// retroactive case
@@ -291,7 +330,8 @@ public class TaskSchedulerEventHandler extends AbstractService
event.getAttemptID()));
}
- boolean wasContainerAllocated = taskSchedulers[0].deallocateTask(attempt, true);
+ boolean wasContainerAllocated = taskSchedulers[event.getSchedulerId()].deallocateTask(attempt,
+ true);
if (!wasContainerAllocated) {
LOG.error("De-allocated successful task: " + attempt.getID()
+ ", but TaskScheduler reported no container assigned to task");
@@ -316,7 +356,7 @@ public class TaskSchedulerEventHandler extends AbstractService
TaskAttempt affinityAttempt = vertex.getTask(taskIndex).getSuccessfulAttempt();
if (affinityAttempt != null) {
Preconditions.checkNotNull(affinityAttempt.getAssignedContainerID(), affinityAttempt.getID());
- taskSchedulers[0].allocateTask(taskAttempt,
+ taskSchedulers[event.getSchedulerId()].allocateTask(taskAttempt,
event.getCapability(),
affinityAttempt.getAssignedContainerID(),
Priority.newInstance(event.getPriority()),
@@ -336,7 +376,7 @@ public class TaskSchedulerEventHandler extends AbstractService
}
}
- taskSchedulers[0].allocateTask(taskAttempt,
+ taskSchedulers[event.getSchedulerId()].allocateTask(taskAttempt,
event.getCapability(),
hosts,
racks,
@@ -347,7 +387,8 @@ public class TaskSchedulerEventHandler extends AbstractService
private TaskSchedulerService createTaskScheduler(String host, int port, String trackingUrl,
AppContext appContext,
- String schedulerClassName) {
+ String schedulerClassName,
+ long customAppIdIdentifier) {
if (schedulerClassName.equals(TezConstants.TEZ_AM_SERVICE_PLUGINS_NAME_DEFAULT)) {
LOG.info("Creating TaskScheduler: YarnTaskSchedulerService");
return new YarnTaskSchedulerService(this, this.containerSignatureMatcher,
@@ -355,7 +396,7 @@ public class TaskSchedulerEventHandler extends AbstractService
} else if (schedulerClassName.equals(TezConstants.TEZ_AM_SERVICE_PLUGINS_LOCAL_MODE_NAME_DEFAULT)) {
LOG.info("Creating TaskScheduler: Local TaskScheduler");
return new LocalTaskSchedulerService(this, this.containerSignatureMatcher,
- host, port, trackingUrl, appContext);
+ host, port, trackingUrl, customAppIdIdentifier, appContext);
} else {
LOG.info("Creating custom TaskScheduler: " + schedulerClassName);
// TODO TEZ-2003 Temporary reflection with specific parameters. Remove once there is a clean interface.
@@ -364,9 +405,10 @@ public class TaskSchedulerEventHandler extends AbstractService
try {
Constructor<? extends TaskSchedulerService> ctor = taskSchedulerClazz
.getConstructor(TaskSchedulerAppCallback.class, AppContext.class, String.class,
- int.class, String.class, Configuration.class);
+ int.class, String.class, long.class, Configuration.class);
ctor.setAccessible(true);
- return ctor.newInstance(this, appContext, host, port, trackingUrl, getConfig());
+ return ctor.newInstance(this, appContext, host, port, trackingUrl, customAppIdIdentifier,
+ getConfig());
} catch (NoSuchMethodException e) {
throw new TezUncheckedException(e);
} catch (InvocationTargetException e) {
@@ -381,10 +423,19 @@ public class TaskSchedulerEventHandler extends AbstractService
@VisibleForTesting
protected void instantiateScheduelrs(String host, int port, String trackingUrl, AppContext appContext) {
+ // TODO Add error checking for components being used in the Vertex when running in pure local mode.
// Iterate over the list and create all the taskSchedulers
+ int j = 0;
for (int i = 0; i < taskSchedulerClasses.length; i++) {
+ long customAppIdIdentifier;
+ if (isPureLocalMode || taskSchedulerClasses[i].equals(
+ TezConstants.TEZ_AM_SERVICE_PLUGINS_NAME_DEFAULT)) { // Use the app identifier from the appId.
+ customAppIdIdentifier = appContext.getApplicationID().getClusterTimestamp();
+ } else {
+ customAppIdIdentifier = SCHEDULER_APP_ID_BASE + (j++ * SCHEDULER_APP_ID_INCREMENT);
+ }
taskSchedulers[i] = createTaskScheduler(host, port,
- trackingUrl, appContext, taskSchedulerClasses[i]);
+ trackingUrl, appContext, taskSchedulerClasses[i], customAppIdIdentifier);
}
}
@@ -403,12 +454,12 @@ public class TaskSchedulerEventHandler extends AbstractService
for (int i = 0 ; i < taskSchedulers.length ; i++) {
taskSchedulers[i].init(getConfig());
taskSchedulers[i].start();
- }
-
- // TODO TEZ-2118 Start using multiple task schedulers
- if (shouldUnregisterFlag.get()) {
- // Flag may have been set earlier when task scheduler was not initialized
- taskSchedulers[0].setShouldUnregister();
+ if (shouldUnregisterFlag.get()) {
+ // Flag may have been set earlier when task scheduler was not initialized
+ // TODO TEZ-2003 Should setRegister / unregister be part of APIs when not YARN specific ?
+ // External services could need to talk to some other entity.
+ taskSchedulers[i].setShouldUnregister();
+ }
}
this.eventHandlingThread = new Thread("TaskSchedulerEventHandlerThread") {
@@ -457,8 +508,10 @@ public class TaskSchedulerEventHandler extends AbstractService
if (eventHandlingThread != null)
eventHandlingThread.interrupt();
}
- if (taskSchedulers[0] != null) {
- ((AbstractService)taskSchedulers[0]).stop();
+ for (int i = 0 ; i < taskSchedulers.length ; i++) {
+ if (taskSchedulers[i] != null) {
+ taskSchedulers[i].stop();
+ }
}
}
@@ -467,15 +520,18 @@ public class TaskSchedulerEventHandler extends AbstractService
public synchronized void taskAllocated(Object task,
Object appCookie,
Container container) {
+ AMSchedulerEventTALaunchRequest event =
+ (AMSchedulerEventTALaunchRequest) appCookie;
ContainerId containerId = container.getId();
- if (appContext.getAllContainers().addContainerIfNew(container)) {
+ if (appContext.getAllContainers()
+ .addContainerIfNew(container, event.getSchedulerId(), event.getLauncherId(),
+ event.getTaskCommId())) {
appContext.getNodeTracker().nodeSeen(container.getNodeId());
sendEvent(new AMNodeEventContainerAllocated(container
.getNodeId(), container.getId()));
}
- AMSchedulerEventTALaunchRequest event =
- (AMSchedulerEventTALaunchRequest) appCookie;
+
TaskAttempt taskAttempt = event.getTaskAttempt();
// TODO - perhaps check if the task still needs this container
// because the deallocateTask downcall may have raced with the
@@ -484,7 +540,7 @@ public class TaskSchedulerEventHandler extends AbstractService
if (appContext.getAllContainers().get(containerId).getState() == AMContainerState.ALLOCATED) {
sendEvent(new AMContainerEventLaunchRequest(containerId, taskAttempt.getVertexID(),
- event.getContainerContext()));
+ event.getContainerContext(), event.getLauncherId(), event.getTaskCommId()));
}
sendEvent(new DAGEventSchedulerUpdateTAAssigned(taskAttempt, container));
sendEvent(new AMContainerEventAssignTA(containerId, taskAttempt.getID(),
@@ -603,6 +659,9 @@ public class TaskSchedulerEventHandler extends AbstractService
public float getProgress() {
// at this point allocate has been called and so node count must be available
// may change after YARN-1722
+ // This is a heartbeat coming in from the scheduler to the app, and is being used to piggy-back any
+ // node updates from the cluster.
+ // TODO Handle this in TEZ-2124. Need a way to know which scheduler is calling in.
int nodeCount = taskSchedulers[0].getClusterNodeCount();
if (nodeCount != cachedNodeCount) {
cachedNodeCount = nodeCount;
@@ -618,7 +677,9 @@ public class TaskSchedulerEventHandler extends AbstractService
}
public void dagCompleted() {
- taskSchedulers[0].dagComplete();
+ for (int i = 0 ; i < taskSchedulers.length ; i++) {
+ taskSchedulers[i].dagComplete();
+ }
}
public void dagSubmitted() {
@@ -628,7 +689,10 @@ public class TaskSchedulerEventHandler extends AbstractService
@Override
public void preemptContainer(ContainerId containerId) {
- taskSchedulers[0].deallocateContainer(containerId);
+ // TODO Why is this making a call back into the scheduler, when the call is originating from there.
+ // An AMContainer instance should already exist if an attempt is being made to preempt it
+ AMContainer amContainer = appContext.getAllContainers().get(containerId);
+ taskSchedulers[amContainer.getTaskSchedulerIdentifier()].deallocateContainer(containerId);
// Inform the Containers about completion.
sendEvent(new AMContainerEventCompleted(containerId, ContainerExitStatus.INVALID,
"Container preempted internally", TaskAttemptTerminationCause.INTERNAL_PREEMPTION));
@@ -637,13 +701,24 @@ public class TaskSchedulerEventHandler extends AbstractService
public void setShouldUnregisterFlag() {
LOG.info("TaskScheduler notified that it should unregister from RM");
this.shouldUnregisterFlag.set(true);
- if (this.taskSchedulers[0] != null) {
- this.taskSchedulers[0].setShouldUnregister();
+ for (int i = 0 ; i < taskSchedulers.length ; i++) {
+ if (this.taskSchedulers[i] != null) {
+ // TODO TEZ-2003 registration required for all schedulers ?
+ this.taskSchedulers[i].setShouldUnregister();
+ }
}
}
public boolean hasUnregistered() {
- return this.taskSchedulers[0].hasUnregistered();
+ boolean result = true;
+ for (int i = 0 ; i < taskSchedulers.length ; i++) {
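+ // TODO TEZ-2003 registration required for all schedulers ? NOTE(review): result starts as true and is combined with '|=', so it can never become false and the early return below is unreachable; '&=' looks intended.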
+ result |= this.taskSchedulers[i].hasUnregistered();
+ if (result == false) {
+ return result;
+ }
+ }
+ return result;
}
@VisibleForTesting
http://git-wip-us.apache.org/repos/asf/tez/blob/c47951ab/tez-dag/src/main/java/org/apache/tez/dag/app/rm/container/AMContainer.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/container/AMContainer.java b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/container/AMContainer.java
index 0fc2e12..6616896 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/container/AMContainer.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/container/AMContainer.java
@@ -34,4 +34,7 @@ public interface AMContainer extends EventHandler<AMContainerEvent>{
public List<TezTaskAttemptID> getAllTaskAttempts();
public TezTaskAttemptID getCurrentTaskAttempt();
+ public int getTaskSchedulerIdentifier();
+ public int getContainerLauncherIdentifier();
+ public int getTaskCommunicatorIdentifier();
}
http://git-wip-us.apache.org/repos/asf/tez/blob/c47951ab/tez-dag/src/main/java/org/apache/tez/dag/app/rm/container/AMContainerEventLaunchRequest.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/container/AMContainerEventLaunchRequest.java b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/container/AMContainerEventLaunchRequest.java
index d973264..92e5817 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/container/AMContainerEventLaunchRequest.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/container/AMContainerEventLaunchRequest.java
@@ -27,12 +27,17 @@ public class AMContainerEventLaunchRequest extends AMContainerEvent {
private final TezVertexID vertexId;
private final ContainerContext containerContext;
+ private final int launcherId;
+ private final int taskCommId;
public AMContainerEventLaunchRequest(ContainerId containerId,
- TezVertexID vertexId, ContainerContext containerContext) {
+ TezVertexID vertexId, ContainerContext containerContext,
+ int launcherId, int taskCommId) {
super(containerId, AMContainerEventType.C_LAUNCH_REQUEST);
this.vertexId = vertexId;
this.containerContext = containerContext;
+ this.launcherId = launcherId;
+ this.taskCommId = taskCommId;
}
public TezDAGID getDAGId() {
@@ -46,4 +51,12 @@ public class AMContainerEventLaunchRequest extends AMContainerEvent {
public ContainerContext getContainerContext() {
return this.containerContext;
}
+
+ public int getLauncherId() {
+ return launcherId;
+ }
+
+ public int getTaskCommId() {
+ return taskCommId;
+ }
}
http://git-wip-us.apache.org/repos/asf/tez/blob/c47951ab/tez-dag/src/main/java/org/apache/tez/dag/app/rm/container/AMContainerImpl.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/container/AMContainerImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/container/AMContainerImpl.java
index 1acec9c..39df2e8 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/container/AMContainerImpl.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/container/AMContainerImpl.java
@@ -81,6 +81,9 @@ public class AMContainerImpl implements AMContainer {
private final TaskAttemptListener taskAttemptListener;
protected final EventHandler eventHandler;
private final ContainerSignatureMatcher signatureMatcher;
+ private final int schedulerId;
+ private final int launcherId;
+ private final int taskCommId;
private final List<TezTaskAttemptID> completedAttempts =
new LinkedList<TezTaskAttemptID>();
@@ -302,7 +305,7 @@ public class AMContainerImpl implements AMContainer {
// additional change - JvmID, YarnChild, etc depend on TaskType.
public AMContainerImpl(Container container, ContainerHeartbeatHandler chh,
TaskAttemptListener tal, ContainerSignatureMatcher signatureMatcher,
- AppContext appContext) {
+ AppContext appContext, int schedulerId, int launcherId, int taskCommId) {
ReentrantReadWriteLock rwLock = new ReentrantReadWriteLock();
this.readLock = rwLock.readLock();
this.writeLock = rwLock.writeLock();
@@ -314,6 +317,9 @@ public class AMContainerImpl implements AMContainer {
this.containerHeartbeatHandler = chh;
this.taskAttemptListener = tal;
this.failedAssignments = new LinkedList<TezTaskAttemptID>();
+ this.schedulerId = schedulerId;
+ this.launcherId = launcherId;
+ this.taskCommId = taskCommId;
this.stateMachine = stateMachineFactory.make(this);
}
@@ -363,6 +369,21 @@ public class AMContainerImpl implements AMContainer {
}
}
+ @Override
+ public int getTaskSchedulerIdentifier() {
+ return this.schedulerId;
+ }
+
+ @Override
+ public int getContainerLauncherIdentifier() {
+ return this.launcherId;
+ }
+
+ @Override
+ public int getTaskCommunicatorIdentifier() {
+ return this.taskCommId;
+ }
+
public boolean isInErrorState() {
return inError;
}
@@ -432,7 +453,7 @@ public class AMContainerImpl implements AMContainer {
containerContext.getLocalResources(),
containerContext.getEnvironment(),
containerContext.getJavaOpts(),
- container.taskAttemptListener.getAddress(), containerContext.getCredentials(),
+ container.taskAttemptListener.getTaskCommunicator(container.taskCommId).getAddress(), containerContext.getCredentials(),
container.appContext, container.container.getResource(),
container.appContext.getAMConf());
@@ -1014,7 +1035,7 @@ public class AMContainerImpl implements AMContainer {
}
protected void deAllocate() {
- sendEvent(new AMSchedulerEventDeallocateContainer(containerId));
+ sendEvent(new AMSchedulerEventDeallocateContainer(containerId, schedulerId));
}
protected void sendTerminatedToTaskAttempt(
@@ -1044,28 +1065,28 @@ public class AMContainerImpl implements AMContainer {
}
protected void sendStartRequestToNM(ContainerLaunchContext clc) {
- sendEvent(new NMCommunicatorLaunchRequestEvent(clc, container));
+ sendEvent(new NMCommunicatorLaunchRequestEvent(clc, container, launcherId, taskCommId));
}
protected void sendStopRequestToNM() {
sendEvent(new NMCommunicatorStopRequestEvent(containerId,
- container.getNodeId(), container.getContainerToken()));
+ container.getNodeId(), container.getContainerToken(), launcherId));
}
protected void unregisterAttemptFromListener(TezTaskAttemptID attemptId) {
- taskAttemptListener.unregisterTaskAttempt(attemptId);
+ taskAttemptListener.unregisterTaskAttempt(attemptId, taskCommId);
}
protected void registerAttemptWithListener(AMContainerTask amContainerTask) {
- taskAttemptListener.registerTaskAttempt(amContainerTask, this.containerId);
+ taskAttemptListener.registerTaskAttempt(amContainerTask, this.containerId, taskCommId);
}
protected void registerWithTAListener() {
- taskAttemptListener.registerRunningContainer(containerId);
+ taskAttemptListener.registerRunningContainer(containerId, taskCommId);
}
protected void unregisterFromTAListener() {
- this.taskAttemptListener.unregisterRunningContainer(containerId);
+ this.taskAttemptListener.unregisterRunningContainer(containerId, taskCommId);
}
protected void registerWithContainerListener() {
http://git-wip-us.apache.org/repos/asf/tez/blob/c47951ab/tez-dag/src/main/java/org/apache/tez/dag/app/rm/container/AMContainerMap.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/container/AMContainerMap.java b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/container/AMContainerMap.java
index 574c38e..938096d 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/container/AMContainerMap.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/container/AMContainerMap.java
@@ -62,9 +62,9 @@ public class AMContainerMap extends AbstractService implements EventHandler<AMCo
}
}
- public boolean addContainerIfNew(Container container) {
+ public boolean addContainerIfNew(Container container, int schedulerId, int launcherId, int taskCommId) {
AMContainer amc = new AMContainerImpl(container, chh, tal,
- containerSignatureMatcher, context);
+ containerSignatureMatcher, context, schedulerId, launcherId, taskCommId);
return (containerMap.putIfAbsent(container.getId(), amc) == null);
}
http://git-wip-us.apache.org/repos/asf/tez/blob/c47951ab/tez-dag/src/main/java/org/apache/tez/dag/app/rm/node/AMNodeImpl.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/node/AMNodeImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/node/AMNodeImpl.java
index b93cab3..0d8e4cd 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/node/AMNodeImpl.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/node/AMNodeImpl.java
@@ -257,7 +257,8 @@ public class AMNodeImpl implements AMNode {
// these containers are not useful anymore
pastContainers.addAll(containers);
containers.clear();
- sendEvent(new AMSchedulerEventNodeBlacklistUpdate(getNodeId(), true));
+ // TODO TEZ-2124 node tracking per ext source
+ sendEvent(new AMSchedulerEventNodeBlacklistUpdate(getNodeId(), true, 0));
}
@SuppressWarnings("unchecked")
@@ -363,7 +364,8 @@ public class AMNodeImpl implements AMNode {
public void transition(AMNodeImpl node, AMNodeEvent nEvent) {
node.ignoreBlacklisting = ignore;
if (node.getState() == AMNodeState.BLACKLISTED) {
- node.sendEvent(new AMSchedulerEventNodeBlacklistUpdate(node.getNodeId(), false));
+ // TODO TEZ-2124 node tracking per ext source
+ node.sendEvent(new AMSchedulerEventNodeBlacklistUpdate(node.getNodeId(), false, 0));
}
}
}
http://git-wip-us.apache.org/repos/asf/tez/blob/c47951ab/tez-dag/src/test/java/org/apache/tez/dag/app/MockDAGAppMaster.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/MockDAGAppMaster.java b/tez-dag/src/test/java/org/apache/tez/dag/app/MockDAGAppMaster.java
index aace92b..59efb87 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/MockDAGAppMaster.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/MockDAGAppMaster.java
@@ -196,7 +196,7 @@ public class MockDAGAppMaster extends DAGAppMaster {
@Override
public void serviceStart() throws Exception {
taListener = (TaskAttemptListenerImpTezDag) getTaskAttemptListener();
- taskCommunicator = (TezTaskCommunicatorImpl) taListener.getTaskCommunicator();
+ taskCommunicator = (TezTaskCommunicatorImpl) taListener.getTaskCommunicator(0);
eventHandlingThread = new Thread(this);
eventHandlingThread.start();
ExecutorService rawExecutor = Executors.newFixedThreadPool(handlerConcurrency,
http://git-wip-us.apache.org/repos/asf/tez/blob/c47951ab/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskAttemptListenerImplTezDag.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskAttemptListenerImplTezDag.java b/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskAttemptListenerImplTezDag.java
index 33f4817..98fcddc 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskAttemptListenerImplTezDag.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskAttemptListenerImplTezDag.java
@@ -110,9 +110,16 @@ public class TestTaskAttemptListenerImplTezDag {
doReturn(dag).when(appContext).getCurrentDAG();
doReturn(appAcls).when(appContext).getApplicationACLs();
doReturn(amContainerMap).when(appContext).getAllContainers();
+ NodeId nodeId = NodeId.newInstance("localhost", 0);
+
+ AMContainer amContainer = mock(AMContainer.class);
+ Container container = mock(Container.class);
+ doReturn(nodeId).when(container).getNodeId();
+ doReturn(amContainer).when(amContainerMap).get(any(ContainerId.class));
+ doReturn(container).when(amContainer).getContainer();
taskAttemptListener = new TaskAttemptListenerImplForTest(appContext,
- mock(TaskHeartbeatHandler.class), mock(ContainerHeartbeatHandler.class), null);
+ mock(TaskHeartbeatHandler.class), mock(ContainerHeartbeatHandler.class), null, null);
taskSpec = mock(TaskSpec.class);
doReturn(taskAttemptID).when(taskSpec).getTaskAttemptID();
@@ -123,7 +130,7 @@ public class TestTaskAttemptListenerImplTezDag {
@Test(timeout = 5000)
public void testGetTask() throws IOException {
- TezTaskCommunicatorImpl taskCommunicator = (TezTaskCommunicatorImpl)taskAttemptListener.getTaskCommunicator();
+ TezTaskCommunicatorImpl taskCommunicator = (TezTaskCommunicatorImpl)taskAttemptListener.getTaskCommunicator(0);
TezTaskUmbilicalProtocol tezUmbilical = taskCommunicator.getUmbilical();
ContainerId containerId1 = createContainerId(appId, 1);
@@ -133,55 +140,55 @@ public class TestTaskAttemptListenerImplTezDag {
ContainerId containerId2 = createContainerId(appId, 2);
ContainerContext containerContext2 = new ContainerContext(containerId2.toString());
- taskAttemptListener.registerRunningContainer(containerId2);
+ taskAttemptListener.registerRunningContainer(containerId2, 0);
containerTask = tezUmbilical.getTask(containerContext2);
assertNull(containerTask);
// Valid task registered
- taskAttemptListener.registerTaskAttempt(amContainerTask, containerId2);
+ taskAttemptListener.registerTaskAttempt(amContainerTask, containerId2, 0);
containerTask = tezUmbilical.getTask(containerContext2);
assertFalse(containerTask.shouldDie());
assertEquals(taskSpec, containerTask.getTaskSpec());
// Task unregistered. Should respond to heartbeats
- taskAttemptListener.unregisterTaskAttempt(taskAttemptId);
+ taskAttemptListener.unregisterTaskAttempt(taskAttemptId, 0);
containerTask = tezUmbilical.getTask(containerContext2);
assertNull(containerTask);
// Container unregistered. Should send a shouldDie = true
- taskAttemptListener.unregisterRunningContainer(containerId2);
+ taskAttemptListener.unregisterRunningContainer(containerId2, 0);
containerTask = tezUmbilical.getTask(containerContext2);
assertTrue(containerTask.shouldDie());
ContainerId containerId3 = createContainerId(appId, 3);
ContainerContext containerContext3 = new ContainerContext(containerId3.toString());
- taskAttemptListener.registerRunningContainer(containerId3);
+ taskAttemptListener.registerRunningContainer(containerId3, 0);
// Register task to container3, followed by unregistering container 3 all together
TaskSpec taskSpec2 = mock(TaskSpec.class);
TezTaskAttemptID taskAttemptId2 = mock(TezTaskAttemptID.class);
doReturn(taskAttemptId2).when(taskSpec2).getTaskAttemptID();
AMContainerTask amContainerTask2 = new AMContainerTask(taskSpec, null, null, false, 0);
- taskAttemptListener.registerTaskAttempt(amContainerTask2, containerId3);
- taskAttemptListener.unregisterRunningContainer(containerId3);
+ taskAttemptListener.registerTaskAttempt(amContainerTask2, containerId3, 0);
+ taskAttemptListener.unregisterRunningContainer(containerId3, 0);
containerTask = tezUmbilical.getTask(containerContext3);
assertTrue(containerTask.shouldDie());
}
@Test(timeout = 5000)
public void testGetTaskMultiplePulls() throws IOException {
- TezTaskCommunicatorImpl taskCommunicator = (TezTaskCommunicatorImpl)taskAttemptListener.getTaskCommunicator();
+ TezTaskCommunicatorImpl taskCommunicator = (TezTaskCommunicatorImpl)taskAttemptListener.getTaskCommunicator(0);
TezTaskUmbilicalProtocol tezUmbilical = taskCommunicator.getUmbilical();
ContainerId containerId1 = createContainerId(appId, 1);
doReturn(mock(AMContainer.class)).when(amContainerMap).get(containerId1);
ContainerContext containerContext1 = new ContainerContext(containerId1.toString());
- taskAttemptListener.registerRunningContainer(containerId1);
+ taskAttemptListener.registerRunningContainer(containerId1, 0);
containerTask = tezUmbilical.getTask(containerContext1);
assertNull(containerTask);
// Register task
- taskAttemptListener.registerTaskAttempt(amContainerTask, containerId1);
+ taskAttemptListener.registerTaskAttempt(amContainerTask, containerId1, 0);
containerTask = tezUmbilical.getTask(containerContext1);
assertFalse(containerTask.shouldDie());
assertEquals(taskSpec, containerTask.getTaskSpec());
http://git-wip-us.apache.org/repos/asf/tez/blob/c47951ab/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestTaskAttempt.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestTaskAttempt.java b/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestTaskAttempt.java
index 60c4c88..9df225c 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestTaskAttempt.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestTaskAttempt.java
@@ -56,6 +56,7 @@ import org.apache.hadoop.yarn.event.EventHandler;
import org.apache.hadoop.yarn.util.Clock;
import org.apache.hadoop.yarn.util.SystemClock;
import org.apache.tez.common.MockDNSToSwitchMapping;
+import org.apache.tez.dag.api.TaskCommunicator;
import org.apache.tez.dag.api.TaskLocationHint;
import org.apache.tez.dag.api.TezConfiguration;
import org.apache.tez.dag.api.oldrecords.TaskAttemptState;
@@ -273,8 +274,9 @@ public class TestTaskAttempt {
MockEventHandler eventHandler = new MockEventHandler();
TaskAttemptListener taListener = mock(TaskAttemptListener.class);
- when(taListener.getAddress()).thenReturn(
- new InetSocketAddress("localhost", 0));
+ TaskCommunicator taskComm = mock(TaskCommunicator.class);
+ doReturn(new InetSocketAddress("localhost", 0)).when(taskComm).getAddress();
+ doReturn(taskComm).when(taListener).getTaskCommunicator(0);
Configuration taskConf = new Configuration();
taskConf.setClass("fs.file.impl", StubbedFS.class, FileSystem.class);
@@ -323,8 +325,9 @@ public class TestTaskAttempt {
MockEventHandler eventHandler = spy(new MockEventHandler());
TaskAttemptListener taListener = mock(TaskAttemptListener.class);
- when(taListener.getAddress()).thenReturn(
- new InetSocketAddress("localhost", 0));
+ TaskCommunicator taskComm = mock(TaskCommunicator.class);
+ doReturn(new InetSocketAddress("localhost", 0)).when(taskComm).getAddress();
+ doReturn(taskComm).when(taListener).getTaskCommunicator(0);
Configuration taskConf = new Configuration();
taskConf.setClass("fs.file.impl", StubbedFS.class, FileSystem.class);
@@ -345,7 +348,7 @@ public class TestTaskAttempt {
AMContainerMap containers = new AMContainerMap(
mock(ContainerHeartbeatHandler.class), mock(TaskAttemptListener.class),
new ContainerContextMatcher(), appCtx);
- containers.addContainerIfNew(container);
+ containers.addContainerIfNew(container, 0, 0, 0);
doReturn(new ClusterInfo()).when(appCtx).getClusterInfo();
doReturn(containers).when(appCtx).getAllContainers();
@@ -424,8 +427,9 @@ public class TestTaskAttempt {
MockEventHandler eventHandler = new MockEventHandler();
TaskAttemptListener taListener = mock(TaskAttemptListener.class);
- when(taListener.getAddress()).thenReturn(
- new InetSocketAddress("localhost", 0));
+ TaskCommunicator taskComm = mock(TaskCommunicator.class);
+ doReturn(new InetSocketAddress("localhost", 0)).when(taskComm).getAddress();
+ doReturn(taskComm).when(taListener).getTaskCommunicator(0);
Configuration taskConf = new Configuration();
taskConf.setClass("fs.file.impl", StubbedFS.class, FileSystem.class);
@@ -446,7 +450,7 @@ public class TestTaskAttempt {
AMContainerMap containers = new AMContainerMap(
mock(ContainerHeartbeatHandler.class), mock(TaskAttemptListener.class),
new ContainerContextMatcher(), appCtx);
- containers.addContainerIfNew(container);
+ containers.addContainerIfNew(container, 0, 0, 0);
doReturn(new ClusterInfo()).when(appCtx).getClusterInfo();
doReturn(containers).when(appCtx).getAllContainers();
@@ -489,8 +493,9 @@ public class TestTaskAttempt {
MockEventHandler eventHandler = spy(new MockEventHandler());
TaskAttemptListener taListener = mock(TaskAttemptListener.class);
- when(taListener.getAddress()).thenReturn(
- new InetSocketAddress("localhost", 0));
+ TaskCommunicator taskComm = mock(TaskCommunicator.class);
+ doReturn(new InetSocketAddress("localhost", 0)).when(taskComm).getAddress();
+ doReturn(taskComm).when(taListener).getTaskCommunicator(0);
Configuration taskConf = new Configuration();
taskConf.setClass("fs.file.impl", StubbedFS.class, FileSystem.class);
@@ -511,7 +516,7 @@ public class TestTaskAttempt {
AMContainerMap containers = new AMContainerMap(
mock(ContainerHeartbeatHandler.class), mock(TaskAttemptListener.class),
new ContainerContextMatcher(), appCtx);
- containers.addContainerIfNew(container);
+ containers.addContainerIfNew(container, 0, 0, 0);
doReturn(new ClusterInfo()).when(appCtx).getClusterInfo();
doReturn(containers).when(appCtx).getAllContainers();
@@ -581,8 +586,9 @@ public class TestTaskAttempt {
MockEventHandler eventHandler = spy(new MockEventHandler());
TaskAttemptListener taListener = mock(TaskAttemptListener.class);
- when(taListener.getAddress()).thenReturn(
- new InetSocketAddress("localhost", 0));
+ TaskCommunicator taskComm = mock(TaskCommunicator.class);
+ doReturn(new InetSocketAddress("localhost", 0)).when(taskComm).getAddress();
+ doReturn(taskComm).when(taListener).getTaskCommunicator(0);
Configuration taskConf = new Configuration();
taskConf.setClass("fs.file.impl", StubbedFS.class, FileSystem.class);
@@ -604,7 +610,7 @@ public class TestTaskAttempt {
AMContainerMap containers = new AMContainerMap(
mock(ContainerHeartbeatHandler.class), mock(TaskAttemptListener.class),
new ContainerContextMatcher(), appCtx);
- containers.addContainerIfNew(container);
+ containers.addContainerIfNew(container, 0, 0, 0);
doReturn(new ClusterInfo()).when(appCtx).getClusterInfo();
doReturn(containers).when(appCtx).getAllContainers();
@@ -712,8 +718,9 @@ public class TestTaskAttempt {
MockEventHandler eventHandler = spy(new MockEventHandler());
TaskAttemptListener taListener = mock(TaskAttemptListener.class);
- when(taListener.getAddress()).thenReturn(
- new InetSocketAddress("localhost", 0));
+ TaskCommunicator taskComm = mock(TaskCommunicator.class);
+ doReturn(new InetSocketAddress("localhost", 0)).when(taskComm).getAddress();
+ doReturn(taskComm).when(taListener).getTaskCommunicator(0);
Configuration taskConf = new Configuration();
taskConf.setClass("fs.file.impl", StubbedFS.class, FileSystem.class);
@@ -735,7 +742,7 @@ public class TestTaskAttempt {
AMContainerMap containers = new AMContainerMap(
mock(ContainerHeartbeatHandler.class), mock(TaskAttemptListener.class),
new ContainerContextMatcher(), appCtx);
- containers.addContainerIfNew(container);
+ containers.addContainerIfNew(container, 0, 0, 0);
doReturn(new ClusterInfo()).when(appCtx).getClusterInfo();
doReturn(containers).when(appCtx).getAllContainers();
@@ -804,8 +811,9 @@ public class TestTaskAttempt {
MockEventHandler eventHandler = spy(new MockEventHandler());
TaskAttemptListener taListener = mock(TaskAttemptListener.class);
- when(taListener.getAddress()).thenReturn(
- new InetSocketAddress("localhost", 0));
+ TaskCommunicator taskComm = mock(TaskCommunicator.class);
+ doReturn(new InetSocketAddress("localhost", 0)).when(taskComm).getAddress();
+ doReturn(taskComm).when(taListener).getTaskCommunicator(0);
Configuration taskConf = new Configuration();
taskConf.setClass("fs.file.impl", StubbedFS.class, FileSystem.class);
@@ -826,7 +834,7 @@ public class TestTaskAttempt {
AMContainerMap containers = new AMContainerMap(
mock(ContainerHeartbeatHandler.class), mock(TaskAttemptListener.class),
new ContainerContextMatcher(), appCtx);
- containers.addContainerIfNew(container);
+ containers.addContainerIfNew(container, 0, 0, 0);
doReturn(new ClusterInfo()).when(appCtx).getClusterInfo();
doReturn(containers).when(appCtx).getAllContainers();
@@ -899,8 +907,9 @@ public class TestTaskAttempt {
MockEventHandler eventHandler = spy(new MockEventHandler());
TaskAttemptListener taListener = mock(TaskAttemptListener.class);
- when(taListener.getAddress()).thenReturn(
- new InetSocketAddress("localhost", 0));
+ TaskCommunicator taskComm = mock(TaskCommunicator.class);
+ doReturn(new InetSocketAddress("localhost", 0)).when(taskComm).getAddress();
+ doReturn(taskComm).when(taListener).getTaskCommunicator(0);
Configuration taskConf = new Configuration();
taskConf.setClass("fs.file.impl", StubbedFS.class, FileSystem.class);
@@ -921,7 +930,7 @@ public class TestTaskAttempt {
AMContainerMap containers = new AMContainerMap(
mock(ContainerHeartbeatHandler.class), mock(TaskAttemptListener.class),
new ContainerContextMatcher(), appCtx);
- containers.addContainerIfNew(container);
+ containers.addContainerIfNew(container, 0, 0, 0);
doReturn(new ClusterInfo()).when(appCtx).getClusterInfo();
doReturn(containers).when(appCtx).getAllContainers();
@@ -1002,8 +1011,9 @@ public class TestTaskAttempt {
MockEventHandler eventHandler = spy(new MockEventHandler());
TaskAttemptListener taListener = mock(TaskAttemptListener.class);
- when(taListener.getAddress()).thenReturn(
- new InetSocketAddress("localhost", 0));
+ TaskCommunicator taskComm = mock(TaskCommunicator.class);
+ doReturn(new InetSocketAddress("localhost", 0)).when(taskComm).getAddress();
+ doReturn(taskComm).when(taListener).getTaskCommunicator(0);
Configuration taskConf = new Configuration();
taskConf.setClass("fs.file.impl", StubbedFS.class, FileSystem.class);
@@ -1024,7 +1034,7 @@ public class TestTaskAttempt {
AMContainerMap containers = new AMContainerMap(
mock(ContainerHeartbeatHandler.class), mock(TaskAttemptListener.class),
new ContainerContextMatcher(), appCtx);
- containers.addContainerIfNew(container);
+ containers.addContainerIfNew(container, 0, 0, 0);
doReturn(new ClusterInfo()).when(appCtx).getClusterInfo();
doReturn(containers).when(appCtx).getAllContainers();
@@ -1102,8 +1112,9 @@ public class TestTaskAttempt {
MockEventHandler mockEh = new MockEventHandler();
MockEventHandler eventHandler = spy(mockEh);
TaskAttemptListener taListener = mock(TaskAttemptListener.class);
- when(taListener.getAddress()).thenReturn(
- new InetSocketAddress("localhost", 0));
+ TaskCommunicator taskComm = mock(TaskCommunicator.class);
+ doReturn(new InetSocketAddress("localhost", 0)).when(taskComm).getAddress();
+ doReturn(taskComm).when(taListener).getTaskCommunicator(0);
Configuration taskConf = new Configuration();
taskConf.setClass("fs.file.impl", StubbedFS.class, FileSystem.class);
@@ -1124,7 +1135,7 @@ public class TestTaskAttempt {
AMContainerMap containers = new AMContainerMap(
mock(ContainerHeartbeatHandler.class), mock(TaskAttemptListener.class),
new ContainerContextMatcher(), appCtx);
- containers.addContainerIfNew(container);
+ containers.addContainerIfNew(container, 0, 0, 0);
doReturn(new ClusterInfo()).when(appCtx).getClusterInfo();
doReturn(containers).when(appCtx).getAllContainers();
http://git-wip-us.apache.org/repos/asf/tez/blob/c47951ab/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestVertexImpl.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestVertexImpl.java b/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestVertexImpl.java
index a8eaca1..0aa3118 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestVertexImpl.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestVertexImpl.java
@@ -2176,7 +2176,7 @@ public class TestVertexImpl {
doReturn(dagId).when(appContext).getCurrentDAGID();
doReturn(dagId).when(dag).getID();
doReturn(taskScheduler).when(appContext).getTaskScheduler();
- doReturn(Resource.newInstance(102400, 60)).when(taskScheduler).getTotalResources();
+ doReturn(Resource.newInstance(102400, 60)).when(taskScheduler).getTotalResources(0);
doReturn(historyEventHandler).when(appContext).getHistoryHandler();
doReturn(dispatcher.getEventHandler()).when(appContext).getEventHandler();
@@ -3041,7 +3041,7 @@ public class TestVertexImpl {
AMContainerMap containers = new AMContainerMap(
mock(ContainerHeartbeatHandler.class), mock(TaskAttemptListener.class),
new ContainerContextMatcher(), appContext);
- containers.addContainerIfNew(container);
+ containers.addContainerIfNew(container, 0, 0, 0);
doReturn(containers).when(appContext).getAllContainers();
ta.handle(new TaskAttemptEventStartedRemotely(ta.getID(), contId, null));
@@ -3076,7 +3076,7 @@ public class TestVertexImpl {
AMContainerMap containers = new AMContainerMap(
mock(ContainerHeartbeatHandler.class), mock(TaskAttemptListener.class),
new ContainerContextMatcher(), appContext);
- containers.addContainerIfNew(container);
+ containers.addContainerIfNew(container, 0, 0, 0);
doReturn(containers).when(appContext).getAllContainers();
ta.handle(new TaskAttemptEventStartedRemotely(ta.getID(), contId, null));
@@ -3112,7 +3112,7 @@ public class TestVertexImpl {
AMContainerMap containers = new AMContainerMap(
mock(ContainerHeartbeatHandler.class), mock(TaskAttemptListener.class),
new ContainerContextMatcher(), appContext);
- containers.addContainerIfNew(container);
+ containers.addContainerIfNew(container, 0, 0, 0);
doReturn(containers).when(appContext).getAllContainers();
ta.handle(new TaskAttemptEventStartedRemotely(ta.getID(), contId, null));
[03/43] tez git commit: TEZ-2417. Tez UI: Counters are blank in the
Attempts page if all attempts failed (Sreenath Somarajapuram via
pramachandran)
Posted by ss...@apache.org.
TEZ-2417. Tez UI: Counters are blank in the Attempts page if all attempts failed (Sreenath Somarajapuram via pramachandran)
Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/da3fecd5
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/da3fecd5
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/da3fecd5
Branch: refs/heads/TEZ-2003
Commit: da3fecd53df56b96e29c8608068bf46a1995d611
Parents: 6a04fa4
Author: Prakash Ramachandran <pr...@hortonworks.com>
Authored: Wed May 6 19:27:39 2015 +0530
Committer: Prakash Ramachandran <pr...@hortonworks.com>
Committed: Wed May 6 19:27:39 2015 +0530
----------------------------------------------------------------------
CHANGES.txt | 1 +
.../scripts/controllers/shared-controllers.js | 1 -
.../controllers/task-counters-controller.js | 28 ++++++++++++++++++++
.../app/scripts/models/TimelineRestAdapter.js | 2 +-
.../webapp/app/templates/common/counters.hbs | 18 ++++++++-----
5 files changed, 42 insertions(+), 8 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/tez/blob/da3fecd5/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 91dd9c4..f95e626 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -15,6 +15,7 @@ INCOMPATIBLE CHANGES
TEZ-1993. Implement a pluggable InputSizeEstimator for grouping fairly
ALL CHANGES:
+ TEZ-2417. Tez UI: Counters are blank in the Attempts page if all attempts failed
TEZ-2366. Pig tez MiniTezCluster unit tests fail intermittently after TEZ-2333
TEZ-2406. Tez UI: Display per-io counter columns in task and attempt pages under vertex
TEZ-2384. Add warning message in the case of prewarn under non-session mode.
http://git-wip-us.apache.org/repos/asf/tez/blob/da3fecd5/tez-ui/src/main/webapp/app/scripts/controllers/shared-controllers.js
----------------------------------------------------------------------
diff --git a/tez-ui/src/main/webapp/app/scripts/controllers/shared-controllers.js b/tez-ui/src/main/webapp/app/scripts/controllers/shared-controllers.js
index 70d5e8c..e63fcf5 100644
--- a/tez-ui/src/main/webapp/app/scripts/controllers/shared-controllers.js
+++ b/tez-ui/src/main/webapp/app/scripts/controllers/shared-controllers.js
@@ -18,7 +18,6 @@
App.DagCountersController =
App.VertexCountersController =
- App.TaskCountersController =
App.TaskAttemptCountersController =
Em.ObjectController.extend(App.ModelRefreshMixin);
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/tez/blob/da3fecd5/tez-ui/src/main/webapp/app/scripts/controllers/task-counters-controller.js
----------------------------------------------------------------------
diff --git a/tez-ui/src/main/webapp/app/scripts/controllers/task-counters-controller.js b/tez-ui/src/main/webapp/app/scripts/controllers/task-counters-controller.js
new file mode 100644
index 0000000..5d514bd
--- /dev/null
+++ b/tez-ui/src/main/webapp/app/scripts/controllers/task-counters-controller.js
@@ -0,0 +1,28 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+App.TaskCountersController = Em.ObjectController.extend(App.ModelRefreshMixin, {
+ message: function () {
+ var status = this.get('content.status');
+ if(!this.get('content.counterGroups.length')) {
+ if(status == 'KILLED' || status == 'FAILED') {
+ return 'Task %@, please check the counters of individual task attempts.'.fmt(status);
+ }
+ }
+ }.property('content.status', 'content.counterGroups.length')
+});
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/tez/blob/da3fecd5/tez-ui/src/main/webapp/app/scripts/models/TimelineRestAdapter.js
----------------------------------------------------------------------
diff --git a/tez-ui/src/main/webapp/app/scripts/models/TimelineRestAdapter.js b/tez-ui/src/main/webapp/app/scripts/models/TimelineRestAdapter.js
index 5db0990..704ba2f 100644
--- a/tez-ui/src/main/webapp/app/scripts/models/TimelineRestAdapter.js
+++ b/tez-ui/src/main/webapp/app/scripts/models/TimelineRestAdapter.js
@@ -93,7 +93,7 @@ var timelineJsonToDagMap = {
endTime: 'otherinfo.endTime',
name: 'primaryfilters.dagName.0',
user: 'primaryfilters.user.0',
- applicationId: 'otherinfo.applicationId',
+ applicationId: 'primaryfilters.applicationId.0',
status: 'otherinfo.status',
hasFailedTaskAttempts: {
custom: function(source) {
http://git-wip-us.apache.org/repos/asf/tez/blob/da3fecd5/tez-ui/src/main/webapp/app/templates/common/counters.hbs
----------------------------------------------------------------------
diff --git a/tez-ui/src/main/webapp/app/templates/common/counters.hbs b/tez-ui/src/main/webapp/app/templates/common/counters.hbs
index 30419e1..44d0b2c 100644
--- a/tez-ui/src/main/webapp/app/templates/common/counters.hbs
+++ b/tez-ui/src/main/webapp/app/templates/common/counters.hbs
@@ -21,9 +21,15 @@
time=timeStamp
refresh='refresh'
}}
-<div class='table-container margin-small-vertical'>
- {{counter-table-component
- data=counterGroups
- timeStamp=timeStamp
- }}
-</div>
+{{#if counterGroups.length}}
+ <div class='table-container margin-small-vertical'>
+ {{counter-table-component
+ data=counterGroups
+ timeStamp=timeStamp
+ }}
+ </div>
+{{else}}
+ <h4>Counters not available!</h4>
+{{/if}}
+<h6>{{message}}</h6>
+
[38/43] tez git commit: TEZ-2347. Expose additional information in
TaskCommunicatorContext. (sseth)
Posted by ss...@apache.org.
TEZ-2347. Expose additional information in TaskCommunicatorContext. (sseth)
Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/44f5e8f1
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/44f5e8f1
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/44f5e8f1
Branch: refs/heads/TEZ-2003
Commit: 44f5e8f1b8392ec34177c7bfb968e4f6fcb3c6bb
Parents: e7c3cc7
Author: Siddharth Seth <ss...@apache.org>
Authored: Mon Apr 20 13:17:31 2015 -0700
Committer: Siddharth Seth <ss...@apache.org>
Committed: Fri May 8 14:41:55 2015 -0700
----------------------------------------------------------------------
TEZ-2003-CHANGES.txt | 1 +
.../tez/dag/api/TaskCommunicatorContext.java | 50 ++++++++++++++++++++
.../dag/app/TaskCommunicatorContextImpl.java | 50 ++++++++++++++++++++
.../java/org/apache/tez/dag/app/dag/DAG.java | 2 +
.../java/org/apache/tez/dag/app/dag/Task.java | 1 +
.../org/apache/tez/dag/app/dag/TaskAttempt.java | 6 +++
.../apache/tez/dag/app/dag/impl/DAGImpl.java | 10 ++++
.../tez/dag/app/dag/impl/TaskAttemptImpl.java | 12 +++++
.../apache/tez/dag/app/dag/impl/TaskImpl.java | 13 ++++-
9 files changed, 144 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/tez/blob/44f5e8f1/TEZ-2003-CHANGES.txt
----------------------------------------------------------------------
diff --git a/TEZ-2003-CHANGES.txt b/TEZ-2003-CHANGES.txt
index ca5225e..7c13110 100644
--- a/TEZ-2003-CHANGES.txt
+++ b/TEZ-2003-CHANGES.txt
@@ -16,5 +16,6 @@ ALL CHANGES:
TEZ-2284. Separate TaskReporter into an interface.
TEZ-2285. Allow TaskCommunicators to indicate task/container liveness.
TEZ-2302. Allow TaskCommunicators to subscribe for Vertex updates.
+ TEZ-2347. Expose additional information in TaskCommunicatorContext.
INCOMPATIBLE CHANGES:
http://git-wip-us.apache.org/repos/asf/tez/blob/44f5e8f1/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicatorContext.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicatorContext.java b/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicatorContext.java
index 19caed9..56345ab 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicatorContext.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicatorContext.java
@@ -16,6 +16,7 @@ package org.apache.tez.dag.api;
import javax.annotation.Nullable;
import java.io.IOException;
+import java.util.Collection;
import java.util.Set;
import org.apache.hadoop.security.Credentials;
@@ -71,4 +72,53 @@ public interface TaskCommunicatorContext {
// TODO TEZ-2003 API. Should a method exist for task succeeded.
// TODO Eventually Add methods to report availability stats to the scheduler.
+
+ /**
+ * Get the name of the currently executing dag
+ * @return the name of the currently executing dag
+ */
+ String getCurretnDagName();
+
+ /**
+ * Get the names of the input vertices for the specified vertex.
+ * Root Inputs are not returned.
+ * @param vertexName the vertex for which source vertex names will be returned
+ * @return an Iterable over the names of the input vertices of the specified vertex
+ */
+ Iterable<String> getInputVertexNames(String vertexName);
+
+ /**
+ * Get the total number of tasks in the given vertex
+ * @param vertexName the vertex name
+ * @return total number of tasks in this vertex
+ */
+ int getVertexTotalTaskCount(String vertexName);
+
+ /**
+ * Get the number of completed tasks for a given vertex
+ * @param vertexName the vertex name
+ * @return the number of completed tasks for the vertex
+ */
+ int getVertexCompletedTaskCount(String vertexName);
+
+ /**
+ * Get the number of running tasks for a given vertex
+ * @param vertexName the vertex name
+ * @return the number of running tasks for the vertex
+ */
+ int getVertexRunningTaskCount(String vertexName);
+
+ /**
+ * Get the start time for the first attempt of the specified task
+ * @param vertexName the vertex to which the task belongs
+ * @param taskIndex the index of the task
+ * @return the start time for the first attempt of the task
+ */
+ long getFirstAttemptStartTime(String vertexName, int taskIndex);
+
+ /**
+ * Get the start time for the currently executing DAG
+ * @return time when the current dag started executing
+ */
+ long getDagStartTime();
}
http://git-wip-us.apache.org/repos/asf/tez/blob/44f5e8f1/tez-dag/src/main/java/org/apache/tez/dag/app/TaskCommunicatorContextImpl.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/TaskCommunicatorContextImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/TaskCommunicatorContextImpl.java
index 3714c3c..4cb0c93 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/TaskCommunicatorContextImpl.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/TaskCommunicatorContextImpl.java
@@ -18,7 +18,9 @@ import javax.annotation.Nullable;
import java.io.IOException;
import java.util.Set;
+import com.google.common.base.Function;
import com.google.common.base.Preconditions;
+import com.google.common.collect.Iterables;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.security.Credentials;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
@@ -31,6 +33,7 @@ import org.apache.tez.dag.api.TezException;
import org.apache.tez.dag.api.TezUncheckedException;
import org.apache.tez.dag.api.event.VertexState;
import org.apache.tez.dag.api.event.VertexStateUpdate;
+import org.apache.tez.dag.app.dag.Vertex;
import org.apache.tez.dag.app.dag.VertexStateUpdateListener;
import org.apache.tez.dag.records.TezTaskAttemptID;
@@ -111,6 +114,53 @@ public class TaskCommunicatorContextImpl implements TaskCommunicatorContext, Ver
context.getCurrentDAG().getStateChangeNotifier().registerForVertexUpdates(vertexName, stateSet, this);
}
+ @Override
+ public String getCurretnDagName() {
+ return context.getCurrentDAG().getName();
+ }
+
+ @Override
+ public Iterable<String> getInputVertexNames(String vertexName) {
+ Preconditions.checkNotNull(vertexName, "VertexName cannot be null: " + vertexName);
+ Vertex vertex = context.getCurrentDAG().getVertex(vertexName);
+ Set<Vertex> sources = vertex.getInputVertices().keySet();
+ return Iterables.transform(sources, new Function<Vertex, String>() {
+ @Override
+ public String apply(@Nullable Vertex input) {
+ return input.getName();
+ }
+ });
+ }
+
+ @Override
+ public int getVertexTotalTaskCount(String vertexName) {
+ Preconditions.checkArgument(vertexName != null, "VertexName must be specified");
+ return context.getCurrentDAG().getVertex(vertexName).getTotalTasks();
+ }
+
+ @Override
+ public int getVertexCompletedTaskCount(String vertexName) {
+ Preconditions.checkArgument(vertexName != null, "VertexName must be specified");
+ return context.getCurrentDAG().getVertex(vertexName).getCompletedTasks();
+ }
+
+ @Override
+ public int getVertexRunningTaskCount(String vertexName) {
+ Preconditions.checkArgument(vertexName != null, "VertexName must be specified");
+ return context.getCurrentDAG().getVertex(vertexName).getRunningTasks();
+ }
+
+ @Override
+ public long getFirstAttemptStartTime(String vertexName, int taskIndex) {
+ Preconditions.checkArgument(vertexName != null, "VertexName must be specified");
+ Preconditions.checkArgument(taskIndex >=0, "TaskIndex must be >= 0"); // task index 0 is valid
+ return context.getCurrentDAG().getVertex(vertexName).getTask(taskIndex).getFirstAttemptStartTime();
+ }
+
+ @Override
+ public long getDagStartTime() {
+ return context.getCurrentDAG().getStartTime();
+ }
@Override
public void onStateUpdated(VertexStateUpdate event) {
http://git-wip-us.apache.org/repos/asf/tez/blob/44f5e8f1/tez-dag/src/main/java/org/apache/tez/dag/app/dag/DAG.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/DAG.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/DAG.java
index 6d6872b..458362f 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/DAG.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/DAG.java
@@ -94,6 +94,8 @@ public interface DAG {
Map<String, TezVertexID> getVertexNameIDMapping();
+ long getStartTime();
+
StateChangeNotifier getStateChangeNotifier();
}
http://git-wip-us.apache.org/repos/asf/tez/blob/44f5e8f1/tez-dag/src/main/java/org/apache/tez/dag/app/dag/Task.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/Task.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/Task.java
index b798fce..3af14b5 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/Task.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/Task.java
@@ -66,4 +66,5 @@ public interface Task {
public void registerTezEvent(TezEvent tezEvent);
+ long getFirstAttemptStartTime();
}
http://git-wip-us.apache.org/repos/asf/tez/blob/44f5e8f1/tez-dag/src/main/java/org/apache/tez/dag/app/dag/TaskAttempt.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/TaskAttempt.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/TaskAttempt.java
index 6c85cc2..26613e9 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/TaskAttempt.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/TaskAttempt.java
@@ -122,6 +122,12 @@ public interface TaskAttempt {
*/
long getLaunchTime();
+ /**
+ * Get the time at which this attempt was scheduled
+ * @return the time at which this attempt was scheduled, 0 if it hasn't been scheduled yet
+ */
+ long getScheduleTime();
+
/**
* @return attempt's finish time. If attempt is not finished
* yet, returns 0.
http://git-wip-us.apache.org/repos/asf/tez/blob/44f5e8f1/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/DAGImpl.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/DAGImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/DAGImpl.java
index 4d10711..7870697 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/DAGImpl.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/DAGImpl.java
@@ -696,6 +696,16 @@ public class DAGImpl implements org.apache.tez.dag.app.dag.DAG,
}
@Override
+ public long getStartTime() {
+ readLock.lock();
+ try {
+ return this.startTime;
+ } finally {
+ readLock.unlock();
+ }
+ }
+
+ @Override
public StateChangeNotifier getStateChangeNotifier() {
return entityUpdateTracker;
}
http://git-wip-us.apache.org/repos/asf/tez/blob/44f5e8f1/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TaskAttemptImpl.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TaskAttemptImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TaskAttemptImpl.java
index 11d4df9..092520d 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TaskAttemptImpl.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TaskAttemptImpl.java
@@ -135,6 +135,7 @@ public class TaskAttemptImpl implements TaskAttempt,
protected final AppContext appContext;
private final TaskHeartbeatHandler taskHeartbeatHandler;
private long launchTime = 0;
+ private long scheduleTime = 0;
private long finishTime = 0;
private String trackerName;
private int httpPort;
@@ -671,6 +672,16 @@ public class TaskAttemptImpl implements TaskAttempt,
}
@Override
+ public long getScheduleTime() {
+ readLock.lock();
+ try {
+ return scheduleTime;
+ } finally {
+ readLock.unlock();
+ }
+ }
+
+ @Override
public long getFinishTime() {
readLock.lock();
try {
@@ -1030,6 +1041,7 @@ public class TaskAttemptImpl implements TaskAttempt,
public TaskAttemptStateInternal transition(TaskAttemptImpl ta, TaskAttemptEvent event) {
TaskAttemptEventSchedule scheduleEvent = (TaskAttemptEventSchedule) event;
+ ta.scheduleTime = ta.clock.getTime();
// TODO Creating the remote task here may not be required in case of
// recovery.
http://git-wip-us.apache.org/repos/asf/tez/blob/44f5e8f1/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TaskImpl.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TaskImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TaskImpl.java
index 2e884e7..4319eb3 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TaskImpl.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TaskImpl.java
@@ -1492,7 +1492,18 @@ public class TaskImpl implements Task, EventHandler<TaskEvent> {
this.writeLock.unlock();
}
}
-
+
+ @Override
+ public long getFirstAttemptStartTime() {
+ readLock.lock();
+ try {
+ // The first attempt will always have an index of 0.
+ return getAttempt(TezTaskAttemptID.getInstance(getTaskId(), 0)).getScheduleTime();
+ } finally {
+ readLock.unlock();
+ }
+ }
+
private static class KillTransition
implements SingleArcTransition<TaskImpl, TaskEvent> {
@Override
[14/43] tez git commit: TEZ-776. Reduce AM mem usage caused by
storing TezEvents (bikas)
Posted by ss...@apache.org.
TEZ-776. Reduce AM mem usage caused by storing TezEvents (bikas)
Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/05f77fe2
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/05f77fe2
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/05f77fe2
Branch: refs/heads/TEZ-2003
Commit: 05f77fe2b210341a16ead9fc51e53093c836d860
Parents: a382324
Author: Bikas Saha <bi...@apache.org>
Authored: Thu May 7 15:44:38 2015 -0700
Committer: Bikas Saha <bi...@apache.org>
Committed: Thu May 7 15:44:38 2015 -0700
----------------------------------------------------------------------
CHANGES.txt | 1 +
tez-api/findbugs-exclude.xml | 12 +
.../apache/tez/dag/api/EdgeManagerPlugin.java | 4 +-
.../tez/dag/api/EdgeManagerPluginOnDemand.java | 332 +++++++++++++++++++
.../api/events/CompositeDataMovementEvent.java | 21 ++
.../runtime/api/events/DataMovementEvent.java | 33 +-
.../runtime/api/events/InputFailedEvent.java | 16 +
tez-dag/findbugs-exclude.xml | 5 +-
.../tez/dag/app/TaskAttemptEventInfo.java | 40 +++
.../dag/app/TaskAttemptListenerImpTezDag.java | 6 +-
.../java/org/apache/tez/dag/app/dag/Vertex.java | 5 +
.../dag/app/dag/impl/BroadcastEdgeManager.java | 41 ++-
.../org/apache/tez/dag/app/dag/impl/Edge.java | 244 ++++++++++++--
.../dag/app/dag/impl/OneToOneEdgeManager.java | 67 +++-
.../app/dag/impl/ScatterGatherEdgeManager.java | 83 ++++-
.../apache/tez/dag/app/dag/impl/TaskImpl.java | 6 -
.../apache/tez/dag/app/dag/impl/VertexImpl.java | 323 ++++++++++++++----
.../apache/tez/dag/app/MockDAGAppMaster.java | 25 +-
.../tez/dag/app/TestMemoryWithEvents.java | 219 ++++++++++++
.../tez/dag/app/TestMockDAGAppMaster.java | 160 ++++++++-
.../app/TestTaskAttemptListenerImplTezDag.java | 33 +-
.../tez/dag/app/dag/impl/TestDAGImpl.java | 90 ++++-
.../tez/dag/app/dag/impl/TestVertexImpl.java | 208 +++++++++++-
.../org/apache/tez/test/EdgeManagerForTest.java | 33 +-
.../org/apache/tez/runtime/RuntimeTask.java | 10 +
.../runtime/api/impl/TezHeartbeatResponse.java | 12 +
.../apache/tez/runtime/task/TaskReporter.java | 12 +-
.../vertexmanager/ShuffleVertexManager.java | 117 ++++++-
.../tez/test/TestExceptionPropagation.java | 39 ++-
29 files changed, 2000 insertions(+), 197 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/tez/blob/05f77fe2/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 8de61b0..ba8e9d8 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -20,6 +20,7 @@ INCOMPATIBLE CHANGES
Default max limit increased. Should not affect existing users.
ALL CHANGES:
+ TEZ-776. Reduce AM mem usage caused by storing TezEvents
TEZ-2423. Tez UI: Remove Attempt Index column from task->attempts page
TEZ-2416. Tez UI: Make tooltips display faster.
TEZ-2404. Handle DataMovementEvent before its TaskAttemptCompletedEvent
http://git-wip-us.apache.org/repos/asf/tez/blob/05f77fe2/tez-api/findbugs-exclude.xml
----------------------------------------------------------------------
diff --git a/tez-api/findbugs-exclude.xml b/tez-api/findbugs-exclude.xml
index b928a44..07792e6 100644
--- a/tez-api/findbugs-exclude.xml
+++ b/tez-api/findbugs-exclude.xml
@@ -85,4 +85,16 @@
<Bug pattern="SWL_SLEEP_WITH_LOCK_HELD"/>
</Match>
+ <Match>
+ <Class name="org.apache.tez.dag.api.EdgeManagerPluginOnDemand$EventRouteMetadata"/>
+ <Method name="getSourceIndices"/>
+ <Bug pattern="EI_EXPOSE_REP"/>
+ </Match>
+
+ <Match>
+ <Class name="org.apache.tez.dag.api.EdgeManagerPluginOnDemand$EventRouteMetadata"/>
+ <Method name="getTargetIndices"/>
+ <Bug pattern="EI_EXPOSE_REP"/>
+ </Match>
+
</FindBugsFilter>
http://git-wip-us.apache.org/repos/asf/tez/blob/05f77fe2/tez-api/src/main/java/org/apache/tez/dag/api/EdgeManagerPlugin.java
----------------------------------------------------------------------
diff --git a/tez-api/src/main/java/org/apache/tez/dag/api/EdgeManagerPlugin.java b/tez-api/src/main/java/org/apache/tez/dag/api/EdgeManagerPlugin.java
index 8768e7d..4e22f63 100644
--- a/tez-api/src/main/java/org/apache/tez/dag/api/EdgeManagerPlugin.java
+++ b/tez-api/src/main/java/org/apache/tez/dag/api/EdgeManagerPlugin.java
@@ -101,7 +101,7 @@ public abstract class EdgeManagerPlugin {
public abstract void routeDataMovementEventToDestination(DataMovementEvent event,
int sourceTaskIndex, int sourceOutputIndex,
Map<Integer, List<Integer>> destinationTaskAndInputIndices) throws Exception;
-
+
/**
* Return the routing information to inform consumers about the failure of a
* source task whose outputs have been potentially lost. The return map has
@@ -144,7 +144,7 @@ public abstract class EdgeManagerPlugin {
int destinationTaskIndex, int destinationFailedInputIndex) throws Exception;
/**
- * Return ahe {@link org.apache.tez.dag.api.EdgeManagerPluginContext} for this specific instance of
+ * Return the {@link org.apache.tez.dag.api.EdgeManagerPluginContext} for this specific instance of
* the vertex manager.
*
* @return the {@link org.apache.tez.dag.api.EdgeManagerPluginContext} for the input
http://git-wip-us.apache.org/repos/asf/tez/blob/05f77fe2/tez-api/src/main/java/org/apache/tez/dag/api/EdgeManagerPluginOnDemand.java
----------------------------------------------------------------------
diff --git a/tez-api/src/main/java/org/apache/tez/dag/api/EdgeManagerPluginOnDemand.java b/tez-api/src/main/java/org/apache/tez/dag/api/EdgeManagerPluginOnDemand.java
new file mode 100644
index 0000000..05c0c62
--- /dev/null
+++ b/tez-api/src/main/java/org/apache/tez/dag/api/EdgeManagerPluginOnDemand.java
@@ -0,0 +1,332 @@
+/**
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements. See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership. The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+package org.apache.tez.dag.api;
+
+import java.util.List;
+import java.util.Map;
+
+import javax.annotation.Nullable;
+
+import org.apache.hadoop.classification.InterfaceAudience.Public;
+import org.apache.hadoop.classification.InterfaceStability.Unstable;
+import org.apache.tez.runtime.api.Event;
+import org.apache.tez.runtime.api.events.CompositeDataMovementEvent;
+import org.apache.tez.runtime.api.events.DataMovementEvent;
+import org.apache.tez.runtime.api.events.InputFailedEvent;
+import org.apache.tez.runtime.api.events.InputReadErrorEvent;
+
+/**
+ * This interface defines the routing of the event between tasks of producer and
+ * consumer vertices. The routing is bi-directional. Users can customize the
+ * routing by providing an implementation of this interface.
+ */
+@Public
+@Unstable
+public abstract class EdgeManagerPluginOnDemand extends EdgeManagerPlugin {
+
+ /**
+ * Class to provide routing metadata for {@link Event}s to be routed between
+ * producer and consumer tasks. The routing data enables the system to send
+ * the event from the producer task output to the consumer task input.
+ */
+ public static class EventRouteMetadata {
+ private final int numEvents;
+ private final int[] targetIndices;
+ private final int[] sourceIndices;
+
+ /**
+ * Create an {@link EventRouteMetadata} that will create numEvents copies of
+ * the {@link Event} to be routed. Use this to create
+ * {@link EventRouteMetadata} for {@link DataMovementEvent}s or
+ * {@link InputFailedEvent}s where the target input indices must be
+ * specified to route those events. Typically numEvents would be 1 for these
+ * events.
+ *
+ * @param numEvents
+ * Number of copies of the event to be routed
+ * @param targetIndices
+ * Target input indices. The array length must match the number of
+ * events specified when creating the {@link EventRouteMetadata}
+ * object
+ * @return {@link EventRouteMetadata}
+ */
+ public static EventRouteMetadata create(int numEvents, int[] targetIndices) {
+ return new EventRouteMetadata(numEvents, targetIndices, null);
+ }
+
+ /**
+ * Create an {@link EventRouteMetadata} that will create numEvents copies of
+ * the {@link Event} to be routed. Use this to create
+ * {@link EventRouteMetadata} for {@link CompositeDataMovementEvent} where
+ * the target input indices and source output indices must be specified to
+ * route those events. Typically numEvents would be 1 for these events.
+ *
+ * @param numEvents
+ * Number of copies of the event to be routed
+ * @param targetIndices
+ * Target input indices. The array length must match the number of
+ * events specified when creating the {@link EventRouteMetadata}
+ * object
+ * @param sourceIndices
+ * Source output indices. The array length must match the number of
+ * events specified when creating the {@link EventRouteMetadata}
+ * object
+ * @return {@link EventRouteMetadata}
+ */
+ public static EventRouteMetadata create(int numEvents, int[] targetIndices, int[] sourceIndices) {
+ return new EventRouteMetadata(numEvents, targetIndices, sourceIndices);
+ }
+
+ private EventRouteMetadata(int numEvents, int[] targetIndices, int[] sourceIndices) {
+ this.numEvents = numEvents;
+ this.targetIndices = targetIndices;
+ this.sourceIndices = sourceIndices;
+ }
+
+ /**
+ * Get the number of copies of the event to be routed
+ * @return Number of copies
+ */
+ public int getNumEvents() {
+ return numEvents;
+ }
+
+ /**
+ * Get the target input indices
+ * @return Target input indices
+ */
+ public @Nullable int[] getTargetIndices() {
+ return targetIndices;
+ }
+
+ /**
+ * Get the source output indices
+ * @return Source output indices
+ */
+ public @Nullable int[] getSourceIndices() {
+ return sourceIndices;
+ }
+ }
+
+ /**
+ * Create an instance of the {@link EdgeManagerPluginOnDemand}. Classes
+ * extending this to create a {@link EdgeManagerPluginOnDemand}, must provide
+ * the same constructor so that Tez can create an instance of the class at
+ * runtime.
+ *
+ * @param context
+ * the context within which this {@link EdgeManagerPluginOnDemand}
+ * will run. Includes information like configuration which the user
+ * may have specified while setting up the edge.
+ */
+ public EdgeManagerPluginOnDemand(EdgeManagerPluginContext context) {
+ super(context);
+ }
+
+ /**
+ * Initializes the EdgeManagerPlugin. This method is called in the following
+ * circumstances: <p> 1. When initializing an EdgeManagerPlugin for the first time.
+ * <p> 2. When an EdgeManagerPlugin is replaced at runtime. At this point, an
+ * EdgeManagerPlugin instance is created and setup by the user. The initialize
+ * method will be called with the original {@link EdgeManagerPluginContext} when the
+ * EdgeManagerPlugin is replaced.
+ * @throws Exception
+ */
+ public abstract void initialize() throws Exception;
+
+ /**
+ * This method will be invoked just before routing of events will begin. The
+ * plugin can use this opportunity to make any runtime initializations that
+ * depend on the actual state of the DAG or vertices.
+ */
+ public abstract void prepareForRouting() throws Exception;
+
+ /**
+ * Get the number of physical inputs on the destination task
+ * @param destinationTaskIndex Index of destination task for which number of
+ * inputs is needed
+ * @return Number of physical inputs on the destination task
+ * @throws Exception
+ */
+ public abstract int getNumDestinationTaskPhysicalInputs(int destinationTaskIndex) throws Exception;
+
+ /**
+ * Get the number of physical outputs on the source task
+ * @param sourceTaskIndex Index of the source task for which number of outputs
+ * is needed
+ * @return Number of physical outputs on the source task
+ * @throws Exception
+ */
+ public abstract int getNumSourceTaskPhysicalOutputs(int sourceTaskIndex) throws Exception;
+
+ /**
+ * Get the number of destination tasks that consume data from the source task
+ * @param sourceTaskIndex Source task index
+ * @throws Exception
+ */
+ public abstract int getNumDestinationConsumerTasks(int sourceTaskIndex) throws Exception;
+
+ /**
+ * Return the source task index to which to send the input error event
+ *
+ * @param destinationTaskIndex
+ * Destination task that reported the error
+ * @param destinationFailedInputIndex
+ * Index of the physical input on the destination task that reported
+ * the error
+ * @return Index of the source task that created the unavailable input
+ * @throws Exception
+ */
+ public abstract int routeInputErrorEventToSource(int destinationTaskIndex,
+ int destinationFailedInputIndex) throws Exception;
+
+ /**
+ * The method provides the {@link EventRouteMetadata} to route a
+ * {@link DataMovementEvent} produced by the given source task to the given
+ * destination task. The returned {@link EventRouteMetadata} should have the
+ * target input indices set to enable the routing. If the routing metadata is
+ * common across different events then the plugin can cache and reuse the same
+ * object.
+ *
+ * @param sourceTaskIndex
+ * The index of the task in the source vertex of this edge that
+ * produced a {@link DataMovementEvent}
+ * @param sourceOutputIndex
+ * Index of the physical output on the source task that produced the
+ * event
+ * @param destinationTaskIndex The index of the task in the destination vertex of this edge
+ * @return {@link EventRouteMetadata} with target indices set. May be null if
+ * the given destination task does not read input from the given
+ * source task.
+ * @throws Exception
+ */
+ public abstract @Nullable EventRouteMetadata routeDataMovementEventToDestination(int sourceTaskIndex,
+ int sourceOutputIndex, int destinationTaskIndex) throws Exception;
+
+ /**
+ * The method provides the {@link EventRouteMetadata} to route a
+ * {@link CompositeDataMovementEvent} produced by the given source task to the
+ * given destination task. The returned {@link EventRouteMetadata} should have
+ * the target input indices and source output indices set to enable the
+ * routing. If the routing metadata is common across different events then the
+ * plugin can cache and reuse the same object.
+ *
+ * @param sourceTaskIndex
+ * The index of the task in the source vertex of this edge that
+ * produced a {@link CompositeDataMovementEvent}
+ * @param destinationTaskIndex
+ * The index of the task in the destination vertex of this edge
+ * @return {@link EventRouteMetadata} with source and target indices set. This
+ * may be null if the destination task does not read data from the
+ * source task.
+ * @throws Exception
+ */
+ public abstract @Nullable EventRouteMetadata routeCompositeDataMovementEventToDestination(
+ int sourceTaskIndex, int destinationTaskIndex) throws Exception;
+
+ /**
+ * The method provides the {@link EventRouteMetadata} to route an
+ * {@link InputFailedEvent} produced by the given source task to the given
+ * destination task. The returned {@link EventRouteMetadata} should have the
+ * target input indices set to enable the routing. If the routing metadata is
+ * common across different events then the plugin can cache and reuse the same
+ * object.
+ *
+ * @param sourceTaskIndex
+ * The index of the failed task in the source vertex of this edge.
+ * @param destinationTaskIndex
+ * The index of a task in the destination vertex of this edge.
+ * @return {@link EventRouteMetadata} with target indices set. May be null if
+ * the given destination task does not read input from the given
+ * source task.
+ * @throws Exception
+ */
+ public abstract @Nullable EventRouteMetadata routeInputSourceTaskFailedEventToDestination(
+ int sourceTaskIndex, int destinationTaskIndex) throws Exception;
+
+ /**
+ * Return the {@link org.apache.tez.dag.api.EdgeManagerPluginContext} for this specific instance of
+ * the edge manager.
+ *
+ * @return the {@link org.apache.tez.dag.api.EdgeManagerPluginContext} for the input
+ */
+ public EdgeManagerPluginContext getContext() {
+ return super.getContext();
+ }
+
+ // Empty implementations of EdgeManagerPlugin interfaces that are not needed
+ /**
+ * Return the routing information to inform consumers about the source task
+ * output that is now available. The return map has the routing information.
+ * The event will be routed to every destination task index in the key of the
+ * map. Every physical input in the value for that task key will receive the
+ * input.
+ *
+ * @param event
+ * Data movement event that contains the output information
+ * @param sourceTaskIndex
+ * Source task that produced the event
+ * @param sourceOutputIndex
+ * Index of the physical output on the source task that produced the
+ * event
+ * @param destinationTaskAndInputIndices
+ * Map via which the routing information is returned
+ * @throws Exception
+ */
+ public void routeDataMovementEventToDestination(DataMovementEvent event,
+ int sourceTaskIndex, int sourceOutputIndex,
+ Map<Integer, List<Integer>> destinationTaskAndInputIndices) throws Exception {}
+
+ /**
+ * Return the routing information to inform consumers about the failure of a
+ * source task whose outputs have been potentially lost. The return map has
+ * the routing information. The failure notification event will be sent to
+ * every task index in the key of the map. Every physical input in the value
+ * for that task key will receive the failure notification. This method will
+ * be called once for every source task failure and information for all
+ * affected destinations must be provided in that invocation.
+ *
+ * @param sourceTaskIndex
+ * Source task
+ * @param destinationTaskAndInputIndices
+ * Map via which the routing information is returned
+ * @throws Exception
+ */
+ public void routeInputSourceTaskFailedEventToDestination(int sourceTaskIndex,
+ Map<Integer, List<Integer>> destinationTaskAndInputIndices) throws Exception {}
+
+ /**
+ * Return the source task index to which to send the input error event
+ *
+ * @param event
+ * Input read error event. Has more information about the error
+ * @param destinationTaskIndex
+ * Destination task that reported the error
+ * @param destinationFailedInputIndex
+ * Index of the physical input on the destination task that reported
+ * the error
+ * @return Index of the source task that created the unavailable input
+ * @throws Exception
+ */
+ public int routeInputErrorEventToSource(InputReadErrorEvent event,
+ int destinationTaskIndex, int destinationFailedInputIndex) throws Exception {
+ return routeInputErrorEventToSource(destinationTaskIndex, destinationFailedInputIndex);
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/tez/blob/05f77fe2/tez-api/src/main/java/org/apache/tez/runtime/api/events/CompositeDataMovementEvent.java
----------------------------------------------------------------------
diff --git a/tez-api/src/main/java/org/apache/tez/runtime/api/events/CompositeDataMovementEvent.java b/tez-api/src/main/java/org/apache/tez/runtime/api/events/CompositeDataMovementEvent.java
index b38fda3..c45d272 100644
--- a/tez-api/src/main/java/org/apache/tez/runtime/api/events/CompositeDataMovementEvent.java
+++ b/tez-api/src/main/java/org/apache/tez/runtime/api/events/CompositeDataMovementEvent.java
@@ -21,6 +21,7 @@ package org.apache.tez.runtime.api.events;
import java.nio.ByteBuffer;
import java.util.Iterator;
+import org.apache.hadoop.classification.InterfaceAudience.Private;
import org.apache.hadoop.classification.InterfaceAudience.Public;
import org.apache.tez.runtime.api.Event;
@@ -65,6 +66,26 @@ public class CompositeDataMovementEvent extends Event {
ByteBuffer userPayload) {
return new CompositeDataMovementEvent(srcIndexStart, count, userPayload);
}
+
+ /**
+ * Expand the {@link CompositeDataMovementEvent} into a routable
+ * {@link DataMovementEvent} by providing the source output index and the
+ * target input index.
+ *
+ * @param sourceIndex
+ * The index of the physical output represented by the
+ * {@link DataMovementEvent}
+ * @param targetIndex
+ * The index of the physical input to which the given
+ * {@link DataMovementEvent} should be routed.
+ * @return {@link DataMovementEvent} created from the
+ * {@link CompositeDataMovementEvent} with indices specified by the
+ * method parameters
+ */
+ @Private
+ public DataMovementEvent expand(int sourceIndex, int targetIndex) {
+ return new DataMovementEvent(sourceIndex, targetIndex, version, userPayload);
+ }
public int getSourceIndexStart() {
return sourceIndexStart;
http://git-wip-us.apache.org/repos/asf/tez/blob/05f77fe2/tez-api/src/main/java/org/apache/tez/runtime/api/events/DataMovementEvent.java
----------------------------------------------------------------------
diff --git a/tez-api/src/main/java/org/apache/tez/runtime/api/events/DataMovementEvent.java b/tez-api/src/main/java/org/apache/tez/runtime/api/events/DataMovementEvent.java
index b9c1cc4..05c3d3f 100644
--- a/tez-api/src/main/java/org/apache/tez/runtime/api/events/DataMovementEvent.java
+++ b/tez-api/src/main/java/org/apache/tez/runtime/api/events/DataMovementEvent.java
@@ -23,6 +23,7 @@ import java.nio.ByteBuffer;
import org.apache.hadoop.classification.InterfaceAudience.Private;
import org.apache.hadoop.classification.InterfaceAudience.Public;
import org.apache.tez.runtime.api.Event;
+import org.apache.tez.runtime.api.Output;
/**
* Event used by user code to send information between tasks. An output can
@@ -56,14 +57,8 @@ public final class DataMovementEvent extends Event {
private int version;
- private DataMovementEvent(int sourceIndex,
- ByteBuffer userPayload) {
- this.userPayload = userPayload;
- this.sourceIndex = sourceIndex;
- }
-
@Private
- private DataMovementEvent(int sourceIndex,
+ DataMovementEvent(int sourceIndex,
int targetIndex,
int version,
ByteBuffer userPayload) {
@@ -74,20 +69,21 @@ public final class DataMovementEvent extends Event {
}
private DataMovementEvent(ByteBuffer userPayload) {
- this(-1, userPayload);
+ this(-1, -1, -1, userPayload);
}
/**
- * User Event constructor
+ * User Event constructor for {@link Output}s
* @param sourceIndex Index to identify the physical edge of the input/output
* that generated the event
* @param userPayload User Payload of the User Event
*/
public static DataMovementEvent create(int sourceIndex,
ByteBuffer userPayload) {
- return new DataMovementEvent(sourceIndex, userPayload);
+ return new DataMovementEvent(sourceIndex, -1, -1, userPayload);
}
-
+
+ @Private
/**
* Constructor for Processor-generated User Events
* @param userPayload
@@ -103,6 +99,21 @@ public final class DataMovementEvent extends Event {
ByteBuffer userPayload) {
return new DataMovementEvent(sourceIndex, targetIndex, version, userPayload);
}
+
+ /**
+ * Make a routable copy of the {@link DataMovementEvent} by adding a target
+ * input index
+ *
+ * @param targetIndex
+ * The index of the physical input to which this
+ * {@link DataMovementEvent} should be routed
+ * @return Copy of this {@link DataMovementEvent} with the target input index
+ * added to it
+ */
+ @Private
+ public DataMovementEvent makeCopy(int targetIndex) {
+ return new DataMovementEvent(sourceIndex, targetIndex, version, userPayload);
+ }
public ByteBuffer getUserPayload() {
return userPayload == null ? null : userPayload.asReadOnlyBuffer();
http://git-wip-us.apache.org/repos/asf/tez/blob/05f77fe2/tez-api/src/main/java/org/apache/tez/runtime/api/events/InputFailedEvent.java
----------------------------------------------------------------------
diff --git a/tez-api/src/main/java/org/apache/tez/runtime/api/events/InputFailedEvent.java b/tez-api/src/main/java/org/apache/tez/runtime/api/events/InputFailedEvent.java
index 639d0b9..9d8363a 100644
--- a/tez-api/src/main/java/org/apache/tez/runtime/api/events/InputFailedEvent.java
+++ b/tez-api/src/main/java/org/apache/tez/runtime/api/events/InputFailedEvent.java
@@ -55,6 +55,22 @@ public class InputFailedEvent extends Event{
public static InputFailedEvent create(int targetIndex, int version) {
return new InputFailedEvent(targetIndex, version);
}
+
+ /**
+ * Create a copy of the {@link InputFailedEvent} by adding a target input
+ * index, i.e. the index of the physical input to which this event should be routed
+ *
+ * @param targetIndex
+ * The index of the physical input to which this
+ * {@link InputFailedEvent} should be routed
+ *
+ * @return copy of the {@link InputFailedEvent} with the target input index
+ * added
+ */
+ @Private
+ public InputFailedEvent makeCopy(int targetIndex) {
+ return create(targetIndex, version);
+ }
public int getTargetIndex() {
return targetIndex;
http://git-wip-us.apache.org/repos/asf/tez/blob/05f77fe2/tez-dag/findbugs-exclude.xml
----------------------------------------------------------------------
diff --git a/tez-dag/findbugs-exclude.xml b/tez-dag/findbugs-exclude.xml
index 233f76c..57c0aca 100644
--- a/tez-dag/findbugs-exclude.xml
+++ b/tez-dag/findbugs-exclude.xml
@@ -118,7 +118,10 @@
<!-- TEZ-1952 -->
<Match>
<Class name="org.apache.tez.dag.app.dag.impl.Edge"/>
- <Field name="edgeProperty"/>
+ <Or>
+ <Field name="edgeProperty"/>
+ <Field name="onDemandRouting"/>
+ </Or>
<Bug pattern="IS2_INCONSISTENT_SYNC"/>
</Match>
http://git-wip-us.apache.org/repos/asf/tez/blob/05f77fe2/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptEventInfo.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptEventInfo.java b/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptEventInfo.java
new file mode 100644
index 0000000..49ff044
--- /dev/null
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptEventInfo.java
@@ -0,0 +1,40 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package org.apache.tez.dag.app;
+
+import java.util.List;
+
+import org.apache.tez.runtime.api.impl.TezEvent;
+
+public class TaskAttemptEventInfo {
+ private final int nextFromEventId;
+ private final List<TezEvent> events;
+
+ public TaskAttemptEventInfo(int nextFromEventId, List<TezEvent> events) {
+ this.nextFromEventId = nextFromEventId;
+ this.events = events;
+ }
+
+ public int getNextFromEventId() {
+ return nextFromEventId;
+ }
+
+ public List<TezEvent> getEvents() {
+ return events;
+ }
+}
http://git-wip-us.apache.org/repos/asf/tez/blob/05f77fe2/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java b/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
index b38081b..970489d 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
@@ -444,13 +444,13 @@ public class TaskAttemptListenerImpTezDag extends AbstractService implements
new VertexEventRouteEvent(vertexId, Collections.unmodifiableList(otherEvents)));
}
taskHeartbeatHandler.pinged(taskAttemptID);
- List<TezEvent> outEvents = context
+ TaskAttemptEventInfo eventInfo = context
.getCurrentDAG()
.getVertex(taskAttemptID.getTaskID().getVertexID())
- .getTask(taskAttemptID.getTaskID())
.getTaskAttemptTezEvents(taskAttemptID, request.getStartIndex(),
request.getMaxEvents());
- response.setEvents(outEvents);
+ response.setEvents(eventInfo.getEvents());
+ response.setNextFromEventId(eventInfo.getNextFromEventId());
}
containerInfo.lastRequestId = requestId;
containerInfo.lastReponse = response;
http://git-wip-us.apache.org/repos/asf/tez/blob/05f77fe2/tez-dag/src/main/java/org/apache/tez/dag/app/dag/Vertex.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/Vertex.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/Vertex.java
index 77ef6e0..bb42392 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/Vertex.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/Vertex.java
@@ -45,10 +45,12 @@ import org.apache.tez.dag.api.records.DAGProtos.VertexPlan;
import org.apache.tez.dag.api.client.ProgressBuilder;
import org.apache.tez.dag.api.client.VertexStatusBuilder;
import org.apache.tez.dag.app.AppContext;
+import org.apache.tez.dag.app.TaskAttemptEventInfo;
import org.apache.tez.dag.app.dag.event.SpeculatorEvent;
import org.apache.tez.dag.app.dag.impl.AMUserCodeException;
import org.apache.tez.dag.app.dag.impl.Edge;
import org.apache.tez.dag.history.HistoryEvent;
+import org.apache.tez.dag.records.TezTaskAttemptID;
import org.apache.tez.dag.records.TezTaskID;
import org.apache.tez.dag.records.TezVertexID;
import org.apache.tez.runtime.api.OutputCommitter;
@@ -144,6 +146,9 @@ public interface Vertex extends Comparable<Vertex> {
void scheduleSpeculativeTask(TezTaskID taskId);
Resource getTaskResource();
+ public TaskAttemptEventInfo getTaskAttemptTezEvents(TezTaskAttemptID attemptID,
+ int fromEventId, int maxEvents);
+
void handleSpeculatorEvent(SpeculatorEvent event);
ProcessorDescriptor getProcessorDescriptor();
http://git-wip-us.apache.org/repos/asf/tez/blob/05f77fe2/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/BroadcastEdgeManager.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/BroadcastEdgeManager.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/BroadcastEdgeManager.java
index db57227..d14527d 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/BroadcastEdgeManager.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/BroadcastEdgeManager.java
@@ -22,12 +22,14 @@ import java.util.Collections;
import java.util.List;
import java.util.Map;
-import org.apache.tez.dag.api.EdgeManagerPlugin;
import org.apache.tez.dag.api.EdgeManagerPluginContext;
+import org.apache.tez.dag.api.EdgeManagerPluginOnDemand;
import org.apache.tez.runtime.api.events.DataMovementEvent;
import org.apache.tez.runtime.api.events.InputReadErrorEvent;
-public class BroadcastEdgeManager extends EdgeManagerPlugin {
+public class BroadcastEdgeManager extends EdgeManagerPluginOnDemand {
+
+ EventRouteMetadata[] commonRouteMeta;
public BroadcastEdgeManager(EdgeManagerPluginContext context) {
super(context);
@@ -60,6 +62,35 @@ public class BroadcastEdgeManager extends EdgeManagerPlugin {
}
@Override
+ public void prepareForRouting() throws Exception {
+ int numSourceTasks = getContext().getSourceVertexNumTasks();
+ commonRouteMeta = new EventRouteMetadata[numSourceTasks];
+ for (int i=0; i<numSourceTasks; ++i) {
+ commonRouteMeta[i] = EventRouteMetadata.create(1, new int[]{i}, new int[]{0});
+ }
+ }
+
+ @Override
+ public EventRouteMetadata routeDataMovementEventToDestination(
+ int sourceTaskIndex, int sourceOutputIndex, int destinationTaskIndex)
+ throws Exception {
+ return commonRouteMeta[sourceTaskIndex];
+ }
+
+ @Override
+ public EventRouteMetadata routeCompositeDataMovementEventToDestination(
+ int sourceTaskIndex, int destinationTaskIndex)
+ throws Exception {
+ return commonRouteMeta[sourceTaskIndex];
+ }
+
+ @Override
+ public EventRouteMetadata routeInputSourceTaskFailedEventToDestination(
+ int sourceTaskIndex, int destinationTaskIndex) throws Exception {
+ return commonRouteMeta[sourceTaskIndex];
+ }
+
+ @Override
public void routeInputSourceTaskFailedEventToDestination(int sourceTaskIndex,
Map<Integer, List<Integer>> destinationTaskAndInputIndices) {
List<Integer> inputIndices =
@@ -71,6 +102,12 @@ public class BroadcastEdgeManager extends EdgeManagerPlugin {
}
@Override
+ public int routeInputErrorEventToSource(int destinationTaskIndex, int destinationFailedInputIndex)
+ throws Exception {
+ return destinationFailedInputIndex;
+ }
+
+ @Override
public int routeInputErrorEventToSource(InputReadErrorEvent event,
int destinationTaskIndex, int destinationFailedInputIndex) {
return destinationFailedInputIndex;
http://git-wip-us.apache.org/repos/asf/tez/blob/05f77fe2/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/Edge.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/Edge.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/Edge.java
index f5fef67..78bab05 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/Edge.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/Edge.java
@@ -21,6 +21,7 @@ package org.apache.tez.dag.app.dag.impl;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
+import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.atomic.AtomicBoolean;
import org.slf4j.Logger;
@@ -30,9 +31,11 @@ import org.apache.tez.common.ReflectionUtils;
import org.apache.tez.dag.api.EdgeManagerPlugin;
import org.apache.tez.dag.api.EdgeManagerPluginContext;
import org.apache.tez.dag.api.EdgeManagerPluginDescriptor;
+import org.apache.tez.dag.api.EdgeManagerPluginOnDemand;
import org.apache.tez.dag.api.EdgeProperty;
import org.apache.tez.dag.api.TezUncheckedException;
import org.apache.tez.dag.api.UserPayload;
+import org.apache.tez.dag.api.EdgeManagerPluginOnDemand.EventRouteMetadata;
import org.apache.tez.dag.app.dag.Task;
import org.apache.tez.dag.app.dag.Vertex;
import org.apache.tez.dag.app.dag.event.TaskAttemptEventOutputFailed;
@@ -97,7 +100,9 @@ public class Edge {
private EdgeProperty edgeProperty;
private EdgeManagerPluginContext edgeManagerContext;
- private EdgeManagerPlugin edgeManager;
+ @VisibleForTesting
+ EdgeManagerPlugin edgeManager;
+ private boolean onDemandRouting = false;
@SuppressWarnings("rawtypes")
private EventHandler eventHandler;
private AtomicBoolean bufferEvents = new AtomicBoolean(false);
@@ -106,6 +111,9 @@ public class Edge {
private Vertex sourceVertex;
private Vertex destinationVertex; // this may end up being a list for shared edge
private EventMetaData destinationMetaInfo;
+ private boolean routingNeeded = true;
+ private final ConcurrentMap<TezTaskAttemptID, PendingEventRouteMetadata> pendingEvents = Maps
+ .newConcurrentMap();
@SuppressWarnings("rawtypes")
public Edge(EdgeProperty edgeProperty, EventHandler eventHandler) {
@@ -188,6 +196,29 @@ public class Edge {
edgeProperty.getEdgeDestination());
setEdgeProperty(modifiedEdgeProperty);
}
+
+ public synchronized boolean routingToBegin() throws AMUserCodeException {
+ if (edgeManagerContext.getDestinationVertexNumTasks() == 0) {
+ routingNeeded = false;
+ } else if (edgeManagerContext.getDestinationVertexNumTasks() < 0) {
+ throw new TezUncheckedException(
+ "Internal error. Not expected to route events to a destination until parallelism is determined" +
+ " sourceVertex=" + sourceVertex.getLogIdentifier() +
+ " edgeManager=" + edgeManager.getClass().getName());
+ }
+ if (edgeManager instanceof EdgeManagerPluginOnDemand) {
+ onDemandRouting = true;
+ try {
+ ((EdgeManagerPluginOnDemand)edgeManager).prepareForRouting();
+ } catch (Exception e) {
+ throw new AMUserCodeException(Source.EdgeManager,
+ "Fail to prepareForRouting " + getEdgeInfo(), e);
+ }
+ }
+
+ LOG.info("Routing to begin for edge: " + getEdgeInfo() + ". EdgeProperty: " + edgeProperty);
+ return onDemandRouting;
+ }
public synchronized EdgeProperty getEdgeProperty() {
return this.edgeProperty;
@@ -280,8 +311,13 @@ public class Edge {
int srcTaskIndex;
int numConsumers;
try {
- srcTaskIndex = edgeManager.routeInputErrorEventToSource(event,
- destTaskIndex, event.getIndex());
+ if (onDemandRouting) {
+ srcTaskIndex = ((EdgeManagerPluginOnDemand) edgeManager).routeInputErrorEventToSource(
+ destTaskIndex, event.getIndex());
+ } else {
+ srcTaskIndex = edgeManager.routeInputErrorEventToSource(event,
+ destTaskIndex, event.getIndex());
+ }
Preconditions.checkArgument(srcTaskIndex >= 0,
"SourceTaskIndex should not be negative,"
+ "srcTaskIndex=" + srcTaskIndex);
@@ -340,7 +376,6 @@ public class Edge {
Preconditions.checkState(edgeManager != null,
"Edge Manager must be initialized by this time");
Event event = tezEvent.getEvent();
- boolean isFirstEvent = true;
// cache of event object per input index
Map<Integer, TezEvent> inputIndicesWithEvents = Maps.newHashMap();
for (Map.Entry<Integer, List<Integer>> entry : taskAndInputIndices.entrySet()) {
@@ -350,28 +385,16 @@ public class Edge {
Integer inputIndex = inputIndices.get(i);
TezEvent tezEventToSend = inputIndicesWithEvents.get(inputIndex);
if (tezEventToSend == null) {
- if (isFirstEvent) {
- isFirstEvent = false;
- // this is the first item - reuse the event object
- if (isDataMovementEvent) {
- ((DataMovementEvent) event).setTargetIndex(inputIndex);
- } else {
- ((InputFailedEvent) event).setTargetIndex(inputIndex);
- }
- tezEventToSend = tezEvent;
+ Event e;
+ if (isDataMovementEvent) {
+ DataMovementEvent dmEvent = (DataMovementEvent) event;
+ e = DataMovementEvent.create(dmEvent.getSourceIndex(),
+ inputIndex, dmEvent.getVersion(), dmEvent.getUserPayload());
} else {
- // create new event object for this input index
- Event e;
- if (isDataMovementEvent) {
- DataMovementEvent dmEvent = (DataMovementEvent) event;
- e = DataMovementEvent.create(dmEvent.getSourceIndex(),
- inputIndex, dmEvent.getVersion(), dmEvent.getUserPayload());
- } else {
- InputFailedEvent ifEvent = ((InputFailedEvent) event);
- e = InputFailedEvent.create(inputIndex, ifEvent.getVersion());
- }
- tezEventToSend = new TezEvent(e, tezEvent.getSourceInfo());
+ InputFailedEvent ifEvent = ((InputFailedEvent) event);
+ e = InputFailedEvent.create(inputIndex, ifEvent.getVersion());
}
+ tezEventToSend = new TezEvent(e, tezEvent.getSourceInfo());
tezEventToSend.setDestinationInfo(destinationMetaInfo);
// cache the event object per input because are unique per input index
inputIndicesWithEvents.put(inputIndex, tezEventToSend);
@@ -392,8 +415,6 @@ public class Edge {
}
public void sendTezEventToDestinationTasks(TezEvent tezEvent) throws AMUserCodeException {
- Preconditions.checkState(edgeManager != null,
- "Edge Manager must be initialized by this time");
if (!bufferEvents.get()) {
boolean isDataMovementEvent = true;
switch (tezEvent.getEventType()) {
@@ -411,16 +432,7 @@ public class Edge {
.getTaskAttemptID();
int srcTaskIndex = srcAttemptId.getTaskID().getId();
- boolean routingRequired = true;
- if (edgeManagerContext.getDestinationVertexNumTasks() == 0) {
- routingRequired = false;
- LOG.info("Not routing events since destination vertex has 0 tasks" +
- generateCommonDebugString(srcTaskIndex, tezEvent));
- } else if (edgeManagerContext.getDestinationVertexNumTasks() < 0) {
- throw new TezUncheckedException(
- "Internal error. Not expected to route events to a destination until parallelism is determined" +
- generateCommonDebugString(srcTaskIndex, tezEvent));
- }
+ boolean routingRequired = routingNeeded;
if (routingRequired) {
try {
@@ -439,6 +451,9 @@ public class Edge {
+ ", sourceInfo:" + tezEvent.getSourceInfo() + ", destinationInfo:"
+ tezEvent.getDestinationInfo() + ", " + getEdgeInfo(), e);
}
+ } else {
+ LOG.info("Not routing events since destination vertex has 0 tasks" +
+ generateCommonDebugString(srcTaskIndex, tezEvent));
}
if (!destTaskAndInputIndices.isEmpty()) {
@@ -459,6 +474,163 @@ public class Edge {
}
}
+ static class PendingEventRouteMetadata {
+ private final EventRouteMetadata routeMeta;
+ private final TezEvent event;
+ private int numEventsRouted;
+
+ public PendingEventRouteMetadata(EventRouteMetadata routeMeta, TezEvent event,
+ int numEventsRouted) {
+ this.routeMeta = routeMeta;
+ this.event = event;
+ this.numEventsRouted = numEventsRouted;
+ }
+
+ public EventRouteMetadata getRouteMeta() {
+ return routeMeta;
+ }
+
+ public TezEvent getTezEvent() {
+ return event;
+ }
+
+ public int getNumEventsRouted() {
+ return numEventsRouted;
+ }
+ }
+
+ public PendingEventRouteMetadata removePendingEvents(TezTaskAttemptID attemptID) {
+ return pendingEvents.remove(attemptID);
+ }
+
+ // return false if the event could be routed but ran out of space in the list
+ public boolean maybeAddTezEventForDestinationTask(TezEvent tezEvent, TezTaskAttemptID attemptID,
+ int srcTaskIndex, List<TezEvent> listToAdd, int listMaxSize,
+ PendingEventRouteMetadata pendingRoutes)
+ throws AMUserCodeException {
+ if (!routingNeeded) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Not routing events since destination vertex has 0 tasks" +
+ generateCommonDebugString(srcTaskIndex, tezEvent));
+ }
+ return true;
+ } else {
+ try {
+ EdgeManagerPluginOnDemand edgeManagerOnDemand = (EdgeManagerPluginOnDemand) edgeManager;
+ int taskIndex = attemptID.getTaskID().getId();
+ switch (tezEvent.getEventType()) {
+ case COMPOSITE_DATA_MOVEMENT_EVENT:
+ {
+ CompositeDataMovementEvent compEvent = (CompositeDataMovementEvent) tezEvent.getEvent();
+ EventRouteMetadata routeMeta;
+ int numEventsDone;
+ if (pendingRoutes != null) {
+ routeMeta = pendingRoutes.getRouteMeta();
+ numEventsDone = pendingRoutes.getNumEventsRouted();
+ } else {
+ routeMeta = edgeManagerOnDemand
+ .routeCompositeDataMovementEventToDestination(srcTaskIndex, taskIndex);
+ numEventsDone = 0;
+ }
+ if (routeMeta != null) {
+ int listSize = listToAdd.size();
+ int numEvents = routeMeta.getNumEvents();
+ int[] sourceIndices = routeMeta.getSourceIndices();
+ int[] targetIndices = routeMeta.getTargetIndices();
+ while (numEventsDone < numEvents && listSize++ < listMaxSize) {
+ DataMovementEvent e = compEvent.expand(sourceIndices[numEventsDone],
+ targetIndices[numEventsDone]);
+ numEventsDone++;
+ TezEvent tezEventToSend = new TezEvent(e, tezEvent.getSourceInfo());
+ tezEventToSend.setDestinationInfo(destinationMetaInfo);
+ listToAdd.add(tezEventToSend);
+ }
+ if (numEventsDone < numEvents) {
+ pendingEvents.put(attemptID, new PendingEventRouteMetadata(routeMeta, tezEvent,
+ numEventsDone));
+ return false;
+ }
+ }
+ }
+ break;
+ case INPUT_FAILED_EVENT:
+ {
+ InputFailedEvent ifEvent = (InputFailedEvent) tezEvent.getEvent();
+ EventRouteMetadata routeMeta;
+ int numEventsDone;
+ if (pendingRoutes != null) {
+ routeMeta = pendingRoutes.getRouteMeta();
+ numEventsDone = pendingRoutes.getNumEventsRouted();
+ } else {
+ routeMeta = edgeManagerOnDemand.routeInputSourceTaskFailedEventToDestination(
+ srcTaskIndex, taskIndex);
+ numEventsDone = 0;
+ }
+ if (routeMeta != null) {
+ int listSize = listToAdd.size();
+ int numEvents = routeMeta.getNumEvents();
+ int[] targetIndices = routeMeta.getTargetIndices();
+ while (numEventsDone < numEvents && listSize++ < listMaxSize) {
+ InputFailedEvent e = ifEvent.makeCopy(targetIndices[numEventsDone]);
+ numEventsDone++;
+ TezEvent tezEventToSend = new TezEvent(e, tezEvent.getSourceInfo());
+ tezEventToSend.setDestinationInfo(destinationMetaInfo);
+ listToAdd.add(tezEventToSend);
+ }
+ if (numEventsDone < numEvents) {
+ pendingEvents.put(attemptID, new PendingEventRouteMetadata(routeMeta, tezEvent,
+ numEventsDone));
+ return false;
+ }
+ }
+ }
+ break;
+ case DATA_MOVEMENT_EVENT:
+ {
+ DataMovementEvent dmEvent = (DataMovementEvent) tezEvent.getEvent();
+ EventRouteMetadata routeMeta;
+ int numEventsDone;
+ if (pendingRoutes != null) {
+ routeMeta = pendingRoutes.getRouteMeta();
+ numEventsDone = pendingRoutes.getNumEventsRouted();
+ } else {
+ routeMeta = edgeManagerOnDemand.routeDataMovementEventToDestination(srcTaskIndex,
+ dmEvent.getSourceIndex(), taskIndex);
+ numEventsDone = 0;
+ }
+ if (routeMeta != null) {
+ int listSize = listToAdd.size();
+ int numEvents = routeMeta.getNumEvents();
+ int[] targetIndices = routeMeta.getTargetIndices();
+ while (numEventsDone < numEvents && listSize++ < listMaxSize) {
+ DataMovementEvent e = dmEvent.makeCopy(targetIndices[numEventsDone]);
+ numEventsDone++;
+ TezEvent tezEventToSend = new TezEvent(e, tezEvent.getSourceInfo());
+ tezEventToSend.setDestinationInfo(destinationMetaInfo);
+ listToAdd.add(tezEventToSend);
+ }
+ if (numEventsDone < numEvents) {
+ pendingEvents.put(attemptID, new PendingEventRouteMetadata(routeMeta, tezEvent,
+ numEventsDone));
+ return false;
+ }
+ }
+ }
+ break;
+ default:
+ throw new TezUncheckedException("Unhandled tez event type: "
+ + tezEvent.getEventType());
+ }
+ } catch (Exception e){
+ throw new AMUserCodeException(Source.EdgeManager,
+ "Fail to maybeAddTezEventForDestinationTask, event:" + tezEvent.getEvent()
+ + ", sourceInfo:" + tezEvent.getSourceInfo() + ", destinationInfo:"
+ + tezEvent.getDestinationInfo() + ", " + getEdgeInfo(), e);
+ }
+ }
+ return true;
+ }
+
private void sendEventToTask(Task task, TezEvent tezEvent) {
task.registerTezEvent(tezEvent);
}
http://git-wip-us.apache.org/repos/asf/tez/blob/05f77fe2/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/OneToOneEdgeManager.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/OneToOneEdgeManager.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/OneToOneEdgeManager.java
index 11a6483..6053806 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/OneToOneEdgeManager.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/OneToOneEdgeManager.java
@@ -21,18 +21,25 @@ package org.apache.tez.dag.app.dag.impl;
import java.util.Collections;
import java.util.List;
import java.util.Map;
+import java.util.concurrent.atomic.AtomicBoolean;
+
+import javax.annotation.Nullable;
-import org.apache.tez.dag.api.EdgeManagerPlugin;
import org.apache.tez.dag.api.EdgeManagerPluginContext;
+import org.apache.tez.dag.api.EdgeManagerPluginOnDemand;
import org.apache.tez.runtime.api.events.DataMovementEvent;
import org.apache.tez.runtime.api.events.InputReadErrorEvent;
import com.google.common.base.Preconditions;
-public class OneToOneEdgeManager extends EdgeManagerPlugin {
+public class OneToOneEdgeManager extends EdgeManagerPluginOnDemand {
List<Integer> destinationInputIndices =
Collections.unmodifiableList(Collections.singletonList(0));
+ AtomicBoolean stateChecked = new AtomicBoolean(false);
+
+ final EventRouteMetadata commonRouteMeta =
+ EventRouteMetadata.create(1, new int[]{0}, new int[]{0});
public OneToOneEdgeManager(EdgeManagerPluginContext context) {
super(context);
@@ -57,16 +64,42 @@ public class OneToOneEdgeManager extends EdgeManagerPlugin {
public void routeDataMovementEventToDestination(DataMovementEvent event,
int sourceTaskIndex, int sourceOutputIndex,
Map<Integer, List<Integer>> destinationTaskAndInputIndices) {
- // by the time routing is initiated all task counts must be determined and stable
- Preconditions.checkState(getContext().getSourceVertexNumTasks() == getContext()
- .getDestinationVertexNumTasks(), "1-1 source and destination task counts must match."
- + " Destination: " + getContext().getDestinationVertexName() + " tasks: "
- + getContext().getDestinationVertexNumTasks() + " Source: "
- + getContext().getSourceVertexName() + " tasks: " + getContext().getSourceVertexNumTasks());
+ checkState();
destinationTaskAndInputIndices.put(sourceTaskIndex, destinationInputIndices);
}
@Override
+ public void prepareForRouting() throws Exception {
+ checkState();
+ }
+
+ @Override
+ public EventRouteMetadata routeDataMovementEventToDestination(
+ int sourceTaskIndex, int sourceOutputIndex, int destinationTaskIndex)
+ throws Exception {
+ if (sourceTaskIndex == destinationTaskIndex) {
+ return commonRouteMeta;
+ }
+ return null;
+ }
+
+ @Override
+ public @Nullable EventRouteMetadata routeCompositeDataMovementEventToDestination(
+ int sourceTaskIndex, int destinationTaskIndex)
+ throws Exception {
+ if (sourceTaskIndex == destinationTaskIndex) {
+ return commonRouteMeta;
+ }
+ return null;
+ }
+
+ @Override
+ public EventRouteMetadata routeInputSourceTaskFailedEventToDestination(
+ int sourceTaskIndex, int destinationTaskIndex) throws Exception {
+ return commonRouteMeta;
+ }
+
+ @Override
public void routeInputSourceTaskFailedEventToDestination(int sourceTaskIndex,
Map<Integer, List<Integer>> destinationTaskAndInputIndices) {
destinationTaskAndInputIndices.put(sourceTaskIndex, destinationInputIndices);
@@ -79,8 +112,26 @@ public class OneToOneEdgeManager extends EdgeManagerPlugin {
}
@Override
+ public int routeInputErrorEventToSource(int destinationTaskIndex, int destinationFailedInputIndex) {
+ return destinationTaskIndex;
+ }
+
+ @Override
public int getNumDestinationConsumerTasks(int sourceTaskIndex) {
return 1;
}
+
+ private void checkState() {
+ if (stateChecked.get()) {
+ return;
+ }
+ // by the time routing is initiated all task counts must be determined and stable
+ Preconditions.checkState(getContext().getSourceVertexNumTasks() == getContext()
+ .getDestinationVertexNumTasks(), "1-1 source and destination task counts must match."
+ + " Destination: " + getContext().getDestinationVertexName() + " tasks: "
+ + getContext().getDestinationVertexNumTasks() + " Source: "
+ + getContext().getSourceVertexName() + " tasks: " + getContext().getSourceVertexNumTasks());
+ stateChecked.set(true);
+ }
}
http://git-wip-us.apache.org/repos/asf/tez/blob/05f77fe2/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/ScatterGatherEdgeManager.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/ScatterGatherEdgeManager.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/ScatterGatherEdgeManager.java
index e2608cd..3b66b8f 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/ScatterGatherEdgeManager.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/ScatterGatherEdgeManager.java
@@ -18,18 +18,29 @@
package org.apache.tez.dag.app.dag.impl;
+import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
+import java.util.concurrent.atomic.AtomicReference;
+
+import javax.annotation.Nullable;
-import org.apache.tez.dag.api.EdgeManagerPlugin;
import org.apache.tez.dag.api.EdgeManagerPluginContext;
+import org.apache.tez.dag.api.EdgeManagerPluginOnDemand;
import org.apache.tez.runtime.api.events.DataMovementEvent;
import org.apache.tez.runtime.api.events.InputReadErrorEvent;
import com.google.common.base.Preconditions;
+import com.google.common.collect.Lists;
-public class ScatterGatherEdgeManager extends EdgeManagerPlugin {
+public class ScatterGatherEdgeManager extends EdgeManagerPluginOnDemand {
+
+ private AtomicReference<ArrayList<EventRouteMetadata>> commonRouteMeta =
+ new AtomicReference<ArrayList<EventRouteMetadata>>();
+ private Object commonRouteMetaLock = new Object();
+ private int[][] sourceIndices;
+ private int[][] targetIndices;
public ScatterGatherEdgeManager(EdgeManagerPluginContext context) {
super(context);
@@ -53,6 +64,69 @@ public class ScatterGatherEdgeManager extends EdgeManagerPlugin {
return physicalOutputs;
}
+ private ArrayList<EventRouteMetadata> getOrCreateCommonRouteMeta() {
+ ArrayList<EventRouteMetadata> metaData = commonRouteMeta.get();
+ if (metaData == null) {
+ synchronized (commonRouteMetaLock) {
+ metaData = commonRouteMeta.get();
+ if (metaData == null) {
+ int numSourceTasks = getContext().getSourceVertexNumTasks();
+ ArrayList<EventRouteMetadata> localEventMeta = Lists
+ .newArrayListWithCapacity(numSourceTasks);
+ for (int i=0; i<numSourceTasks; ++i) {
+ localEventMeta.add(EventRouteMetadata.create(1, new int[]{i}, new int[]{0}));
+ }
+ Preconditions.checkState(commonRouteMeta.compareAndSet(null, localEventMeta));
+ metaData = commonRouteMeta.get();
+ }
+ }
+ }
+ return metaData;
+ }
+
+ private void createIndices() {
+ // source indices derive from num dest tasks (==partitions)
+ int numTargetTasks = getContext().getDestinationVertexNumTasks();
+ sourceIndices = new int[numTargetTasks][];
+ for (int i=0; i<numTargetTasks; ++i) {
+ sourceIndices[i] = new int[]{i};
+ }
+ // target indices derive from num src tasks
+ int numSourceTasks = getContext().getSourceVertexNumTasks();
+ targetIndices = new int[numSourceTasks][];
+ for (int i=0; i<numSourceTasks; ++i) {
+ targetIndices[i] = new int[]{i};
+ }
+ }
+
+ @Override
+ public void prepareForRouting() throws Exception {
+ createIndices();
+ }
+
+ @Override
+ public EventRouteMetadata routeDataMovementEventToDestination(
+ int sourceTaskIndex, int sourceOutputIndex, int destinationTaskIndex) throws Exception {
+ if (sourceOutputIndex == destinationTaskIndex) {
+ return getOrCreateCommonRouteMeta().get(sourceTaskIndex);
+ }
+ return null;
+ }
+
+ @Override
+ public @Nullable EventRouteMetadata routeCompositeDataMovementEventToDestination(
+ int sourceTaskIndex, int destinationTaskIndex)
+ throws Exception {
+ return EventRouteMetadata.create(1, targetIndices[sourceTaskIndex],
+ sourceIndices[destinationTaskIndex]);
+ }
+
+ @Override
+ public EventRouteMetadata routeInputSourceTaskFailedEventToDestination(
+ int sourceTaskIndex, int destinationTaskIndex) throws Exception {
+ return getOrCreateCommonRouteMeta().get(sourceTaskIndex);
+ }
+
@Override
public void routeDataMovementEventToDestination(DataMovementEvent event,
int sourceTaskIndex, int sourceOutputIndex, Map<Integer, List<Integer>> destinationTaskAndInputIndices) {
@@ -76,6 +150,11 @@ public class ScatterGatherEdgeManager extends EdgeManagerPlugin {
}
@Override
+ public int routeInputErrorEventToSource(int destinationTaskIndex, int destinationFailedInputIndex) {
+ return destinationFailedInputIndex;
+ }
+
+ @Override
public int getNumDestinationConsumerTasks(int sourceTaskIndex) {
return getContext().getDestinationVertexNumTasks();
}
http://git-wip-us.apache.org/repos/asf/tez/blob/05f77fe2/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TaskImpl.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TaskImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TaskImpl.java
index 15382a8..2e884e7 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TaskImpl.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TaskImpl.java
@@ -1493,12 +1493,6 @@ public class TaskImpl implements Task, EventHandler<TaskEvent> {
}
}
- @Private
- @VisibleForTesting
- public List<TezEvent> getTaskEvents() {
- return tezEventsForTaskAttempts;
- }
-
private static class KillTransition
implements SingleArcTransition<TaskImpl, TaskEvent> {
@Override
[31/43] tez git commit: TEZ-2125. Create a task communicator for
local mode. Allow tasks to run in the AM. (sseth)
Posted by ss...@apache.org.
TEZ-2125. Create a task communicator for local mode. Allow tasks to run
in the AM. (sseth)
Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/a45ef858
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/a45ef858
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/a45ef858
Branch: refs/heads/TEZ-2003
Commit: a45ef85833df44750296430f9b85f42574795578
Parents: c47951a
Author: Siddharth Seth <ss...@apache.org>
Authored: Fri Feb 20 16:12:52 2015 -0800
Committer: Siddharth Seth <ss...@apache.org>
Committed: Fri May 8 14:41:30 2015 -0700
----------------------------------------------------------------------
TEZ-2003-CHANGES.txt | 1 +
.../org/apache/tez/dag/app/DAGAppMaster.java | 25 +++++---
.../dag/app/TaskAttemptListenerImpTezDag.java | 18 ++++--
.../dag/app/TezLocalTaskCommunicatorImpl.java | 46 ++++++++++++++
.../tez/dag/app/TezTaskCommunicatorImpl.java | 67 ++++++++------------
.../app/launcher/ContainerLauncherRouter.java | 17 +++--
.../app/launcher/LocalContainerLauncher.java | 31 ++++++---
.../dag/app/rm/TaskSchedulerEventHandler.java | 2 +
.../apache/tez/dag/app/MockDAGAppMaster.java | 3 +-
.../app/TestTaskAttemptListenerImplTezDag.java | 2 +-
.../tez/service/impl/ContainerRunnerImpl.java | 2 +-
.../tez/tests/TestExternalTezServices.java | 57 +++++++++++++----
.../org/apache/tez/runtime/task/TezChild.java | 34 +++++-----
13 files changed, 204 insertions(+), 101 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/tez/blob/a45ef858/TEZ-2003-CHANGES.txt
----------------------------------------------------------------------
diff --git a/TEZ-2003-CHANGES.txt b/TEZ-2003-CHANGES.txt
index 1a2264c..76496c9 100644
--- a/TEZ-2003-CHANGES.txt
+++ b/TEZ-2003-CHANGES.txt
@@ -5,5 +5,6 @@ ALL CHANGES:
TEZ-2117. Add a manager for ContainerLaunchers running in the AM.
TEZ-2122. Setup pluggable components at AM/Vertex level.
TEZ-2123. Fix component managers to use pluggable components. (Enable hybrid mode)
+ TEZ-2125. Create a task communicator for local mode. Allow tasks to run in the AM.
INCOMPATIBLE CHANGES:
http://git-wip-us.apache.org/repos/asf/tez/blob/a45ef858/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java b/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java
index 89b6506..701eca8 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java
@@ -465,7 +465,7 @@ public class DAGAppMaster extends AbstractService {
//service to handle requests to TaskUmbilicalProtocol
taskAttemptListener = createTaskAttemptListener(context,
- taskHeartbeatHandler, containerHeartbeatHandler, taskCommunicatorClassIdentifiers);
+ taskHeartbeatHandler, containerHeartbeatHandler, taskCommunicatorClassIdentifiers, isLocal);
addIfService(taskAttemptListener, true);
containerSignatureMatcher = createContainerSignatureMatcher();
@@ -531,7 +531,7 @@ public class DAGAppMaster extends AbstractService {
taskSchedulerEventHandler);
addIfServiceDependency(taskSchedulerEventHandler, clientRpcServer);
- this.containerLauncherRouter = createContainerLauncherRouter(conf, containerLauncherClassIdentifiers);
+ this.containerLauncherRouter = createContainerLauncherRouter(conf, containerLauncherClassIdentifiers, isLocal);
addIfService(containerLauncherRouter, true);
dispatcher.register(NMCommunicatorEventType.class, containerLauncherRouter);
@@ -1038,9 +1038,13 @@ public class DAGAppMaster extends AbstractService {
}
protected TaskAttemptListener createTaskAttemptListener(AppContext context,
- TaskHeartbeatHandler thh, ContainerHeartbeatHandler chh, String[] taskCommunicatorClasses) {
+ TaskHeartbeatHandler thh,
+ ContainerHeartbeatHandler chh,
+ String[] taskCommunicatorClasses,
+ boolean isLocal) {
TaskAttemptListener lis =
- new TaskAttemptListenerImpTezDag(context, thh, chh,jobTokenSecretManager, taskCommunicatorClasses);
+ new TaskAttemptListenerImpTezDag(context, thh, chh, jobTokenSecretManager,
+ taskCommunicatorClasses, isLocal);
return lis;
}
@@ -1061,10 +1065,12 @@ public class DAGAppMaster extends AbstractService {
return chh;
}
- protected ContainerLauncherRouter createContainerLauncherRouter(Configuration conf, String []containerLauncherClasses) throws
+ protected ContainerLauncherRouter createContainerLauncherRouter(Configuration conf,
+ String[] containerLauncherClasses,
+ boolean isLocal) throws
UnknownHostException {
- return new ContainerLauncherRouter(conf, context, taskAttemptListener, workingDirectory, containerLauncherClasses);
-
+ return new ContainerLauncherRouter(conf, context, taskAttemptListener, workingDirectory,
+ containerLauncherClasses, isLocal);
}
public ApplicationId getAppID() {
@@ -2331,9 +2337,8 @@ public class DAGAppMaster extends AbstractService {
StringBuilder sb = new StringBuilder();
sb.append("AM Level configured ").append(component).append(": ");
for (int i = 0; i < classIdentifiers.length; i++) {
- sb.append("[").append(i).append(":").append(map.inverse().get(i)).append(":")
- .append(taskSchedulers.inverse().get(i)).append(
- "]");
+ sb.append("[").append(i).append(":").append(map.inverse().get(i))
+ .append(":").append(classIdentifiers[i]).append("]");
if (i != classIdentifiers.length - 1) {
sb.append(",");
}
http://git-wip-us.apache.org/repos/asf/tez/blob/a45ef858/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java b/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
index d21b7d0..8346839 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
@@ -99,13 +99,20 @@ public class TaskAttemptListenerImpTezDag extends AbstractService implements
TaskHeartbeatHandler thh, ContainerHeartbeatHandler chh,
// TODO TEZ-2003 pre-merge. Remove reference to JobTokenSecretManager.
JobTokenSecretManager jobTokenSecretManager,
- String [] taskCommunicatorClassIdentifiers) {
+ String [] taskCommunicatorClassIdentifiers,
+ boolean isPureLocalMode) {
super(TaskAttemptListenerImpTezDag.class.getName());
this.context = context;
this.taskHeartbeatHandler = thh;
this.containerHeartbeatHandler = chh;
if (taskCommunicatorClassIdentifiers == null || taskCommunicatorClassIdentifiers.length == 0) {
- taskCommunicatorClassIdentifiers = new String[] {TezConstants.TEZ_AM_SERVICE_PLUGINS_NAME_DEFAULT};
+ if (isPureLocalMode) {
+ taskCommunicatorClassIdentifiers =
+ new String[]{TezConstants.TEZ_AM_SERVICE_PLUGINS_LOCAL_MODE_NAME_DEFAULT};
+ } else {
+ taskCommunicatorClassIdentifiers =
+ new String[]{TezConstants.TEZ_AM_SERVICE_PLUGINS_NAME_DEFAULT};
+ }
}
this.taskCommunicators = new TaskCommunicator[taskCommunicatorClassIdentifiers.length];
for (int i = 0 ; i < taskCommunicatorClassIdentifiers.length ; i++) {
@@ -131,11 +138,12 @@ public class TaskAttemptListenerImpTezDag extends AbstractService implements
}
private TaskCommunicator createTaskCommunicator(String taskCommClassIdentifier) {
- if (taskCommClassIdentifier.equals(TezConstants.TEZ_AM_SERVICE_PLUGINS_NAME_DEFAULT) ||
- taskCommClassIdentifier
- .equals(TezConstants.TEZ_AM_SERVICE_PLUGINS_LOCAL_MODE_NAME_DEFAULT)) {
+ if (taskCommClassIdentifier.equals(TezConstants.TEZ_AM_SERVICE_PLUGINS_NAME_DEFAULT)) {
LOG.info("Using Default Task Communicator");
return new TezTaskCommunicatorImpl(this);
+ } else if (taskCommClassIdentifier.equals(TezConstants.TEZ_AM_SERVICE_PLUGINS_LOCAL_MODE_NAME_DEFAULT)) {
+ LOG.info("Using Default Local Task Communicator");
+ return new TezLocalTaskCommunicatorImpl(this);
} else {
LOG.info("Using TaskCommunicator: " + taskCommClassIdentifier);
Class<? extends TaskCommunicator> taskCommClazz = (Class<? extends TaskCommunicator>) ReflectionUtils
http://git-wip-us.apache.org/repos/asf/tez/blob/a45ef858/tez-dag/src/main/java/org/apache/tez/dag/app/TezLocalTaskCommunicatorImpl.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/TezLocalTaskCommunicatorImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/TezLocalTaskCommunicatorImpl.java
new file mode 100644
index 0000000..3704cc4
--- /dev/null
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/TezLocalTaskCommunicatorImpl.java
@@ -0,0 +1,46 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.dag.app;
+
+import java.net.InetAddress;
+import java.net.InetSocketAddress;
+import java.net.UnknownHostException;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.tez.dag.api.TaskCommunicatorContext;
+import org.apache.tez.dag.api.TezUncheckedException;
+
+public class TezLocalTaskCommunicatorImpl extends TezTaskCommunicatorImpl {
+
+ private static final Log LOG = LogFactory.getLog(TezLocalTaskCommunicatorImpl.class);
+
+ public TezLocalTaskCommunicatorImpl(
+ TaskCommunicatorContext taskCommunicatorContext) {
+ super(taskCommunicatorContext);
+ }
+
+ @Override
+ protected void startRpcServer() {
+ try {
+ this.address = new InetSocketAddress(InetAddress.getLocalHost(), 0);
+ } catch (UnknownHostException e) {
+ throw new TezUncheckedException(e);
+ }
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Not starting TaskAttemptListener RPC in LocalMode");
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/tez/blob/a45ef858/tez-dag/src/main/java/org/apache/tez/dag/app/TezTaskCommunicatorImpl.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/TezTaskCommunicatorImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/TezTaskCommunicatorImpl.java
index 258c927..0bf1b5d 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/TezTaskCommunicatorImpl.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/TezTaskCommunicatorImpl.java
@@ -15,10 +15,8 @@
package org.apache.tez.dag.app;
import java.io.IOException;
-import java.net.InetAddress;
import java.net.InetSocketAddress;
import java.net.URISyntaxException;
-import java.net.UnknownHostException;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
@@ -76,7 +74,7 @@ public class TezTaskCommunicatorImpl extends TaskCommunicator {
private final TezTaskUmbilicalProtocol taskUmbilical;
private final String tokenIdentifier;
private final Token<JobTokenIdentifier> sessionToken;
- private InetSocketAddress address;
+ protected InetSocketAddress address;
private Server server;
public static final class ContainerInfo {
@@ -120,10 +118,8 @@ public class TezTaskCommunicatorImpl extends TaskCommunicator {
this.sessionToken = TokenCache.getSessionToken(taskCommunicatorContext.getCredentials());
}
-
@Override
public void serviceStart() {
-
startRpcServer();
}
@@ -134,43 +130,32 @@ public class TezTaskCommunicatorImpl extends TaskCommunicator {
protected void startRpcServer() {
Configuration conf = getConfig();
- if (!conf.getBoolean(TezConfiguration.TEZ_LOCAL_MODE, TezConfiguration.TEZ_LOCAL_MODE_DEFAULT)) {
- try {
- JobTokenSecretManager jobTokenSecretManager =
- new JobTokenSecretManager();
- jobTokenSecretManager.addTokenForJob(tokenIdentifier, sessionToken);
-
- server = new RPC.Builder(conf)
- .setProtocol(TezTaskUmbilicalProtocol.class)
- .setBindAddress("0.0.0.0")
- .setPort(0)
- .setInstance(taskUmbilical)
- .setNumHandlers(
- conf.getInt(TezConfiguration.TEZ_AM_TASK_LISTENER_THREAD_COUNT,
- TezConfiguration.TEZ_AM_TASK_LISTENER_THREAD_COUNT_DEFAULT))
- .setSecretManager(jobTokenSecretManager).build();
-
- // Enable service authorization?
- if (conf.getBoolean(
- CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHORIZATION,
- false)) {
- refreshServiceAcls(conf, new TezAMPolicyProvider());
- }
-
- server.start();
- this.address = NetUtils.getConnectAddress(server);
- } catch (IOException e) {
- throw new TezUncheckedException(e);
- }
- } else {
- try {
- this.address = new InetSocketAddress(InetAddress.getLocalHost(), 0);
- } catch (UnknownHostException e) {
- throw new TezUncheckedException(e);
- }
- if (LOG.isDebugEnabled()) {
- LOG.debug("Not starting TaskAttemptListener RPC in LocalMode");
+ try {
+ JobTokenSecretManager jobTokenSecretManager =
+ new JobTokenSecretManager();
+ jobTokenSecretManager.addTokenForJob(tokenIdentifier, sessionToken);
+
+ server = new RPC.Builder(conf)
+ .setProtocol(TezTaskUmbilicalProtocol.class)
+ .setBindAddress("0.0.0.0")
+ .setPort(0)
+ .setInstance(taskUmbilical)
+ .setNumHandlers(
+ conf.getInt(TezConfiguration.TEZ_AM_TASK_LISTENER_THREAD_COUNT,
+ TezConfiguration.TEZ_AM_TASK_LISTENER_THREAD_COUNT_DEFAULT))
+ .setSecretManager(jobTokenSecretManager).build();
+
+ // Enable service authorization?
+ if (conf.getBoolean(
+ CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHORIZATION,
+ false)) {
+ refreshServiceAcls(conf, new TezAMPolicyProvider());
}
+
+ server.start();
+ this.address = NetUtils.getConnectAddress(server);
+ } catch (IOException e) {
+ throw new TezUncheckedException(e);
}
}
http://git-wip-us.apache.org/repos/asf/tez/blob/a45ef858/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncherRouter.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncherRouter.java b/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncherRouter.java
index 4f9b5bf..70b0cbc 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncherRouter.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncherRouter.java
@@ -25,7 +25,6 @@ import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.service.AbstractService;
import org.apache.hadoop.yarn.event.EventHandler;
import org.apache.tez.common.ReflectionUtils;
-import org.apache.tez.dag.api.TezConfiguration;
import org.apache.tez.dag.api.TezConstants;
import org.apache.tez.dag.api.TezUncheckedException;
import org.apache.tez.dag.app.AppContext;
@@ -49,17 +48,24 @@ public class ContainerLauncherRouter extends AbstractService
public ContainerLauncherRouter(Configuration conf, AppContext context,
TaskAttemptListener taskAttemptListener,
String workingDirectory,
- String[] containerLauncherClassIdentifiers) throws UnknownHostException {
+ String[] containerLauncherClassIdentifiers,
+ boolean isPureLocalMode) throws UnknownHostException {
super(ContainerLauncherRouter.class.getName());
if (containerLauncherClassIdentifiers == null || containerLauncherClassIdentifiers.length == 0) {
- containerLauncherClassIdentifiers = new String[] {TezConstants.TEZ_AM_SERVICE_PLUGINS_NAME_DEFAULT};
+ if (isPureLocalMode) {
+ containerLauncherClassIdentifiers =
+ new String[]{TezConstants.TEZ_AM_SERVICE_PLUGINS_LOCAL_MODE_NAME_DEFAULT};
+ } else {
+ containerLauncherClassIdentifiers =
+ new String[]{TezConstants.TEZ_AM_SERVICE_PLUGINS_NAME_DEFAULT};
+ }
}
containerLaunchers = new ContainerLauncher[containerLauncherClassIdentifiers.length];
for (int i = 0; i < containerLauncherClassIdentifiers.length; i++) {
containerLaunchers[i] = createContainerLauncher(containerLauncherClassIdentifiers[i], context,
- taskAttemptListener, workingDirectory, conf);
+ taskAttemptListener, workingDirectory, isPureLocalMode, conf);
}
}
@@ -67,6 +73,7 @@ public class ContainerLauncherRouter extends AbstractService
AppContext context,
TaskAttemptListener taskAttemptListener,
String workingDirectory,
+ boolean isPureLocalMode,
Configuration conf) throws
UnknownHostException {
if (containerLauncherClassIdentifier.equals(TezConstants.TEZ_AM_SERVICE_PLUGINS_NAME_DEFAULT)) {
@@ -76,7 +83,7 @@ public class ContainerLauncherRouter extends AbstractService
.equals(TezConstants.TEZ_AM_SERVICE_PLUGINS_LOCAL_MODE_NAME_DEFAULT)) {
LOG.info("Creating LocalContainerLauncher");
return
- new LocalContainerLauncher(context, taskAttemptListener, workingDirectory);
+ new LocalContainerLauncher(context, taskAttemptListener, workingDirectory, isPureLocalMode);
} else {
LOG.info("Creating container launcher : " + containerLauncherClassIdentifier);
Class<? extends ContainerLauncher> containerLauncherClazz =
http://git-wip-us.apache.org/repos/asf/tez/blob/a45ef858/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/LocalContainerLauncher.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/LocalContainerLauncher.java b/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/LocalContainerLauncher.java
index 9a38732..18b2e35 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/LocalContainerLauncher.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/LocalContainerLauncher.java
@@ -36,6 +36,7 @@ import java.util.concurrent.RejectedExecutionException;
import java.util.concurrent.atomic.AtomicBoolean;
import com.google.common.base.Preconditions;
+import com.google.common.collect.Maps;
import com.google.common.util.concurrent.FutureCallback;
import com.google.common.util.concurrent.Futures;
import com.google.common.util.concurrent.ListenableFuture;
@@ -90,9 +91,10 @@ public class LocalContainerLauncher extends AbstractService implements
private final AtomicBoolean serviceStopped = new AtomicBoolean(false);
private final String workingDirectory;
private final TaskAttemptListener tal;
- private final Map<String, String> localEnv = new HashMap<String, String>();
+ private final Map<String, String> localEnv;
private final ExecutionContext executionContext;
private int numExecutors;
+ private final boolean isPureLocalMode;
private final ConcurrentHashMap<ContainerId, ListenableFuture<TezChild.ContainerExecutionResult>>
runningContainers =
@@ -112,16 +114,26 @@ public class LocalContainerLauncher extends AbstractService implements
public LocalContainerLauncher(AppContext context,
TaskAttemptListener taskAttemptListener,
- String workingDirectory) throws UnknownHostException {
+ String workingDirectory,
+ boolean isPureLocalMode) throws UnknownHostException {
super(LocalContainerLauncher.class.getName());
this.context = context;
this.tal = taskAttemptListener;
this.workingDirectory = workingDirectory;
- AuxiliaryServiceHelper.setServiceDataIntoEnv(
- ShuffleUtils.SHUFFLE_HANDLER_SERVICE_ID, ByteBuffer.allocate(4).putInt(0), localEnv);
- executionContext = new ExecutionContextImpl(InetAddress.getLocalHost().getHostName());
- // User cannot be set here since it isn't available till a DAG is running.
+ this.isPureLocalMode = isPureLocalMode;
+ if (isPureLocalMode) {
+ localEnv = Maps.newHashMap();
+ AuxiliaryServiceHelper.setServiceDataIntoEnv(
+ ShuffleUtils.SHUFFLE_HANDLER_SERVICE_ID, ByteBuffer.allocate(4).putInt(0), localEnv);
+ } else {
+ localEnv = System.getenv();
+ }
+
+ // Check if the hostname is set in the environment before overriding it.
+ String host = isPureLocalMode ? InetAddress.getLocalHost().getHostName() :
+ System.getenv(Environment.NM_HOST.name());
+ executionContext = new ExecutionContextImpl(host);
}
@Override
@@ -338,7 +350,9 @@ public class LocalContainerLauncher extends AbstractService implements
InterruptedException, TezException, IOException {
Map<String, String> containerEnv = new HashMap<String, String>();
containerEnv.putAll(localEnv);
- containerEnv.put(Environment.USER.name(), context.getUser());
+ // Use the user from env if it's available.
+ String user = isPureLocalMode ? System.getenv(Environment.USER.name()) : context.getUser();
+ containerEnv.put(Environment.USER.name(), user);
long memAvailable;
synchronized (this) { // needed to fix findbugs Inconsistent synchronization warning
@@ -347,8 +361,7 @@ public class LocalContainerLauncher extends AbstractService implements
TezChild tezChild =
TezChild.newTezChild(defaultConf, null, 0, containerId.toString(), tokenIdentifier,
attemptNumber, localDirs, workingDirectory, containerEnv, "", executionContext, credentials,
- memAvailable, context.getUser());
- tezChild.setUmbilical(tezTaskUmbilicalProtocol);
+ memAvailable, context.getUser(), tezTaskUmbilicalProtocol);
return tezChild;
}
http://git-wip-us.apache.org/repos/asf/tez/blob/a45ef858/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerEventHandler.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerEventHandler.java b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerEventHandler.java
index 5a0ace8..5a8e9fe 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerEventHandler.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerEventHandler.java
@@ -434,6 +434,8 @@ public class TaskSchedulerEventHandler extends AbstractService
} else {
customAppIdIdentifier = SCHEDULER_APP_ID_BASE + (j++ * SCHEDULER_APP_ID_INCREMENT);
}
+ LOG.info("ClusterIdentifier for TaskScheduler [" + i + ":" + taskSchedulerClasses[i] + "]=" +
+ customAppIdIdentifier);
taskSchedulers[i] = createTaskScheduler(host, port,
trackingUrl, appContext, taskSchedulerClasses[i], customAppIdIdentifier);
}
http://git-wip-us.apache.org/repos/asf/tez/blob/a45ef858/tez-dag/src/test/java/org/apache/tez/dag/app/MockDAGAppMaster.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/MockDAGAppMaster.java b/tez-dag/src/test/java/org/apache/tez/dag/app/MockDAGAppMaster.java
index 59efb87..4f014a4 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/MockDAGAppMaster.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/MockDAGAppMaster.java
@@ -511,7 +511,8 @@ public class MockDAGAppMaster extends DAGAppMaster {
// use mock container launcher for tests
@Override
protected ContainerLauncherRouter createContainerLauncherRouter(final Configuration conf,
- String[] containerLaunchers)
+ String[] containerLaunchers,
+ boolean isLocal)
throws UnknownHostException {
return new ContainerLauncherRouter(containerLauncher);
}
http://git-wip-us.apache.org/repos/asf/tez/blob/a45ef858/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskAttemptListenerImplTezDag.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskAttemptListenerImplTezDag.java b/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskAttemptListenerImplTezDag.java
index 98fcddc..0cf1959 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskAttemptListenerImplTezDag.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskAttemptListenerImplTezDag.java
@@ -119,7 +119,7 @@ public class TestTaskAttemptListenerImplTezDag {
doReturn(container).when(amContainer).getContainer();
taskAttemptListener = new TaskAttemptListenerImplForTest(appContext,
- mock(TaskHeartbeatHandler.class), mock(ContainerHeartbeatHandler.class), null, null);
+ mock(TaskHeartbeatHandler.class), mock(ContainerHeartbeatHandler.class), null, null, false);
taskSpec = mock(TaskSpec.class);
doReturn(taskAttemptID).when(taskSpec).getTaskAttemptID();
http://git-wip-us.apache.org/repos/asf/tez/blob/a45ef858/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/ContainerRunnerImpl.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/ContainerRunnerImpl.java b/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/ContainerRunnerImpl.java
index 4a6ce33..25d6030 100644
--- a/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/ContainerRunnerImpl.java
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/ContainerRunnerImpl.java
@@ -282,7 +282,7 @@ public class ContainerRunnerImpl extends AbstractService implements ContainerRun
request.getContainerIdString(),
request.getTokenIdentifier(), request.getAppAttemptNumber(), workingDir, localDirs,
envMap, objectRegistry, pid,
- executionContext, credentials, memoryAvailable, request.getUser());
+ executionContext, credentials, memoryAvailable, request.getUser(), null);
ContainerExecutionResult result = tezChild.run();
LOG.info("ExecutionTime for Container: " + request.getContainerIdString() + "=" +
sw.stop().elapsedMillis());
http://git-wip-us.apache.org/repos/asf/tez/blob/a45ef858/tez-ext-service-tests/src/test/java/org/apache/tez/tests/TestExternalTezServices.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/tests/TestExternalTezServices.java b/tez-ext-service-tests/src/test/java/org/apache/tez/tests/TestExternalTezServices.java
index 9c149c6..01c2080 100644
--- a/tez-ext-service-tests/src/test/java/org/apache/tez/tests/TestExternalTezServices.java
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/tests/TestExternalTezServices.java
@@ -40,6 +40,7 @@ import org.apache.tez.service.MiniTezTestServiceCluster;
import org.apache.tez.test.MiniTezCluster;
import org.junit.AfterClass;
import org.junit.BeforeClass;
+import org.junit.Ignore;
import org.junit.Test;
public class TestExternalTezServices {
@@ -120,26 +121,23 @@ public class TestExternalTezServices {
confForJobs.setStrings(TezConfiguration.TEZ_AM_TASK_SCHEDULERS,
TezConstants.TEZ_AM_SERVICE_PLUGINS_NAME_DEFAULT,
-// TezConstants.TEZ_AM_SERVICE_PLUGINS_LOCAL_MODE_NAME_DEFAULT,
+ TezConstants.TEZ_AM_SERVICE_PLUGINS_LOCAL_MODE_NAME_DEFAULT,
EXT_PUSH_ENTITY_NAME + ":" + TezTestServiceTaskSchedulerService.class.getName());
confForJobs.setStrings(TezConfiguration.TEZ_AM_CONTAINER_LAUNCHERS,
TezConstants.TEZ_AM_SERVICE_PLUGINS_NAME_DEFAULT,
-// TezConstants.TEZ_AM_SERVICE_PLUGINS_LOCAL_MODE_NAME_DEFAULT,
+ TezConstants.TEZ_AM_SERVICE_PLUGINS_LOCAL_MODE_NAME_DEFAULT,
EXT_PUSH_ENTITY_NAME + ":" + TezTestServiceNoOpContainerLauncher.class.getName());
confForJobs.setStrings(TezConfiguration.TEZ_AM_TASK_COMMUNICATORS,
TezConstants.TEZ_AM_SERVICE_PLUGINS_NAME_DEFAULT,
-// TezConstants.TEZ_AM_SERVICE_PLUGINS_LOCAL_MODE_NAME_DEFAULT,
+ TezConstants.TEZ_AM_SERVICE_PLUGINS_LOCAL_MODE_NAME_DEFAULT,
EXT_PUSH_ENTITY_NAME + ":" + TezTestServiceTaskCommunicatorImpl.class.getName());
// Default all jobs to run via the service. Individual tests override this on a per vertex/dag level.
- confForJobs.set(TezConfiguration.TEZ_AM_VERTEX_TASK_SCHEDULER_NAME,
- TezConstants.TEZ_AM_SERVICE_PLUGINS_NAME_DEFAULT);
- confForJobs.set(TezConfiguration.TEZ_AM_VERTEX_CONTAINER_LAUNCHER_NAME,
- TezConstants.TEZ_AM_SERVICE_PLUGINS_NAME_DEFAULT);
- confForJobs.set(TezConfiguration.TEZ_AM_VERTEX_TASK_COMMUNICATOR_NAME,
- TezConstants.TEZ_AM_SERVICE_PLUGINS_NAME_DEFAULT);
+ confForJobs.set(TezConfiguration.TEZ_AM_VERTEX_TASK_SCHEDULER_NAME, EXT_PUSH_ENTITY_NAME);
+ confForJobs.set(TezConfiguration.TEZ_AM_VERTEX_CONTAINER_LAUNCHER_NAME, EXT_PUSH_ENTITY_NAME);
+ confForJobs.set(TezConfiguration.TEZ_AM_VERTEX_TASK_COMMUNICATOR_NAME, EXT_PUSH_ENTITY_NAME);
// Setup various executor sets
PROPS_REGULAR_CONTAINERS.put(TezConfiguration.TEZ_AM_VERTEX_TASK_SCHEDULER_NAME,
@@ -232,18 +230,55 @@ public class TestExternalTezServices {
@Test(timeout = 60000)
public void testMixed1() throws Exception { // M-ExtService, R-containers
- int expectedExternalSubmissions = 4 + 0; //4 for 4 src files, 3 for num reducers.
+ int expectedExternalSubmissions = 4 + 0; //4 for 4 src files, 0 for num reducers.
runJoinValidate("Mixed1", expectedExternalSubmissions, PROPS_EXT_SERVICE_PUSH,
PROPS_EXT_SERVICE_PUSH, PROPS_REGULAR_CONTAINERS);
}
@Test(timeout = 60000)
public void testMixed2() throws Exception { // M-Containers, R-ExtService
- int expectedExternalSubmissions = 0 + 3; //4 for 4 src files, 3 for num reducers.
+ int expectedExternalSubmissions = 0 + 3; // 3 for num reducers.
runJoinValidate("Mixed2", expectedExternalSubmissions, PROPS_REGULAR_CONTAINERS,
PROPS_REGULAR_CONTAINERS, PROPS_EXT_SERVICE_PUSH);
}
+ @Test(timeout = 60000)
+ public void testMixed3() throws Exception { // M - service, R-AM
+ int expectedExternalSubmissions = 4 + 0; //4 for 4 src files, 0 for num reducers (in-AM).
+ runJoinValidate("Mixed3", expectedExternalSubmissions, PROPS_EXT_SERVICE_PUSH,
+ PROPS_EXT_SERVICE_PUSH, PROPS_IN_AM);
+ }
+
+ @Test(timeout = 60000)
+ public void testMixed4() throws Exception { // M - containers, R-AM
+ int expectedExternalSubmissions = 0 + 0; // Nothing in external service.
+ runJoinValidate("Mixed4", expectedExternalSubmissions, PROPS_REGULAR_CONTAINERS,
+ PROPS_REGULAR_CONTAINERS, PROPS_IN_AM);
+ }
+
+ @Test(timeout = 60000)
+ public void testMixed5() throws Exception { // M1 - containers, M2-extservice, R-AM
+ int expectedExternalSubmissions = 2 + 0; // 2 for M2
+ runJoinValidate("Mixed5", expectedExternalSubmissions, PROPS_REGULAR_CONTAINERS,
+ PROPS_EXT_SERVICE_PUSH, PROPS_IN_AM);
+ }
+
+
+ @Ignore // Re-activate this after the AM registers the shuffle token with the launcher.
+ @Test(timeout = 60000)
+ public void testMixed6() throws Exception { // M - AM, R - Service
+ int expectedExternalSubmissions = 0 + 3; // 3 for R in service
+ runJoinValidate("Mixed6", expectedExternalSubmissions, PROPS_IN_AM,
+ PROPS_IN_AM, PROPS_EXT_SERVICE_PUSH);
+ }
+
+ @Test(timeout = 60000)
+ public void testMixed7() throws Exception { // M - AM, R - Containers
+ int expectedExternalSubmissions = 0; // Nothing in ext service
+ runJoinValidate("Mixed7", expectedExternalSubmissions, PROPS_IN_AM,
+ PROPS_IN_AM, PROPS_REGULAR_CONTAINERS);
+ }
+
private void runJoinValidate(String name, int extExpectedCount, Map<String, String> lhsProps,
Map<String, String> rhsProps,
http://git-wip-us.apache.org/repos/asf/tez/blob/a45ef858/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TezChild.java
----------------------------------------------------------------------
diff --git a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TezChild.java b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TezChild.java
index 3cba3ce..7615f08 100644
--- a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TezChild.java
+++ b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TezChild.java
@@ -64,6 +64,7 @@ import org.apache.tez.dag.utils.RelocalizationUtils;
import org.apache.tez.runtime.api.ExecutionContext;
import org.apache.tez.runtime.api.impl.ExecutionContextImpl;
import org.apache.tez.runtime.api.impl.TaskSpec;
+import org.apache.tez.runtime.api.impl.TezUmbilical;
import org.apache.tez.runtime.common.objectregistry.ObjectRegistryImpl;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -93,7 +94,6 @@ public class TezChild {
private final int amHeartbeatInterval;
private final long sendCounterInterval;
private final int maxEventsToGet;
- private final boolean isLocal;
private final String workingDir;
private final ListeningExecutorService executor;
@@ -108,9 +108,10 @@ public class TezChild {
private final String user;
private Multimap<String, String> startedInputsMap = HashMultimap.create();
+ private final boolean ownUmbilical;
+ private final TezTaskUmbilicalProtocol umbilical;
private TaskReporter taskReporter;
- private TezTaskUmbilicalProtocol umbilical;
private int taskCount = 0;
private TezVertexID lastVertexID;
@@ -119,7 +120,7 @@ public class TezChild {
Map<String, String> serviceProviderEnvMap,
ObjectRegistryImpl objectRegistry, String pid,
ExecutionContext executionContext,
- Credentials credentials, long memAvailable, String user)
+ Credentials credentials, long memAvailable, String user, TezTaskUmbilicalProtocol umbilical)
throws IOException, InterruptedException {
this.defaultConf = conf;
this.containerIdString = containerIdentifier;
@@ -133,6 +134,8 @@ public class TezChild {
this.memAvailable = memAvailable;
this.user = user;
+ LOG.info("TezChild created with umbilical: " + umbilical);
+
getTaskMaxSleepTime = defaultConf.getInt(
TezConfiguration.TEZ_TASK_GET_TASK_SLEEP_INTERVAL_MS_MAX,
TezConfiguration.TEZ_TASK_GET_TASK_SLEEP_INTERVAL_MS_MAX_DEFAULT);
@@ -161,25 +164,27 @@ public class TezChild {
}
}
- this.isLocal = defaultConf.getBoolean(TezConfiguration.TEZ_LOCAL_MODE,
- TezConfiguration.TEZ_LOCAL_MODE_DEFAULT);
UserGroupInformation taskOwner = UserGroupInformation.createRemoteUser(tokenIdentifier);
Token<JobTokenIdentifier> jobToken = TokenCache.getSessionToken(credentials);
serviceConsumerMetadata.put(TezConstants.TEZ_SHUFFLE_HANDLER_SERVICE_ID,
TezCommonUtils.convertJobTokenToBytes(jobToken));
- if (!isLocal) {
+ if (umbilical == null) {
final InetSocketAddress address = NetUtils.createSocketAddrForHost(host, port);
SecurityUtil.setTokenService(jobToken, address);
taskOwner.addToken(jobToken);
- umbilical = taskOwner.doAs(new PrivilegedExceptionAction<TezTaskUmbilicalProtocol>() {
+ this.umbilical = taskOwner.doAs(new PrivilegedExceptionAction<TezTaskUmbilicalProtocol>() {
@Override
public TezTaskUmbilicalProtocol run() throws Exception {
return RPC.getProxy(TezTaskUmbilicalProtocol.class,
TezTaskUmbilicalProtocol.versionID, address, defaultConf);
}
});
+ ownUmbilical = true;
+ } else {
+ this.umbilical = umbilical;
+ ownUmbilical = false;
}
}
@@ -353,7 +358,7 @@ public class TezChild {
if (taskReporter != null) {
taskReporter.shutdown();
}
- if (!isLocal) {
+ if (ownUmbilical) {
RPC.stopProxy(umbilical);
// TODO Temporary change. Revert. Ideally, move this over to the main method in TezChild if possible.
// LogManager.shutdown();
@@ -361,12 +366,6 @@ public class TezChild {
}
}
- public void setUmbilical(TezTaskUmbilicalProtocol tezTaskUmbilicalProtocol){
- if(tezTaskUmbilicalProtocol != null){
- this.umbilical = tezTaskUmbilicalProtocol;
- }
- }
-
public static class ContainerExecutionResult {
public static enum ExitStatus {
SUCCESS(0),
@@ -412,7 +411,8 @@ public class TezChild {
public static TezChild newTezChild(Configuration conf, String host, int port, String containerIdentifier,
String tokenIdentifier, int attemptNumber, String[] localDirs, String workingDirectory,
Map<String, String> serviceProviderEnvMap, @Nullable String pid,
- ExecutionContext executionContext, Credentials credentials, long memAvailable, String user)
+ ExecutionContext executionContext, Credentials credentials, long memAvailable, String user,
+ TezTaskUmbilicalProtocol tezUmbilical)
throws IOException, InterruptedException, TezException {
// Pull in configuration specified for the session.
@@ -425,7 +425,7 @@ public class TezChild {
return new TezChild(conf, host, port, containerIdentifier, tokenIdentifier,
attemptNumber, workingDirectory, localDirs, serviceProviderEnvMap, objectRegistry, pid,
- executionContext, credentials, memAvailable, user);
+ executionContext, credentials, memAvailable, user, tezUmbilical);
}
public static void main(String[] args) throws IOException, InterruptedException, TezException {
@@ -459,7 +459,7 @@ public class TezChild {
tokenIdentifier, attemptNumber, localDirs, System.getenv(Environment.PWD.name()),
System.getenv(), pid, new ExecutionContextImpl(System.getenv(Environment.NM_HOST.name())),
credentials, Runtime.getRuntime().maxMemory(), System
- .getenv(ApplicationConstants.Environment.USER.toString()));
+ .getenv(ApplicationConstants.Environment.USER.toString()), null);
tezChild.run();
}
[30/43] tez git commit: TEZ-2175. Task priority should be available
to the TaskCommunicator plugin. (sseth)
Posted by ss...@apache.org.
TEZ-2175. Task priority should be available to the TaskCommunicator plugin. (sseth)
Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/99a1b85e
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/99a1b85e
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/99a1b85e
Branch: refs/heads/TEZ-2003
Commit: 99a1b85e2275c4635b2bddbdfd258c43116ebd9e
Parents: 6014c96
Author: Siddharth Seth <ss...@apache.org>
Authored: Tue Mar 10 00:47:07 2015 -0700
Committer: Siddharth Seth <ss...@apache.org>
Committed: Fri May 8 14:41:30 2015 -0700
----------------------------------------------------------------------
TEZ-2003-CHANGES.txt | 1 +
.../src/main/java/org/apache/tez/dag/api/TaskCommunicator.java | 2 +-
.../org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java | 2 +-
.../java/org/apache/tez/dag/app/TezTaskCommunicatorImpl.java | 3 ++-
.../dag/app/taskcomm/TezTestServiceTaskCommunicatorImpl.java | 5 +++--
5 files changed, 8 insertions(+), 5 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/tez/blob/99a1b85e/TEZ-2003-CHANGES.txt
----------------------------------------------------------------------
diff --git a/TEZ-2003-CHANGES.txt b/TEZ-2003-CHANGES.txt
index 5c5fd8e..7726815 100644
--- a/TEZ-2003-CHANGES.txt
+++ b/TEZ-2003-CHANGES.txt
@@ -9,5 +9,6 @@ ALL CHANGES:
TEZ-2131. Add additional tests for tasks running in the AM.
TEZ-2138. Fix minor bugs in adding default scheduler, getting launchers.
TEZ-2139. Update tez version to 0.7.0-TEZ-2003-SNAPSHOT.
+ TEZ-2175. Task priority should be available to the TaskCommunicator plugin.
INCOMPATIBLE CHANGES:
http://git-wip-us.apache.org/repos/asf/tez/blob/99a1b85e/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicator.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicator.java b/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicator.java
index c9f85e0..82eed20 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicator.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicator.java
@@ -43,7 +43,7 @@ public abstract class TaskCommunicator extends AbstractService {
public abstract void registerRunningTaskAttempt(ContainerId containerId, TaskSpec taskSpec,
Map<String, LocalResource> additionalResources,
Credentials credentials,
- boolean credentialsChanged);
+ boolean credentialsChanged, int priority);
// TODO TEZ-2003 Remove reference to TaskAttemptID
public abstract void unregisterRunningTaskAttempt(TezTaskAttemptID taskAttemptID);
http://git-wip-us.apache.org/repos/asf/tez/blob/99a1b85e/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java b/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
index 8346839..b570301 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
@@ -360,7 +360,7 @@ public class TaskAttemptListenerImpTezDag extends AbstractService implements
}
taskCommunicators[taskCommId].registerRunningTaskAttempt(containerId, amContainerTask.getTask(),
amContainerTask.getAdditionalResources(), amContainerTask.getCredentials(),
- amContainerTask.haveCredentialsChanged());
+ amContainerTask.haveCredentialsChanged(), amContainerTask.getPriority());
}
@Override
http://git-wip-us.apache.org/repos/asf/tez/blob/99a1b85e/tez-dag/src/main/java/org/apache/tez/dag/app/TezTaskCommunicatorImpl.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/TezTaskCommunicatorImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/TezTaskCommunicatorImpl.java
index 0bf1b5d..f288748 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/TezTaskCommunicatorImpl.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/TezTaskCommunicatorImpl.java
@@ -194,7 +194,8 @@ public class TezTaskCommunicatorImpl extends TaskCommunicator {
@Override
public void registerRunningTaskAttempt(ContainerId containerId, TaskSpec taskSpec,
Map<String, LocalResource> additionalResources,
- Credentials credentials, boolean credentialsChanged) {
+ Credentials credentials, boolean credentialsChanged,
+ int priority) {
ContainerInfo containerInfo = registeredContainers.get(containerId);
Preconditions.checkNotNull(containerInfo,
http://git-wip-us.apache.org/repos/asf/tez/blob/99a1b85e/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/taskcomm/TezTestServiceTaskCommunicatorImpl.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/taskcomm/TezTestServiceTaskCommunicatorImpl.java b/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/taskcomm/TezTestServiceTaskCommunicatorImpl.java
index 78cdcde..a327caf 100644
--- a/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/taskcomm/TezTestServiceTaskCommunicatorImpl.java
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/taskcomm/TezTestServiceTaskCommunicatorImpl.java
@@ -100,9 +100,10 @@ public class TezTestServiceTaskCommunicatorImpl extends TezTaskCommunicatorImpl
public void registerRunningTaskAttempt(final ContainerId containerId, final TaskSpec taskSpec,
Map<String, LocalResource> additionalResources,
Credentials credentials,
- boolean credentialsChanged) {
+ boolean credentialsChanged,
+ int priority) {
super.registerRunningTaskAttempt(containerId, taskSpec, additionalResources, credentials,
- credentialsChanged);
+ credentialsChanged, priority);
SubmitWorkRequestProto requestProto = null;
try {
requestProto = constructSubmitWorkRequest(containerId, taskSpec);
[19/43] tez git commit: TEZ-2006. Task communication plane needs to
be pluggable. (sseth)
Posted by ss...@apache.org.
TEZ-2006. Task communication plane needs to be pluggable. (sseth)
Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/7b71d3b7
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/7b71d3b7
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/7b71d3b7
Branch: refs/heads/TEZ-2003
Commit: 7b71d3b762f9bf84d05caaa6c6cc48028478a07f
Parents: 44bea93
Author: Siddharth Seth <ss...@apache.org>
Authored: Thu Feb 12 11:25:45 2015 -0800
Committer: Siddharth Seth <ss...@apache.org>
Committed: Fri May 8 14:40:12 2015 -0700
----------------------------------------------------------------------
TEZ-2003-CHANGES.txt | 1 +
.../apache/tez/dag/api/TaskCommunicator.java | 54 ++
.../tez/dag/api/TaskCommunicatorContext.java | 48 ++
.../tez/dag/api/TaskHeartbeatRequest.java | 63 +++
.../tez/dag/api/TaskHeartbeatResponse.java | 39 ++
.../java/org/apache/tez/dag/app/AppContext.java | 3 +
.../org/apache/tez/dag/app/DAGAppMaster.java | 5 +
.../dag/app/TaskAttemptListenerImpTezDag.java | 522 +++++++------------
.../tez/dag/app/TezTaskCommunicatorImpl.java | 474 +++++++++++++++++
.../app/launcher/LocalContainerLauncher.java | 10 +-
.../tez/dag/app/rm/container/AMContainer.java | 3 +-
.../rm/container/AMContainerEventAssignTA.java | 2 +
.../dag/app/rm/container/AMContainerImpl.java | 1 +
.../apache/tez/dag/app/MockDAGAppMaster.java | 27 +-
.../app/TestTaskAttemptListenerImplTezDag.java | 82 +--
15 files changed, 967 insertions(+), 367 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/tez/blob/7b71d3b7/TEZ-2003-CHANGES.txt
----------------------------------------------------------------------
diff --git a/TEZ-2003-CHANGES.txt b/TEZ-2003-CHANGES.txt
index 1822fcb..d7e4be5 100644
--- a/TEZ-2003-CHANGES.txt
+++ b/TEZ-2003-CHANGES.txt
@@ -1,4 +1,5 @@
ALL CHANGES:
TEZ-2019. Temporarily allow the scheduler and launcher to be specified via configuration.
+ TEZ-2006. Task communication plane needs to be pluggable.
INCOMPATIBLE CHANGES:
http://git-wip-us.apache.org/repos/asf/tez/blob/7b71d3b7/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicator.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicator.java b/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicator.java
new file mode 100644
index 0000000..97f9c16
--- /dev/null
+++ b/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicator.java
@@ -0,0 +1,54 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.dag.api;
+
+import java.net.InetSocketAddress;
+import java.util.Map;
+
+import org.apache.hadoop.security.Credentials;
+import org.apache.hadoop.service.AbstractService;
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.api.records.LocalResource;
+import org.apache.tez.dag.records.TezTaskAttemptID;
+import org.apache.tez.runtime.api.impl.TaskSpec;
+
+// TODO TEZ-2003 Move this into the tez-api module
+public abstract class TaskCommunicator extends AbstractService {
+ public TaskCommunicator(String name) {
+ super(name);
+ }
+
+ // TODO TEZ-2003 Ideally, don't expose YARN containerId; instead expose a Tez specific construct.
+ // TODO When talking to an external service, this plugin implementer may need access to a host:port
+ public abstract void registerRunningContainer(ContainerId containerId, String hostname, int port);
+
+ // TODO TEZ-2003 Ideally, don't expose YARN containerId; instead expose a Tez specific construct.
+ public abstract void registerContainerEnd(ContainerId containerId);
+
+ // TODO TEZ-2003 TaskSpec breakup into a clean interface
+ // TODO TEZ-2003 Add support for priority
+ public abstract void registerRunningTaskAttempt(ContainerId containerId, TaskSpec taskSpec,
+ Map<String, LocalResource> additionalResources,
+ Credentials credentials,
+ boolean credentialsChanged);
+
+ // TODO TEZ-2003 Remove reference to TaskAttemptID
+ public abstract void unregisterRunningTaskAttempt(TezTaskAttemptID taskAttemptID);
+
+ // TODO TEZ-2003 This doesn't necessarily belong here. A server may not start within the AM.
+ public abstract InetSocketAddress getAddress();
+
+ // TODO Eventually. Add methods here to support preemption of tasks.
+}
http://git-wip-us.apache.org/repos/asf/tez/blob/7b71d3b7/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicatorContext.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicatorContext.java b/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicatorContext.java
new file mode 100644
index 0000000..9b2d889
--- /dev/null
+++ b/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicatorContext.java
@@ -0,0 +1,48 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.dag.api;
+
+import java.io.IOException;
+
+import org.apache.hadoop.security.Credentials;
+import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.tez.dag.records.TezTaskAttemptID;
+
+
+// Do not make calls into this from within a held lock.
+
+// TODO TEZ-2003 Move this into the tez-api module
+public interface TaskCommunicatorContext {
+
+ // TODO TEZ-2003 Add signalling back into this to indicate errors - e.g. Container unregistered, task no longer running, etc.
+
+ // TODO TEZ-2003 Maybe add book-keeping as a helper library, instead of each impl tracking container to task etc.
+
+ ApplicationAttemptId getApplicationAttemptId();
+ Credentials getCredentials();
+
+ // TODO TEZ-2003 Move to vertex, taskIndex, version
+ boolean canCommit(TezTaskAttemptID taskAttemptId) throws IOException;
+
+ TaskHeartbeatResponse heartbeat(TaskHeartbeatRequest request) throws IOException, TezException;
+
+ boolean isKnownContainer(ContainerId containerId);
+
+ // TODO TEZ-2003 Move to vertex, taskIndex, version
+ void taskStartedRemotely(TezTaskAttemptID taskAttemptID, ContainerId containerId);
+
+ // TODO Eventually. Add methods to report availability stats to the scheduler.
+}
http://git-wip-us.apache.org/repos/asf/tez/blob/7b71d3b7/tez-dag/src/main/java/org/apache/tez/dag/api/TaskHeartbeatRequest.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/api/TaskHeartbeatRequest.java b/tez-dag/src/main/java/org/apache/tez/dag/api/TaskHeartbeatRequest.java
new file mode 100644
index 0000000..f6bc8f0
--- /dev/null
+++ b/tez-dag/src/main/java/org/apache/tez/dag/api/TaskHeartbeatRequest.java
@@ -0,0 +1,63 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.dag.api;
+
+import java.util.List;
+
+import org.apache.tez.dag.records.TezTaskAttemptID;
+import org.apache.tez.runtime.api.impl.TezEvent;
+
+// TODO TEZ-2003 Move this into the tez-api module
+public class TaskHeartbeatRequest {
+
+ // TODO TEZ-2003 Ideally containerIdentifier should not be part of the request.
+ // Replace with a task lookup - vertex name + task index
+ private final String containerIdentifier;
+ // TODO TEZ-2003 Get rid of the task attemptId reference if possible
+ private final TezTaskAttemptID taskAttemptId;
+ private final List<TezEvent> events;
+ private final int startIndex;
+ private final int maxEvents;
+
+
+ public TaskHeartbeatRequest(String containerIdentifier, TezTaskAttemptID taskAttemptId, List<TezEvent> events, int startIndex,
+ int maxEvents) {
+ this.containerIdentifier = containerIdentifier;
+ this.taskAttemptId = taskAttemptId;
+ this.events = events;
+ this.startIndex = startIndex;
+ this.maxEvents = maxEvents;
+ }
+
+ public String getContainerIdentifier() {
+ return containerIdentifier;
+ }
+
+ public TezTaskAttemptID getTaskAttemptId() {
+ return taskAttemptId;
+ }
+
+ public List<TezEvent> getEvents() {
+ return events;
+ }
+
+ public int getStartIndex() {
+ return startIndex;
+ }
+
+ public int getMaxEvents() {
+ return maxEvents;
+ }
+}
http://git-wip-us.apache.org/repos/asf/tez/blob/7b71d3b7/tez-dag/src/main/java/org/apache/tez/dag/api/TaskHeartbeatResponse.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/api/TaskHeartbeatResponse.java b/tez-dag/src/main/java/org/apache/tez/dag/api/TaskHeartbeatResponse.java
new file mode 100644
index 0000000..c82a743
--- /dev/null
+++ b/tez-dag/src/main/java/org/apache/tez/dag/api/TaskHeartbeatResponse.java
@@ -0,0 +1,39 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.dag.api;
+
+import java.util.List;
+
+import org.apache.tez.runtime.api.impl.TezEvent;
+
+// TODO TEZ-2003 Move this into the tez-api module
+public class TaskHeartbeatResponse {
+
+ private final boolean shouldDie;
+ private List<TezEvent> events;
+
+ public TaskHeartbeatResponse(boolean shouldDie, List<TezEvent> events) {
+ this.shouldDie = shouldDie;
+ this.events = events;
+ }
+
+ public boolean isShouldDie() {
+ return shouldDie;
+ }
+
+ public List<TezEvent> getEvents() {
+ return events;
+ }
+}
http://git-wip-us.apache.org/repos/asf/tez/blob/7b71d3b7/tez-dag/src/main/java/org/apache/tez/dag/app/AppContext.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/AppContext.java b/tez-dag/src/main/java/org/apache/tez/dag/app/AppContext.java
index 4781784..37f7624 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/AppContext.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/AppContext.java
@@ -24,6 +24,7 @@ import java.util.Set;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.security.Credentials;
import org.apache.hadoop.yarn.api.records.ApplicationAccessType;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.ApplicationId;
@@ -106,4 +107,6 @@ public interface AppContext {
String[] getLocalDirs();
String getAMUser();
+
+ Credentials getAppCredentials();
}
http://git-wip-us.apache.org/repos/asf/tez/blob/7b71d3b7/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java b/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java
index 73ee56e..bfc2d58 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java
@@ -1486,6 +1486,11 @@ public class DAGAppMaster extends AbstractService {
}
@Override
+ public Credentials getAppCredentials() {
+ return amCredentials;
+ }
+
+ @Override
public Map<ApplicationAccessType, String> getApplicationACLs() {
if (getServiceState() != STATE.STARTED) {
throw new TezUncheckedException(
http://git-wip-us.apache.org/repos/asf/tez/blob/7b71d3b7/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java b/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
index 970489d..0d9dc31 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
@@ -18,15 +18,14 @@
package org.apache.tez.dag.app;
import java.io.IOException;
-import java.net.InetAddress;
+import java.lang.reflect.Constructor;
+import java.lang.reflect.InvocationTargetException;
import java.net.InetSocketAddress;
import java.net.URISyntaxException;
import java.net.UnknownHostException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
-import java.util.Map;
-import java.util.Map.Entry;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
@@ -38,216 +37,212 @@ import org.apache.tez.runtime.api.events.TaskStatusUpdateEvent;
import org.apache.tez.runtime.api.impl.EventType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
-import org.apache.hadoop.ipc.ProtocolSignature;
-import org.apache.hadoop.ipc.RPC;
-import org.apache.hadoop.ipc.Server;
-import org.apache.hadoop.net.NetUtils;
-import org.apache.hadoop.security.authorize.PolicyProvider;
+import org.apache.hadoop.security.Credentials;
import org.apache.hadoop.service.AbstractService;
+import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
+import org.apache.hadoop.yarn.api.records.NodeId;
+import org.apache.tez.common.ReflectionUtils;
+import org.apache.tez.dag.api.TaskCommunicator;
+import org.apache.tez.dag.api.TaskCommunicatorContext;
+import org.apache.tez.dag.api.TaskHeartbeatResponse;
+import org.apache.tez.dag.api.TezConfiguration;
import org.apache.tez.dag.api.TezException;
import org.apache.tez.dag.api.TezUncheckedException;
import org.apache.hadoop.yarn.api.records.ContainerId;
-import org.apache.hadoop.yarn.api.records.LocalResource;
import org.apache.hadoop.yarn.util.ConverterUtils;
-import org.apache.tez.common.ContainerContext;
-import org.apache.tez.common.ContainerTask;
-import org.apache.tez.common.TezConverterUtils;
-import org.apache.tez.common.TezLocalResource;
-import org.apache.tez.common.TezTaskUmbilicalProtocol;
-import org.apache.tez.dag.api.TezConfiguration;
+import org.apache.tez.dag.api.TaskHeartbeatRequest;
import org.apache.tez.dag.app.dag.DAG;
import org.apache.tez.dag.app.dag.Task;
import org.apache.tez.dag.app.dag.event.TaskAttemptEventStartedRemotely;
import org.apache.tez.dag.app.dag.event.VertexEventRouteEvent;
+import org.apache.tez.dag.app.rm.TaskSchedulerService;
import org.apache.tez.dag.app.rm.container.AMContainerTask;
-import org.apache.tez.dag.app.security.authorize.TezAMPolicyProvider;
import org.apache.tez.dag.records.TezTaskAttemptID;
import org.apache.tez.dag.records.TezVertexID;
import org.apache.tez.runtime.api.impl.TezEvent;
-import org.apache.tez.runtime.api.impl.TezHeartbeatRequest;
-import org.apache.tez.runtime.api.impl.TezHeartbeatResponse;
import org.apache.tez.common.security.JobTokenSecretManager;
-import com.google.common.collect.Maps;
@SuppressWarnings("unchecked")
+@InterfaceAudience.Private
public class TaskAttemptListenerImpTezDag extends AbstractService implements
- TezTaskUmbilicalProtocol, TaskAttemptListener {
-
- private static final ContainerTask TASK_FOR_INVALID_JVM = new ContainerTask(
- null, true, null, null, false);
+ TaskAttemptListener, TaskCommunicatorContext {
private static final Logger LOG = LoggerFactory
.getLogger(TaskAttemptListenerImpTezDag.class);
private final AppContext context;
+ private TaskCommunicator taskCommunicator;
protected final TaskHeartbeatHandler taskHeartbeatHandler;
protected final ContainerHeartbeatHandler containerHeartbeatHandler;
- private final JobTokenSecretManager jobTokenSecretManager;
- private InetSocketAddress address;
- private Server server;
-
- static class ContainerInfo {
- ContainerInfo() {
- this.lastReponse = null;
- this.lastRequestId = 0;
- this.amContainerTask = null;
- this.taskPulled = false;
+
+ private final TaskHeartbeatResponse RESPONSE_SHOULD_DIE = new TaskHeartbeatResponse(true, null);
+
+ private final ConcurrentMap<TezTaskAttemptID, ContainerId> registeredAttempts =
+ new ConcurrentHashMap<TezTaskAttemptID, ContainerId>();
+ private final ConcurrentMap<ContainerId, ContainerInfo> registeredContainers =
+ new ConcurrentHashMap<ContainerId, ContainerInfo>();
+
+ // Defined primarily to work around ConcurrentMaps not accepting null values
+ private static final class ContainerInfo {
+ TezTaskAttemptID taskAttemptId;
+ ContainerInfo(TezTaskAttemptID taskAttemptId) {
+ this.taskAttemptId = taskAttemptId;
}
- long lastRequestId;
- TezHeartbeatResponse lastReponse;
- AMContainerTask amContainerTask;
- boolean taskPulled;
}
- private ConcurrentMap<TezTaskAttemptID, ContainerId> attemptToInfoMap =
- new ConcurrentHashMap<TezTaskAttemptID, ContainerId>();
+ private static final ContainerInfo NULL_CONTAINER_INFO = new ContainerInfo(null);
- private ConcurrentHashMap<ContainerId, ContainerInfo> registeredContainers =
- new ConcurrentHashMap<ContainerId, ContainerInfo>();
public TaskAttemptListenerImpTezDag(AppContext context,
- TaskHeartbeatHandler thh, ContainerHeartbeatHandler chh,
- JobTokenSecretManager jobTokenSecretManager) {
+ TaskHeartbeatHandler thh, ContainerHeartbeatHandler chh,
+ // TODO TEZ-2003 pre-merge. Remove reference to JobTokenSecretManager.
+ JobTokenSecretManager jobTokenSecretManager) {
super(TaskAttemptListenerImpTezDag.class.getName());
this.context = context;
- this.jobTokenSecretManager = jobTokenSecretManager;
this.taskHeartbeatHandler = thh;
this.containerHeartbeatHandler = chh;
+ this.taskCommunicator = new TezTaskCommunicatorImpl(this);
}
@Override
- public void serviceStart() {
- startRpcServer();
- }
-
- protected void startRpcServer() {
- Configuration conf = getConfig();
- if (!conf.getBoolean(TezConfiguration.TEZ_LOCAL_MODE, TezConfiguration.TEZ_LOCAL_MODE_DEFAULT)) {
- try {
- server = new RPC.Builder(conf)
- .setProtocol(TezTaskUmbilicalProtocol.class)
- .setBindAddress("0.0.0.0")
- .setPort(0)
- .setInstance(this)
- .setNumHandlers(
- conf.getInt(TezConfiguration.TEZ_AM_TASK_LISTENER_THREAD_COUNT,
- TezConfiguration.TEZ_AM_TASK_LISTENER_THREAD_COUNT_DEFAULT))
- .setSecretManager(jobTokenSecretManager).build();
-
- // Enable service authorization?
- if (conf.getBoolean(
- CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHORIZATION,
- false)) {
- refreshServiceAcls(conf, new TezAMPolicyProvider());
- }
-
- server.start();
- this.address = NetUtils.getConnectAddress(server);
- } catch (IOException e) {
- throw new TezUncheckedException(e);
- }
+ public void serviceInit(Configuration conf) {
+ String taskCommClassName = conf.get(TezConfiguration.TEZ_AM_TASK_COMMUNICATOR_CLASS);
+ if (taskCommClassName == null) {
+ LOG.info("Using Default Task Communicator");
+ this.taskCommunicator = new TezTaskCommunicatorImpl(this);
} else {
+ LOG.info("Using TaskCommunicator: " + taskCommClassName);
+ Class<? extends TaskCommunicator> taskCommClazz = (Class<? extends TaskCommunicator>) ReflectionUtils
+ .getClazz(taskCommClassName);
try {
- this.address = new InetSocketAddress(InetAddress.getLocalHost(), 0);
- } catch (UnknownHostException e) {
+ Constructor<? extends TaskCommunicator> ctor = taskCommClazz.getConstructor(TaskCommunicatorContext.class);
+ ctor.setAccessible(true);
+ this.taskCommunicator = ctor.newInstance(this);
+ } catch (NoSuchMethodException e) {
+ throw new TezUncheckedException(e);
+ } catch (InvocationTargetException e) {
+ throw new TezUncheckedException(e);
+ } catch (InstantiationException e) {
+ throw new TezUncheckedException(e);
+ } catch (IllegalAccessException e) {
throw new TezUncheckedException(e);
- }
- if (LOG.isDebugEnabled()) {
- LOG.debug("Not starting TaskAttemptListener RPC in LocalMode");
}
}
}
- void refreshServiceAcls(Configuration configuration,
- PolicyProvider policyProvider) {
- this.server.refreshServiceAcl(configuration, policyProvider);
+ @Override
+ public void serviceStart() {
+ taskCommunicator.init(getConfig());
+ taskCommunicator.start();
}
@Override
public void serviceStop() {
- stopRpcServer();
- }
-
- protected void stopRpcServer() {
- if (server != null) {
- server.stop();
+ if (taskCommunicator != null) {
+ taskCommunicator.stop();
+ taskCommunicator = null;
}
}
- public InetSocketAddress getAddress() {
- return address;
- }
-
@Override
- public long getProtocolVersion(String protocol, long clientVersion)
- throws IOException {
- return versionID;
+ public ApplicationAttemptId getApplicationAttemptId() {
+ return context.getApplicationAttemptId();
}
@Override
- public ProtocolSignature getProtocolSignature(String protocol,
- long clientVersion, int clientMethodsHash) throws IOException {
- return ProtocolSignature.getProtocolSignature(this, protocol,
- clientVersion, clientMethodsHash);
+ public Credentials getCredentials() {
+ return context.getAppCredentials();
}
@Override
- public ContainerTask getTask(ContainerContext containerContext)
- throws IOException {
+ public TaskHeartbeatResponse heartbeat(TaskHeartbeatRequest request)
+ throws IOException, TezException {
+ ContainerId containerId = ConverterUtils.toContainerId(request
+ .getContainerIdentifier());
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Received heartbeat from container"
+ + ", request=" + request);
+ }
- ContainerTask task = null;
+ if (!registeredContainers.containsKey(containerId)) {
+ LOG.warn("Received task heartbeat from unknown container with id: " + containerId +
+ ", asking it to die");
+ return RESPONSE_SHOULD_DIE;
+ }
- if (containerContext == null || containerContext.getContainerIdentifier() == null) {
- LOG.info("Invalid task request with an empty containerContext or containerId");
- task = TASK_FOR_INVALID_JVM;
- } else {
- ContainerId containerId = ConverterUtils.toContainerId(containerContext
- .getContainerIdentifier());
+ // A heartbeat can come in anytime. The AM may have made a decision to kill a running task/container
+ // meanwhile. If the decision is processed through the pipeline before the heartbeat is processed,
+ // the heartbeat will be dropped. Otherwise the heartbeat will be processed - and the system
+ // knows how to handle this - via FailedInputEvents for example (relevant only if the heartbeat has events).
+ // So - avoiding synchronization.
+
+ pingContainerHeartbeatHandler(containerId);
+ List<TezEvent> outEvents = null;
+ TezTaskAttemptID taskAttemptID = request.getTaskAttemptId();
+ if (taskAttemptID != null) {
+ ContainerId containerIdFromMap = registeredAttempts.get(taskAttemptID);
+ if (containerIdFromMap == null || !containerIdFromMap.equals(containerId)) {
+ // This can happen when a task heartbeats. Meanwhile the container is unregistered.
+ // The information will eventually make it through to the plugin via a corresponding unregister.
+ // There's a race in that case between the unregister making it through, and this method returning.
+ // TODO TEZ-2003. An exception back is likely a better approach than sending a shouldDie = true,
+ // so that the plugin can handle the scenario. Alternately augment the response with error codes.
+ // Error codes would be better than exceptions.
+ LOG.info("Attempt: " + taskAttemptID + " is not recognized for heartbeats");
+ return RESPONSE_SHOULD_DIE;
+ }
+
+ List<TezEvent> inEvents = request.getEvents();
if (LOG.isDebugEnabled()) {
- LOG.debug("Container with id: " + containerId + " asked for a task");
+ LOG.debug("Ping from " + taskAttemptID.toString() +
+ " events: " + (inEvents != null ? inEvents.size() : -1));
}
- if (!registeredContainers.containsKey(containerId)) {
- if(context.getAllContainers().get(containerId) == null) {
- LOG.info("Container with id: " + containerId
- + " is invalid and will be killed");
- } else {
- LOG.info("Container with id: " + containerId
- + " is valid, but no longer registered, and will be killed");
- }
- task = TASK_FOR_INVALID_JVM;
- } else {
- pingContainerHeartbeatHandler(containerId);
- task = getContainerTask(containerId);
- if (task == null) {
- if (LOG.isDebugEnabled()) {
- LOG.debug("No task current assigned to Container with id: " + containerId);
- }
- } else if (task == TASK_FOR_INVALID_JVM) {
- LOG.info("Container with id: " + containerId
- + " is valid, but no longer registered, and will be killed. Race condition.");
+
+ List<TezEvent> otherEvents = new ArrayList<TezEvent>();
+ for (TezEvent tezEvent : ListUtils.emptyIfNull(inEvents)) {
+ final EventType eventType = tezEvent.getEventType();
+ if (eventType == EventType.TASK_STATUS_UPDATE_EVENT ||
+ eventType == EventType.TASK_ATTEMPT_COMPLETED_EVENT) {
+ context.getEventHandler()
+ .handle(getTaskAttemptEventFromTezEvent(taskAttemptID, tezEvent));
} else {
- context.getEventHandler().handle(
- new TaskAttemptEventStartedRemotely(task.getTaskSpec()
- .getTaskAttemptID(), containerId, context
- .getApplicationACLs()));
- LOG.info("Container with id: " + containerId + " given task: "
- + task.getTaskSpec().getTaskAttemptID());
+ otherEvents.add(tezEvent);
}
}
+ if(!otherEvents.isEmpty()) {
+ TezVertexID vertexId = taskAttemptID.getTaskID().getVertexID();
+ context.getEventHandler().handle(
+ new VertexEventRouteEvent(vertexId, Collections.unmodifiableList(otherEvents)));
+ }
+ taskHeartbeatHandler.pinged(taskAttemptID);
+ outEvents = context
+ .getCurrentDAG()
+ .getVertex(taskAttemptID.getTaskID().getVertexID())
+ .getTask(taskAttemptID.getTaskID())
+ .getTaskAttemptTezEvents(taskAttemptID, request.getStartIndex(),
+ request.getMaxEvents());
}
- if (LOG.isDebugEnabled()) {
- LOG.debug("getTask returning task: " + task);
- }
- return task;
+ return new TaskHeartbeatResponse(false, outEvents);
+ }
+
+ /**
+ * Reports whether the application context has an entry for the given container,
+ * regardless of whether it is currently registered with this listener.
+ */
+ @Override
+ public boolean isKnownContainer(ContainerId containerId) {
+ return context.getAllContainers().get(containerId) != null;
+ }
+
+ /**
+ * Sends a TaskAttemptEventStartedRemotely for the attempt to the context's event handler
+ * and then pings the container heartbeat handler for the container.
+ * NOTE(review): the third constructor argument is null here, whereas the getTask() code this
+ * replaces passed context.getApplicationACLs() - confirm the ACLs are no longer needed.
+ */
+ @Override
+ public void taskStartedRemotely(TezTaskAttemptID taskAttemptID, ContainerId containerId) {
+ context.getEventHandler().handle(new TaskAttemptEventStartedRemotely(taskAttemptID, containerId, null));
+ pingContainerHeartbeatHandler(containerId);
}
/**
* Child checking whether it can commit.
- *
+ * <p/>
* <br/>
* Repeatedly polls the ApplicationMaster whether it
* {@link Task#canCommit(TezTaskAttemptID)} This is * a legacy from the
@@ -270,25 +265,12 @@ public class TaskAttemptListenerImpTezDag extends AbstractService implements
}
@Override
- public void unregisterTaskAttempt(TezTaskAttemptID attemptId) {
- ContainerId containerId = attemptToInfoMap.get(attemptId);
- if(containerId == null) {
- LOG.warn("Unregister task attempt: " + attemptId + " from unknown container");
- return;
- }
- ContainerInfo containerInfo = registeredContainers.get(containerId);
- if(containerInfo == null) {
- LOG.warn("Unregister task attempt: " + attemptId +
- " from non-registered container: " + containerId);
- return;
- }
- synchronized (containerInfo) {
- containerInfo.amContainerTask = null;
- attemptToInfoMap.remove(attemptId);
- }
-
+ public InetSocketAddress getAddress() {
+ return taskCommunicator.getAddress();
}
+ // The TaskAttemptListener register / unregister methods in this class are not thread safe.
+ // The Tez framework should not invoke these methods from multiple threads.
@Override
public void dagComplete(DAG dag) {
// TODO TEZ-2335. Cleanup TaskHeartbeat handler structures.
@@ -308,50 +290,82 @@ public class TaskAttemptListenerImpTezDag extends AbstractService implements
@Override
public void registerRunningContainer(ContainerId containerId) {
if (LOG.isDebugEnabled()) {
- LOG.debug("ContainerId: " + containerId
- + " registered with TaskAttemptListener");
+ LOG.debug("ContainerId: " + containerId + " registered with TaskAttemptListener");
}
- ContainerInfo oldInfo = registeredContainers.put(containerId, new ContainerInfo());
- if(oldInfo != null) {
+ ContainerInfo oldInfo = registeredContainers.put(containerId, NULL_CONTAINER_INFO);
+ if (oldInfo != null) {
throw new TezUncheckedException(
"Multiple registrations for containerId: " + containerId);
}
+ NodeId nodeId = context.getAllContainers().get(containerId).getContainer().getNodeId();
+ taskCommunicator.registerRunningContainer(containerId, nodeId.getHost(), nodeId.getPort());
+ }
+
+ /**
+ * Removes a container from the registered set, drops the mapping of any task attempt
+ * still assigned to it, and notifies the task communicator that the container has ended.
+ * A container that is not (or no longer) registered is tolerated.
+ */
+ @Override
+ public void unregisterRunningContainer(ContainerId containerId) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Unregistering Container from TaskAttemptListener: " + containerId);
+ }
+ ContainerInfo containerInfo = registeredContainers.remove(containerId);
+ // remove() yields null for an unknown container; guard so this does not fail with an NPE.
+ if (containerInfo != null && containerInfo.taskAttemptId != null) {
+ registeredAttempts.remove(containerInfo.taskAttemptId);
+ }
+ taskCommunicator.registerContainerEnd(containerId);
}
@Override
public void registerTaskAttempt(AMContainerTask amContainerTask,
- ContainerId containerId) {
+ ContainerId containerId) {
ContainerInfo containerInfo = registeredContainers.get(containerId);
- if(containerInfo == null) {
+ if (containerInfo == null) {
throw new TezUncheckedException("Registering task attempt: "
+ amContainerTask.getTask().getTaskAttemptID() + " to unknown container: " + containerId);
}
- synchronized (containerInfo) {
- if(containerInfo.amContainerTask != null) {
- throw new TezUncheckedException("Registering task attempt: "
- + amContainerTask.getTask().getTaskAttemptID() + " to container: " + containerId
- + " with existing assignment to: " + containerInfo.amContainerTask.getTask().getTaskAttemptID());
- }
- containerInfo.amContainerTask = amContainerTask;
- containerInfo.taskPulled = false;
-
- ContainerId containerIdFromMap =
- attemptToInfoMap.put(amContainerTask.getTask().getTaskAttemptID(), containerId);
- if(containerIdFromMap != null) {
- throw new TezUncheckedException("Registering task attempt: "
- + amContainerTask.getTask().getTaskAttemptID() + " to container: " + containerId
- + " when already assigned to: " + containerIdFromMap);
- }
+ if (containerInfo.taskAttemptId != null) {
+ throw new TezUncheckedException("Registering task attempt: "
+ + amContainerTask.getTask().getTaskAttemptID() + " to container: " + containerId
+ + " with existing assignment to: " +
+ containerInfo.taskAttemptId);
}
+
+ if (containerInfo.taskAttemptId != null) {
+ throw new TezUncheckedException("Registering task attempt: "
+ + amContainerTask.getTask().getTaskAttemptID() + " to container: " + containerId
+ + " with existing assignment to: " +
+ containerInfo.taskAttemptId);
+ }
+
+ // Explicitly putting in a new entry so that synchronization is not required on the existing element in the map.
+ registeredContainers.put(containerId, new ContainerInfo(amContainerTask.getTask().getTaskAttemptID()));
+
+ ContainerId containerIdFromMap = registeredAttempts.put(
+ amContainerTask.getTask().getTaskAttemptID(), containerId);
+ if (containerIdFromMap != null) {
+ throw new TezUncheckedException("Registering task attempt: "
+ + amContainerTask.getTask().getTaskAttemptID() + " to container: " + containerId
+ + " when already assigned to: " + containerIdFromMap);
+ }
+ taskCommunicator.registerRunningTaskAttempt(containerId, amContainerTask.getTask(),
+ amContainerTask.getAdditionalResources(), amContainerTask.getCredentials(),
+ amContainerTask.haveCredentialsChanged());
}
@Override
- public void unregisterRunningContainer(ContainerId containerId) {
- if (LOG.isDebugEnabled()) {
- LOG.debug("Unregistering Container from TaskAttemptListener: "
- + containerId);
+ public void unregisterTaskAttempt(TezTaskAttemptID attemptId) {
+ ContainerId containerId = registeredAttempts.remove(attemptId);
+ if (containerId == null) {
+ LOG.warn("Unregister task attempt: " + attemptId + " from unknown container");
+ return;
+ }
+ ContainerInfo containerInfo = registeredContainers.get(containerId);
+ if (containerInfo == null) {
+ LOG.warn("Unregister task attempt: " + attemptId +
+ " from non-registered container: " + containerId);
+ return;
}
- registeredContainers.remove(containerId);
+ // Explicitly putting in a new entry so that synchronization is not required on the existing element in the map.
+ registeredContainers.put(containerId, NULL_CONTAINER_INFO);
+ taskCommunicator.unregisterRunningTaskAttempt(attemptId);
}
private void pingContainerHeartbeatHandler(ContainerId containerId) {
@@ -359,7 +373,7 @@ public class TaskAttemptListenerImpTezDag extends AbstractService implements
}
private void pingContainerHeartbeatHandler(TezTaskAttemptID taskAttemptId) {
- ContainerId containerId = attemptToInfoMap.get(taskAttemptId);
+ ContainerId containerId = registeredAttempts.get(taskAttemptId);
if (containerId != null) {
containerHeartbeatHandler.pinged(containerId);
} else {
@@ -368,141 +382,7 @@ public class TaskAttemptListenerImpTezDag extends AbstractService implements
}
}
- @Override
- public TezHeartbeatResponse heartbeat(TezHeartbeatRequest request)
- throws IOException, TezException {
- ContainerId containerId = ConverterUtils.toContainerId(request
- .getContainerIdentifier());
- long requestId = request.getRequestId();
- if (LOG.isDebugEnabled()) {
- LOG.debug("Received heartbeat from container"
- + ", request=" + request);
- }
-
- ContainerInfo containerInfo = registeredContainers.get(containerId);
- if(containerInfo == null) {
- LOG.warn("Received task heartbeat from unknown container with id: " + containerId +
- ", asking it to die");
- TezHeartbeatResponse response = new TezHeartbeatResponse();
- response.setLastRequestId(requestId);
- response.setShouldDie();
- return response;
- }
-
- synchronized (containerInfo) {
- pingContainerHeartbeatHandler(containerId);
-
- if(containerInfo.lastRequestId == requestId) {
- LOG.warn("Old sequenceId received: " + requestId
- + ", Re-sending last response to client");
- return containerInfo.lastReponse;
- }
-
- TezHeartbeatResponse response = new TezHeartbeatResponse();
- response.setLastRequestId(requestId);
-
- TezTaskAttemptID taskAttemptID = request.getCurrentTaskAttemptID();
- if (taskAttemptID != null) {
- ContainerId containerIdFromMap = attemptToInfoMap.get(taskAttemptID);
- if(containerIdFromMap == null || !containerIdFromMap.equals(containerId)) {
- throw new TezException("Attempt " + taskAttemptID
- + " is not recognized for heartbeat");
- }
-
- if(containerInfo.lastRequestId+1 != requestId) {
- throw new TezException("Container " + containerId
- + " has invalid request id. Expected: "
- + containerInfo.lastRequestId+1
- + " and actual: " + requestId);
- }
-
- List<TezEvent> inEvents = request.getEvents();
- if (LOG.isDebugEnabled()) {
- LOG.debug("Ping from " + taskAttemptID.toString() +
- " events: " + (inEvents != null? inEvents.size() : -1));
- }
-
- List<TezEvent> otherEvents = new ArrayList<TezEvent>();
- // route TASK_STATUS_UPDATE_EVENT directly to TaskAttempt and route other events
- // (DATA_MOVEMENT_EVENT, TASK_ATTEMPT_COMPLETED_EVENT, TASK_ATTEMPT_FAILED_EVENT)
- // to VertexImpl to ensure the events ordering
- // 1. DataMovementEvent is logged as RecoveryEvent before TaskAttemptFinishedEvent
- // 2. TaskStatusEvent is handled before TaskAttemptFinishedEvent
- for (TezEvent tezEvent : ListUtils.emptyIfNull(inEvents)) {
- final EventType eventType = tezEvent.getEventType();
- if (eventType == EventType.TASK_STATUS_UPDATE_EVENT) {
- TaskAttemptEvent taskAttemptEvent = new TaskAttemptEventStatusUpdate(taskAttemptID,
- (TaskStatusUpdateEvent) tezEvent.getEvent());
- context.getEventHandler().handle(taskAttemptEvent);
- } else {
- otherEvents.add(tezEvent);
- }
- }
- if(!otherEvents.isEmpty()) {
- TezVertexID vertexId = taskAttemptID.getTaskID().getVertexID();
- context.getEventHandler().handle(
- new VertexEventRouteEvent(vertexId, Collections.unmodifiableList(otherEvents)));
- }
- taskHeartbeatHandler.pinged(taskAttemptID);
- TaskAttemptEventInfo eventInfo = context
- .getCurrentDAG()
- .getVertex(taskAttemptID.getTaskID().getVertexID())
- .getTaskAttemptTezEvents(taskAttemptID, request.getStartIndex(),
- request.getMaxEvents());
- response.setEvents(eventInfo.getEvents());
- response.setNextFromEventId(eventInfo.getNextFromEventId());
- }
- containerInfo.lastRequestId = requestId;
- containerInfo.lastReponse = response;
- return response;
- }
- }
-
- private Map<String, TezLocalResource> convertLocalResourceMap(Map<String, LocalResource> ylrs)
- throws IOException {
- Map<String, TezLocalResource> tlrs = Maps.newHashMap();
- if (ylrs != null) {
- for (Entry<String, LocalResource> ylrEntry : ylrs.entrySet()) {
- TezLocalResource tlr;
- try {
- tlr = TezConverterUtils.convertYarnLocalResourceToTez(ylrEntry.getValue());
- } catch (URISyntaxException e) {
- throw new IOException(e);
- }
- tlrs.put(ylrEntry.getKey(), tlr);
- }
- }
- return tlrs;
- }
-
- private ContainerTask getContainerTask(ContainerId containerId) throws IOException {
- ContainerTask containerTask = null;
- ContainerInfo containerInfo = registeredContainers.get(containerId);
- if (containerInfo == null) {
- // This can happen if an unregisterTask comes in after we've done the initial checks for
- // registered containers. (Race between getTask from the container, and a potential STOP_CONTAINER
- // from somewhere within the AM)
- // Implies that an un-registration has taken place and the container needs to be asked to die.
- LOG.info("Container with id: " + containerId
- + " is valid, but no longer registered, and will be killed");
- containerTask = TASK_FOR_INVALID_JVM;
- } else {
- synchronized (containerInfo) {
- if (containerInfo.amContainerTask != null) {
- if (!containerInfo.taskPulled) {
- containerInfo.taskPulled = true;
- AMContainerTask amContainerTask = containerInfo.amContainerTask;
- containerTask = new ContainerTask(amContainerTask.getTask(), false,
- convertLocalResourceMap(amContainerTask.getAdditionalResources()),
- amContainerTask.getCredentials(), amContainerTask.haveCredentialsChanged());
- } else {
- containerTask = null;
- }
- } else {
- containerTask = null;
- }
- }
- }
- return containerTask;
+ public TaskCommunicator getTaskCommunicator() {
+ return taskCommunicator;
}
}
http://git-wip-us.apache.org/repos/asf/tez/blob/7b71d3b7/tez-dag/src/main/java/org/apache/tez/dag/app/TezTaskCommunicatorImpl.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/TezTaskCommunicatorImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/TezTaskCommunicatorImpl.java
new file mode 100644
index 0000000..5652937
--- /dev/null
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/TezTaskCommunicatorImpl.java
@@ -0,0 +1,474 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.dag.app;
+
+import java.io.IOException;
+import java.net.InetAddress;
+import java.net.InetSocketAddress;
+import java.net.URISyntaxException;
+import java.net.UnknownHostException;
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ConcurrentMap;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.Maps;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
+import org.apache.hadoop.ipc.ProtocolSignature;
+import org.apache.hadoop.ipc.RPC;
+import org.apache.hadoop.ipc.Server;
+import org.apache.hadoop.net.NetUtils;
+import org.apache.hadoop.security.Credentials;
+import org.apache.hadoop.security.authorize.PolicyProvider;
+import org.apache.hadoop.security.token.Token;
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.api.records.LocalResource;
+import org.apache.hadoop.yarn.util.ConverterUtils;
+import org.apache.tez.common.*;
+import org.apache.tez.common.ContainerContext;
+import org.apache.tez.common.security.JobTokenIdentifier;
+import org.apache.tez.common.security.JobTokenSecretManager;
+import org.apache.tez.common.security.TokenCache;
+import org.apache.tez.dag.api.TaskCommunicator;
+import org.apache.tez.dag.api.TaskCommunicatorContext;
+import org.apache.tez.dag.api.TaskHeartbeatRequest;
+import org.apache.tez.dag.api.TaskHeartbeatResponse;
+import org.apache.tez.dag.api.TezConfiguration;
+import org.apache.tez.dag.api.TezException;
+import org.apache.tez.dag.api.TezUncheckedException;
+import org.apache.tez.dag.app.security.authorize.TezAMPolicyProvider;
+import org.apache.tez.dag.records.TezTaskAttemptID;
+import org.apache.tez.runtime.api.impl.TaskSpec;
+import org.apache.tez.runtime.api.impl.TezHeartbeatRequest;
+import org.apache.tez.runtime.api.impl.TezHeartbeatResponse;
+
+@InterfaceAudience.Private
+public class TezTaskCommunicatorImpl extends TaskCommunicator {
+
+ private static final Log LOG = LogFactory.getLog(TezTaskCommunicatorImpl.class);
+
+ private static final ContainerTask TASK_FOR_INVALID_JVM = new ContainerTask(
+ null, true, null, null, false);
+
+ private final TaskCommunicatorContext taskCommunicatorContext;
+
+ private final ConcurrentMap<ContainerId, ContainerInfo> registeredContainers =
+ new ConcurrentHashMap<ContainerId, ContainerInfo>();
+ private final ConcurrentMap<TaskAttempt, ContainerId> attemptToContainerMap =
+ new ConcurrentHashMap<TaskAttempt, ContainerId>();
+
+ private final TezTaskUmbilicalProtocol taskUmbilical;
+ private InetSocketAddress address;
+ private Server server;
+
+ /**
+ * Mutable per-container bookkeeping: the currently assigned task (if any) together with the
+ * id and response of the last heartbeat. Code in this class locks on the instance before
+ * touching the task-assignment fields.
+ */
+ private static final class ContainerInfo {
+
+ // Identity of the container this record belongs to.
+ ContainerId containerId;
+
+ // Last heartbeat request id seen from the container and the response sent for it.
+ long lastRequestId;
+ TezHeartbeatResponse lastResponse;
+
+ // Current task assignment; all of these are cleared by reset().
+ TaskSpec taskSpec;
+ Map<String, LocalResource> additionalLRs;
+ Credentials credentials;
+ boolean credentialsChanged;
+ boolean taskPulled;
+
+ ContainerInfo(ContainerId containerId) {
+ this.containerId = containerId;
+ }
+
+ /** Clears the task assignment while leaving the heartbeat bookkeeping untouched. */
+ void reset() {
+ taskPulled = false;
+ credentialsChanged = false;
+ credentials = null;
+ additionalLRs = null;
+ taskSpec = null;
+ }
+ }
+
+
+
+ /**
+ * Construct the service.
+ *
+ * @param taskCommunicatorContext context through which this communicator obtains the
+ * credentials and application attempt id, and to which it reports heartbeats, task starts
+ * and commit checks
+ */
+ public TezTaskCommunicatorImpl(TaskCommunicatorContext taskCommunicatorContext) {
+ super(TezTaskCommunicatorImpl.class.getName());
+ this.taskCommunicatorContext = taskCommunicatorContext;
+ this.taskUmbilical = new TezTaskUmbilicalProtocolImpl();
+ }
+
+
+ /** Starts the service by bringing up the umbilical RPC endpoint (see startRpcServer()). */
+ @Override
+ public void serviceStart() {
+
+ startRpcServer();
+ }
+
+ /** Stops the service by shutting down the RPC server, if one was started. */
+ @Override
+ public void serviceStop() {
+ stopRpcServer();
+ }
+
+ /**
+ * Brings up the umbilical endpoint and records its address. In local mode no RPC server is
+ * started and the address is the local host with port 0. Otherwise an RPC server for
+ * TezTaskUmbilicalProtocol is built, secured with the session token, and started.
+ *
+ * @throws TezUncheckedException wrapping any IOException / UnknownHostException raised
+ */
+ protected void startRpcServer() {
+ Configuration conf = getConfig();
+ boolean localMode =
+ conf.getBoolean(TezConfiguration.TEZ_LOCAL_MODE, TezConfiguration.TEZ_LOCAL_MODE_DEFAULT);
+
+ if (localMode) {
+ // Local mode: nothing listens, only an address placeholder is recorded.
+ try {
+ this.address = new InetSocketAddress(InetAddress.getLocalHost(), 0);
+ } catch (UnknownHostException e) {
+ throw new TezUncheckedException(e);
+ }
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Not starting TaskAttemptListener RPC in LocalMode");
+ }
+ return;
+ }
+
+ try {
+ // Register the session token under the application id so RPC clients can authenticate.
+ JobTokenSecretManager secretManager = new JobTokenSecretManager();
+ Token<JobTokenIdentifier> sessionToken =
+ TokenCache.getSessionToken(taskCommunicatorContext.getCredentials());
+ String applicationId =
+ taskCommunicatorContext.getApplicationAttemptId().getApplicationId().toString();
+ secretManager.addTokenForJob(applicationId, sessionToken);
+
+ int numHandlers = conf.getInt(TezConfiguration.TEZ_AM_TASK_LISTENER_THREAD_COUNT,
+ TezConfiguration.TEZ_AM_TASK_LISTENER_THREAD_COUNT_DEFAULT);
+ server = new RPC.Builder(conf)
+ .setProtocol(TezTaskUmbilicalProtocol.class)
+ .setBindAddress("0.0.0.0")
+ .setPort(0)
+ .setInstance(taskUmbilical)
+ .setNumHandlers(numHandlers)
+ .setSecretManager(secretManager)
+ .build();
+
+ // Enable service authorization?
+ boolean authorizationEnabled = conf.getBoolean(
+ CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHORIZATION, false);
+ if (authorizationEnabled) {
+ refreshServiceAcls(conf, new TezAMPolicyProvider());
+ }
+
+ server.start();
+ this.address = NetUtils.getConnectAddress(server);
+ } catch (IOException e) {
+ throw new TezUncheckedException(e);
+ }
+ }
+
+ /** Stops the RPC server if one is running and clears the reference; safe to call repeatedly. */
+ protected void stopRpcServer() {
+ if (server != null) {
+ server.stop();
+ server = null;
+ }
+ }
+
+ // Applies the given policy provider's service ACLs to the RPC server.
+ // Requires that the server field has already been assigned.
+ private void refreshServiceAcls(Configuration configuration,
+ PolicyProvider policyProvider) {
+ this.server.refreshServiceAcl(configuration, policyProvider);
+ }
+
+ /**
+ * Starts tracking a container. The host and port arguments are not used by this
+ * implementation.
+ *
+ * @throws TezUncheckedException if the container is already registered
+ */
+ @Override
+ public void registerRunningContainer(ContainerId containerId, String host, int port) {
+ ContainerInfo oldInfo = registeredContainers.putIfAbsent(containerId, new ContainerInfo(containerId));
+ if (oldInfo != null) {
+ throw new TezUncheckedException("Multiple registrations for containerId: " + containerId);
+ }
+ }
+
+ /**
+ * Stops tracking a container and removes the attempt-to-container mapping of any task
+ * attempt that is still assigned to it. Unknown containers are ignored.
+ */
+ @Override
+ public void registerContainerEnd(ContainerId containerId) {
+ ContainerInfo containerInfo = registeredContainers.remove(containerId);
+ if (containerInfo != null) {
+ synchronized(containerInfo) {
+ if (containerInfo.taskSpec != null && containerInfo.taskSpec.getTaskAttemptID() != null) {
+ // The map is keyed by TaskAttempt; removing by the raw TezTaskAttemptID never matches
+ // and would leave a stale entry behind.
+ attemptToContainerMap.remove(new TaskAttempt(containerInfo.taskSpec.getTaskAttemptID()));
+ }
+ }
+ }
+ }
+
+ /**
+ * Assigns a task attempt to a registered container so that the container's next getTask()
+ * call can pick it up, and records the attempt-to-container mapping.
+ *
+ * @throws NullPointerException if the container is not registered (via Preconditions.checkNotNull)
+ * @throws TezUncheckedException if the container already has a task assigned, or the attempt
+ * is already registered to a container
+ */
+ @Override
+ public void registerRunningTaskAttempt(ContainerId containerId, TaskSpec taskSpec,
+ Map<String, LocalResource> additionalResources,
+ Credentials credentials, boolean credentialsChanged) {
+
+ ContainerInfo containerInfo = registeredContainers.get(containerId);
+ Preconditions.checkNotNull(containerInfo,
+ "Cannot register task attempt: " + taskSpec.getTaskAttemptID() + " to unknown container: " +
+ containerId);
+ synchronized (containerInfo) {
+ if (containerInfo.taskSpec != null) {
+ throw new TezUncheckedException(
+ "Cannot register task: " + taskSpec.getTaskAttemptID() + " to container: " +
+ containerId + " , with pre-existing assignment: " +
+ containerInfo.taskSpec.getTaskAttemptID());
+ }
+ containerInfo.taskSpec = taskSpec;
+ containerInfo.additionalLRs = additionalResources;
+ containerInfo.credentials = credentials;
+ containerInfo.credentialsChanged = credentialsChanged;
+ // Not yet handed to the container; getContainerTask() flips this when the task is pulled.
+ containerInfo.taskPulled = false;
+
+ // Note: the container state above has already been updated if this mapping turns out to exist.
+ ContainerId oldId = attemptToContainerMap.putIfAbsent(new TaskAttempt(taskSpec.getTaskAttemptID()), containerId);
+ if (oldId != null) {
+ throw new TezUncheckedException(
+ "Attempting to register an already registered taskAttempt with id: " +
+ taskSpec.getTaskAttemptID() + " to containerId: " + containerId +
+ ". Already registered to containerId: " + oldId);
+ }
+ }
+
+ }
+
+ /**
+ * Removes the attempt-to-container mapping for a task attempt and clears the task assignment
+ * on its container. Unknown attempts and attempts whose container is no longer registered
+ * are logged and otherwise ignored.
+ */
+ @Override
+ public void unregisterRunningTaskAttempt(TezTaskAttemptID taskAttemptID) {
+ TaskAttempt taskAttempt = new TaskAttempt(taskAttemptID);
+ // Single atomic removal; the mapping is gone from this point on.
+ ContainerId containerId = attemptToContainerMap.remove(taskAttempt);
+ if(containerId == null) {
+ LOG.warn("Unregister task attempt: " + taskAttempt + " from unknown container");
+ return;
+ }
+ ContainerInfo containerInfo = registeredContainers.get(containerId);
+ if (containerInfo == null) {
+ LOG.warn("Unregister task attempt: " + taskAttempt +
+ " from non-registered container: " + containerId);
+ return;
+ }
+ synchronized (containerInfo) {
+ // No second map removal here: it was redundant, and could discard a mapping that was
+ // re-registered after the removal above.
+ containerInfo.reset();
+ }
+ }
+
+ /** Returns the address recorded by startRpcServer(); null until that method has run. */
+ @Override
+ public InetSocketAddress getAddress() {
+ return address;
+ }
+
+ /** Returns the umbilical protocol implementation that is also served by the RPC server. */
+ public TezTaskUmbilicalProtocol getUmbilical() {
+ return this.taskUmbilical;
+ }
+
+ /**
+ * Umbilical protocol endpoint. Translates container-facing calls (getTask, canCommit,
+ * heartbeat) into lookups on this communicator's container state and calls on the
+ * TaskCommunicatorContext.
+ */
+ private class TezTaskUmbilicalProtocolImpl implements TezTaskUmbilicalProtocol {
+
+ /**
+ * Hands the pending task, if any, to the requesting container.
+ *
+ * @return the task to run, a should-die task for an invalid or unregistered container, or
+ * null when nothing new is assigned
+ */
+ @Override
+ public ContainerTask getTask(ContainerContext containerContext) throws IOException {
+ ContainerTask task = null;
+ if (containerContext == null || containerContext.getContainerIdentifier() == null) {
+ LOG.info("Invalid task request with an empty containerContext or containerId");
+ task = TASK_FOR_INVALID_JVM;
+ } else {
+ ContainerId containerId = ConverterUtils.toContainerId(containerContext
+ .getContainerIdentifier());
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Container with id: " + containerId + " asked for a task");
+ }
+ task = getContainerTask(containerId);
+ // Only a real task (not null, not the should-die marker) counts as started remotely.
+ if (task != null && !task.shouldDie()) {
+ taskCommunicatorContext
+ .taskStartedRemotely(task.getTaskSpec().getTaskAttemptID(), containerId);
+ }
+ }
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("getTask returning task: " + task);
+ }
+ return task;
+ }
+
+ /** Delegates the commit decision to the TaskCommunicatorContext. */
+ @Override
+ public boolean canCommit(TezTaskAttemptID taskAttemptId) throws IOException {
+ return taskCommunicatorContext.canCommit(taskAttemptId);
+ }
+
+ /**
+ * Processes a heartbeat from a container.
+ * An unregistered container is told to die. A request id equal to the last one seen is
+ * answered with the previously sent response. When the request names a task attempt, the
+ * attempt must be mapped to this container and the request id must be exactly one more
+ * than the last one; the heartbeat is then forwarded to the TaskCommunicatorContext.
+ *
+ * @throws TezException if the attempt is not registered to this container or the request
+ * id is out of sequence
+ */
+ @Override
+ public TezHeartbeatResponse heartbeat(TezHeartbeatRequest request) throws IOException,
+ TezException {
+ ContainerId containerId = ConverterUtils.toContainerId(request.getContainerIdentifier());
+ long requestId = request.getRequestId();
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Received heartbeat from container"
+ + ", request=" + request);
+ }
+
+ ContainerInfo containerInfo = registeredContainers.get(containerId);
+ if (containerInfo == null) {
+ LOG.warn("Received task heartbeat from unknown container with id: " + containerId +
+ ", asking it to die");
+ TezHeartbeatResponse response = new TezHeartbeatResponse();
+ response.setLastRequestId(requestId);
+ response.setShouldDie();
+ return response;
+ }
+
+ synchronized (containerInfo) {
+ if (containerInfo.lastRequestId == requestId) {
+ LOG.warn("Old sequenceId received: " + requestId
+ + ", Re-sending last response to client");
+ return containerInfo.lastResponse;
+ }
+ }
+
+ TaskHeartbeatResponse tResponse = null;
+
+
+ TezTaskAttemptID taskAttemptID = request.getCurrentTaskAttemptID();
+ if (taskAttemptID != null) {
+ synchronized (containerInfo) {
+ ContainerId containerIdFromMap = attemptToContainerMap.get(new TaskAttempt(taskAttemptID));
+ if (containerIdFromMap == null || !containerIdFromMap.equals(containerId)) {
+ throw new TezException("Attempt " + taskAttemptID
+ + " is not recognized for heartbeat");
+ }
+
+ if (containerInfo.lastRequestId + 1 != requestId) {
+ // Parenthesised so the expected id is computed arithmetically instead of the
+ // digit "1" being appended to the last request id.
+ throw new TezException("Container " + containerId
+ + " has invalid request id. Expected: "
+ + (containerInfo.lastRequestId + 1)
+ + " and actual: " + requestId);
+ }
+ }
+ // The context call is made outside the lock so no external code runs while it is held.
+ TaskHeartbeatRequest tRequest = new TaskHeartbeatRequest(request.getContainerIdentifier(),
+ request.getCurrentTaskAttemptID(), request.getEvents(), request.getStartIndex(),
+ request.getMaxEvents());
+ tResponse = taskCommunicatorContext.heartbeat(tRequest);
+ }
+ // NOTE(review): only the events are copied from tResponse. The listener side can return
+ // a RESPONSE_SHOULD_DIE response; if that carries a should-die flag it is not forwarded
+ // to the container here - confirm whether that is intended.
+ TezHeartbeatResponse response;
+ if (tResponse == null) {
+ response = new TezHeartbeatResponse();
+ } else {
+ response = new TezHeartbeatResponse(tResponse.getEvents());
+ }
+ response.setLastRequestId(requestId);
+ // These fields are read under the containerInfo lock above, so they are written under
+ // the same lock to keep the updates visible to other RPC handler threads.
+ synchronized (containerInfo) {
+ containerInfo.lastRequestId = requestId;
+ containerInfo.lastResponse = response;
+ }
+ return response;
+ }
+
+
+ // TODO Remove this method once we move to the Protobuf RPC engine
+ @Override
+ public long getProtocolVersion(String protocol, long clientVersion) throws IOException {
+ return versionID;
+ }
+
+ // TODO Remove this method once we move to the Protobuf RPC engine
+ @Override
+ public ProtocolSignature getProtocolSignature(String protocol, long clientVersion,
+ int clientMethodsHash) throws IOException {
+ return ProtocolSignature.getProtocolSignature(this, protocol,
+ clientVersion, clientMethodsHash);
+ }
+ }
+
+ /**
+ * Looks up the task to hand to a container.
+ *
+ * @return the should-die task when the container is not registered; a freshly built
+ * ContainerTask the first time an assigned task is requested; null when no task is
+ * assigned or the assigned task has already been handed out
+ */
+ private ContainerTask getContainerTask(ContainerId containerId) throws IOException {
+ ContainerInfo info = registeredContainers.get(containerId);
+ if (info == null) {
+ // Distinguish "was valid once" from "never heard of it" purely for the log message.
+ String reason = taskCommunicatorContext.isKnownContainer(containerId)
+ ? " is valid, but no longer registered, and will be killed"
+ : " is invalid and will be killed";
+ LOG.info("Container with id: " + containerId + reason);
+ return TASK_FOR_INVALID_JVM;
+ }
+
+ synchronized (info) {
+ if (info.taskSpec == null) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("No task assigned yet for running container: " + containerId);
+ }
+ return null;
+ }
+ if (info.taskPulled) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Task " + info.taskSpec.getTaskAttemptID() +
+ " already sent to container: " + containerId);
+ }
+ return null;
+ }
+ // Mark as pulled before building the task, so the task is offered at most once.
+ info.taskPulled = true;
+ return constructContainerTask(info);
+ }
+ }
+
+ // Builds the ContainerTask sent to a container from the assignment stored in containerInfo,
+ // converting the additional YARN local resources to their Tez form. The second constructor
+ // argument is false here, in contrast to TASK_FOR_INVALID_JVM which passes true.
+ private ContainerTask constructContainerTask(ContainerInfo containerInfo) throws IOException {
+ return new ContainerTask(containerInfo.taskSpec, false,
+ convertLocalResourceMap(containerInfo.additionalLRs), containerInfo.credentials,
+ containerInfo.credentialsChanged);
+ }
+
+ /**
+ * Converts a map of YARN local resources into the equivalent map of Tez local resources.
+ *
+ * @param ylrs resources keyed by name; may be null
+ * @return a new map with the same keys; empty when ylrs is null
+ * @throws IOException wrapping the URISyntaxException raised by a failed conversion
+ */
+ private Map<String, TezLocalResource> convertLocalResourceMap(Map<String, LocalResource> ylrs)
+ throws IOException {
+ Map<String, TezLocalResource> converted = Maps.newHashMap();
+ if (ylrs == null) {
+ return converted;
+ }
+ for (Map.Entry<String, LocalResource> entry : ylrs.entrySet()) {
+ try {
+ converted.put(entry.getKey(),
+ TezConverterUtils.convertYarnLocalResourceToTez(entry.getValue()));
+ } catch (URISyntaxException e) {
+ throw new IOException(e);
+ }
+ }
+ return converted;
+ }
+
+
+ // Map key identifying a task attempt. It currently wraps a TezTaskAttemptID; per the TODOs
+ // below it is expected to move to a vertex name / task index / attempt index form.
+ private static class TaskAttempt {
+ // TODO TEZ-2003 Change this to work with VertexName, int id, int version
+ // TODO TEZ-2003 Avoid constructing this unit all over the place
+ private final TezTaskAttemptID taskAttemptId;
+
+ TaskAttempt(TezTaskAttemptID taskAttemptId) {
+ this.taskAttemptId = taskAttemptId;
+ }
+
+ // Two holders are equal exactly when they wrap equal attempt ids.
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) {
+ return true;
+ }
+ return (o instanceof TaskAttempt)
+ && taskAttemptId.equals(((TaskAttempt) o).taskAttemptId);
+ }
+
+ // Consistent with equals(): delegates to the wrapped id.
+ @Override
+ public int hashCode() {
+ return taskAttemptId.hashCode();
+ }
+
+ @Override
+ public String toString() {
+ return "TaskAttempt{" + "taskAttemptId=" + taskAttemptId + '}';
+ }
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/tez/blob/7b71d3b7/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/LocalContainerLauncher.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/LocalContainerLauncher.java b/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/LocalContainerLauncher.java
index 9faf8c0..e9ba9d7 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/LocalContainerLauncher.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/LocalContainerLauncher.java
@@ -59,6 +59,8 @@ import org.apache.tez.dag.api.TezException;
import org.apache.tez.dag.api.TezUncheckedException;
import org.apache.tez.dag.app.AppContext;
import org.apache.tez.dag.app.TaskAttemptListener;
+import org.apache.tez.dag.app.TaskAttemptListenerImpTezDag;
+import org.apache.tez.dag.app.TezTaskCommunicatorImpl;
import org.apache.tez.dag.app.rm.NMCommunicatorEvent;
import org.apache.tez.dag.app.rm.NMCommunicatorLaunchRequestEvent;
import org.apache.tez.dag.app.rm.NMCommunicatorStopRequestEvent;
@@ -86,7 +88,7 @@ public class LocalContainerLauncher extends AbstractService implements
private static final Logger LOG = LoggerFactory.getLogger(LocalContainerLauncher.class);
private final AppContext context;
- private final TaskAttemptListener taskAttemptListener;
+ private final TezTaskUmbilicalProtocol taskUmbilicalProtocol;
private final AtomicBoolean serviceStopped = new AtomicBoolean(false);
private final String workingDirectory;
private final Map<String, String> localEnv = new HashMap<String, String>();
@@ -114,7 +116,9 @@ public class LocalContainerLauncher extends AbstractService implements
String workingDirectory) throws UnknownHostException {
super(LocalContainerLauncher.class.getName());
this.context = context;
- this.taskAttemptListener = taskAttemptListener;
+ TaskAttemptListenerImpTezDag taListener = (TaskAttemptListenerImpTezDag)taskAttemptListener;
+ TezTaskCommunicatorImpl taskComm = (TezTaskCommunicatorImpl) taListener.getTaskCommunicator();
+ this.taskUmbilicalProtocol = taskComm.getUmbilical();
this.workingDirectory = workingDirectory;
AuxiliaryServiceHelper.setServiceDataIntoEnv(
ShuffleUtils.SHUFFLE_HANDLER_SERVICE_ID, ByteBuffer.allocate(4).putInt(0), localEnv);
@@ -215,7 +219,7 @@ public class LocalContainerLauncher extends AbstractService implements
tezChild =
createTezChild(context.getAMConf(), event.getContainerId(), tokenIdentifier,
context.getApplicationAttemptId().getAttemptId(), context.getLocalDirs(),
- (TezTaskUmbilicalProtocol) taskAttemptListener,
+ taskUmbilicalProtocol,
TezCommonUtils.parseCredentialsBytes(event.getContainerLaunchContext().getTokens().array()));
} catch (InterruptedException e) {
handleLaunchFailed(e, event.getContainerId());
http://git-wip-us.apache.org/repos/asf/tez/blob/7b71d3b7/tez-dag/src/main/java/org/apache/tez/dag/app/rm/container/AMContainer.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/container/AMContainer.java b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/container/AMContainer.java
index a6b403d..0fc2e12 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/container/AMContainer.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/container/AMContainer.java
@@ -22,6 +22,7 @@ import java.util.List;
import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.api.records.NodeId;
import org.apache.hadoop.yarn.event.EventHandler;
import org.apache.tez.dag.records.TezTaskAttemptID;
@@ -32,5 +33,5 @@ public interface AMContainer extends EventHandler<AMContainerEvent>{
public Container getContainer();
public List<TezTaskAttemptID> getAllTaskAttempts();
public TezTaskAttemptID getCurrentTaskAttempt();
-
+
}
http://git-wip-us.apache.org/repos/asf/tez/blob/7b71d3b7/tez-dag/src/main/java/org/apache/tez/dag/app/rm/container/AMContainerEventAssignTA.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/container/AMContainerEventAssignTA.java b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/container/AMContainerEventAssignTA.java
index 682cd02..0398882 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/container/AMContainerEventAssignTA.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/container/AMContainerEventAssignTA.java
@@ -27,6 +27,8 @@ import org.apache.tez.runtime.api.impl.TaskSpec;
public class AMContainerEventAssignTA extends AMContainerEvent {
+ // TODO TEZ-2003. Add the task priority to this event.
+
private final TezTaskAttemptID attemptId;
// TODO Maybe have tht TAL pull the remoteTask from the TaskAttempt itself ?
private final TaskSpec remoteTaskSpec;
http://git-wip-us.apache.org/repos/asf/tez/blob/7b71d3b7/tez-dag/src/main/java/org/apache/tez/dag/app/rm/container/AMContainerImpl.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/container/AMContainerImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/container/AMContainerImpl.java
index 330f2b7..1acec9c 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/container/AMContainerImpl.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/container/AMContainerImpl.java
@@ -35,6 +35,7 @@ import org.apache.hadoop.yarn.api.records.ContainerExitStatus;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
import org.apache.hadoop.yarn.api.records.LocalResource;
+import org.apache.hadoop.yarn.api.records.NodeId;
import org.apache.hadoop.yarn.event.Event;
import org.apache.hadoop.yarn.event.EventHandler;
import org.apache.hadoop.yarn.state.InvalidStateTransitonException;
http://git-wip-us.apache.org/repos/asf/tez/blob/7b71d3b7/tez-dag/src/test/java/org/apache/tez/dag/app/MockDAGAppMaster.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/MockDAGAppMaster.java b/tez-dag/src/test/java/org/apache/tez/dag/app/MockDAGAppMaster.java
index 5cd487c..b846922 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/MockDAGAppMaster.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/MockDAGAppMaster.java
@@ -37,6 +37,7 @@ import java.util.concurrent.atomic.AtomicLong;
import org.apache.tez.dag.app.dag.DAG;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import com.google.common.annotations.VisibleForTesting;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.security.Credentials;
import org.apache.hadoop.service.AbstractService;
@@ -50,7 +51,10 @@ import org.apache.tez.client.TezApiVersionInfo;
import org.apache.tez.common.ContainerContext;
import org.apache.tez.common.ContainerTask;
import org.apache.tez.common.counters.TezCounters;
+import org.apache.tez.dag.api.TaskHeartbeatRequest;
+import org.apache.tez.dag.api.TaskHeartbeatResponse;
import org.apache.tez.dag.api.TezConfiguration;
+import org.apache.tez.dag.api.TaskCommunicator;
import org.apache.tez.dag.api.TezUncheckedException;
import org.apache.tez.dag.app.launcher.ContainerLauncher;
import org.apache.tez.dag.app.rm.NMCommunicatorEvent;
@@ -72,8 +76,6 @@ import org.apache.tez.runtime.api.impl.TaskSpec;
import org.apache.tez.runtime.api.impl.TaskStatistics;
import org.apache.tez.runtime.api.impl.TezEvent;
import org.apache.tez.runtime.api.impl.EventMetaData.EventProducerConsumerType;
-import org.apache.tez.runtime.api.impl.TezHeartbeatRequest;
-import org.apache.tez.runtime.api.impl.TezHeartbeatResponse;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
@@ -130,6 +132,7 @@ public class MockDAGAppMaster extends DAGAppMaster {
Map<ContainerId, ContainerData> containers = Maps.newConcurrentMap();
ArrayBlockingQueue<Worker> workers;
TaskAttemptListenerImpTezDag taListener;
+ TezTaskCommunicatorImpl taskCommunicator;
AtomicBoolean startScheduling = new AtomicBoolean(true);
AtomicBoolean goFlag;
@@ -192,6 +195,7 @@ public class MockDAGAppMaster extends DAGAppMaster {
@Override
public void serviceStart() throws Exception {
taListener = (TaskAttemptListenerImpTezDag) getTaskAttemptListener();
+ taskCommunicator = (TezTaskCommunicatorImpl) taListener.getTaskCommunicator();
eventHandlingThread = new Thread(this);
eventHandlingThread.start();
ExecutorService rawExecutor = Executors.newFixedThreadPool(handlerConcurrency,
@@ -331,10 +335,10 @@ public class MockDAGAppMaster extends DAGAppMaster {
}
}
- private void doHeartbeat(TezHeartbeatRequest request, ContainerData cData) throws Exception {
+ private void doHeartbeat(TaskHeartbeatRequest request, ContainerData cData) throws Exception {
long startTime = System.nanoTime();
long startCpuTime = threadMxBean.getCurrentThreadCpuTime();
- TezHeartbeatResponse response = taListener.heartbeat(request);
+ TaskHeartbeatResponse response = taListener.heartbeat(request);
if (response.shouldDie()) {
cData.remove();
} else {
@@ -385,7 +389,8 @@ public class MockDAGAppMaster extends DAGAppMaster {
try {
if (cData.taId == null) {
// if container is not assigned a task, ask for a task
- ContainerTask cTask = taListener.getTask(new ContainerContext(cData.cIdStr));
+ ContainerTask cTask =
+ taskCommunicator.getUmbilical().getTask(new ContainerContext(cData.cIdStr));
if (cTask != null) {
if (cTask.shouldDie()) {
cData.remove();
@@ -420,8 +425,11 @@ public class MockDAGAppMaster extends DAGAppMaster {
float progress = updateProgress ? cData.numUpdates/maxUpdates : 0f;
events.add(new TezEvent(new TaskStatusUpdateEvent(counters, progress, stats), new EventMetaData(
EventProducerConsumerType.SYSTEM, cData.vName, "", cData.taId)));
- TezHeartbeatRequest request = new TezHeartbeatRequest(cData.numUpdates, events,
- cData.cIdStr, cData.taId, cData.nextFromEventId, 50000);
+// TezHeartbeatRequest request = new TezHeartbeatRequest(cData.numUpdates, events,
+// cData.cIdStr, cData.taId, cData.nextFromEventId, 50000);
+ TaskHeartbeatRequest request =
+ new TaskHeartbeatRequest(cData.cIdStr, cData.taId, events, cData.nextFromEventId,
+ 50000);
doHeartbeat(request, cData);
} else if (version != null && cData.taId.getId() <= version.intValue()) {
preemptContainer(cData);
@@ -431,8 +439,9 @@ public class MockDAGAppMaster extends DAGAppMaster {
List<TezEvent> events = Collections.singletonList(new TezEvent(
new TaskAttemptCompletedEvent(), new EventMetaData(
EventProducerConsumerType.SYSTEM, cData.vName, "", cData.taId)));
- TezHeartbeatRequest request = new TezHeartbeatRequest(++cData.numUpdates, events,
- cData.cIdStr, cData.taId, cData.nextFromEventId, 10000);
+ TaskHeartbeatRequest request =
+ new TaskHeartbeatRequest(cData.cIdStr, cData.taId, events, cData.nextFromEventId,
+ 10000);
doHeartbeat(request, cData);
cData.clear();
}
http://git-wip-us.apache.org/repos/asf/tez/blob/7b71d3b7/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskAttemptListenerImplTezDag.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskAttemptListenerImplTezDag.java b/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskAttemptListenerImplTezDag.java
index db8eff1..46c412e 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskAttemptListenerImplTezDag.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskAttemptListenerImplTezDag.java
@@ -1,16 +1,16 @@
/*
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
package org.apache.tez.dag.app;
@@ -18,6 +18,7 @@ import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
+import static org.mockito.Matchers.any;
import static org.mockito.Mockito.doReturn;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.times;
@@ -34,6 +35,7 @@ import java.util.Map;
import org.apache.hadoop.yarn.api.records.ApplicationAccessType;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.ApplicationId;
+import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.event.Event;
import org.apache.hadoop.yarn.event.EventHandler;
@@ -41,6 +43,12 @@ import org.apache.tez.common.ContainerContext;
import org.apache.tez.common.ContainerTask;
import org.apache.tez.common.security.JobTokenSecretManager;
import org.apache.tez.dag.api.TezException;
+import org.apache.hadoop.yarn.api.records.NodeId;
+import org.apache.hadoop.yarn.event.EventHandler;
+import org.apache.tez.common.ContainerContext;
+import org.apache.tez.common.ContainerTask;
+import org.apache.tez.common.TezTaskUmbilicalProtocol;
+import org.apache.tez.dag.api.TaskCommunicatorContext;
import org.apache.tez.dag.app.dag.DAG;
import org.apache.tez.dag.app.dag.Vertex;
import org.apache.tez.dag.app.dag.event.TaskAttemptEventType;
@@ -101,9 +109,18 @@ public class TestTaskAttemptListenerImplTezDag {
doReturn(dag).when(appContext).getCurrentDAG();
doReturn(appAcls).when(appContext).getApplicationACLs();
doReturn(amContainerMap).when(appContext).getAllContainers();
-
- taskAttemptListener = new TaskAttemptListenerImplForTest(appContext,
- mock(TaskHeartbeatHandler.class), mock(ContainerHeartbeatHandler.class), null);
+ NodeId nodeId = NodeId.newInstance("localhost", 0);
+ AMContainer amContainer = mock(AMContainer.class);
+ Container container = mock(Container.class);
+ doReturn(nodeId).when(container).getNodeId();
+ doReturn(amContainer).when(amContainerMap).get(any(ContainerId.class));
+ doReturn(container).when(amContainer).getContainer();
+
+ taskAttemptListener =
+ new TaskAttemptListenerImpTezDag(appContext, mock(TaskHeartbeatHandler.class),
+ mock(ContainerHeartbeatHandler.class), null);
+ TezTaskCommunicatorImpl taskCommunicator = (TezTaskCommunicatorImpl)taskAttemptListener.getTaskCommunicator();
+ TezTaskUmbilicalProtocol tezUmbilical = taskCommunicator.getUmbilical();
taskSpec = mock(TaskSpec.class);
doReturn(taskAttemptID).when(taskSpec).getTaskAttemptID();
@@ -115,32 +132,30 @@ public class TestTaskAttemptListenerImplTezDag {
public void testGetTask() throws IOException {
ContainerId containerId1 = createContainerId(appId, 1);
- doReturn(mock(AMContainer.class)).when(amContainerMap).get(containerId1);
ContainerContext containerContext1 = new ContainerContext(containerId1.toString());
- containerTask = taskAttemptListener.getTask(containerContext1);
+ containerTask = tezUmbilical.getTask(containerContext1);
assertTrue(containerTask.shouldDie());
ContainerId containerId2 = createContainerId(appId, 2);
- doReturn(mock(AMContainer.class)).when(amContainerMap).get(containerId2);
ContainerContext containerContext2 = new ContainerContext(containerId2.toString());
taskAttemptListener.registerRunningContainer(containerId2);
- containerTask = taskAttemptListener.getTask(containerContext2);
+ containerTask = tezUmbilical.getTask(containerContext2);
assertNull(containerTask);
// Valid task registered
taskAttemptListener.registerTaskAttempt(amContainerTask, containerId2);
- containerTask = taskAttemptListener.getTask(containerContext2);
+ containerTask = tezUmbilical.getTask(containerContext2);
assertFalse(containerTask.shouldDie());
assertEquals(taskSpec, containerTask.getTaskSpec());
// Task unregistered. Should respond to heartbeats
- taskAttemptListener.unregisterTaskAttempt(taskAttemptID);
- containerTask = taskAttemptListener.getTask(containerContext2);
+ taskAttemptListener.unregisterTaskAttempt(taskAttemptId);
+ containerTask = tezUmbilical.getTask(containerContext2);
assertNull(containerTask);
// Container unregistered. Should send a shouldDie = true
taskAttemptListener.unregisterRunningContainer(containerId2);
- containerTask = taskAttemptListener.getTask(containerContext2);
+ containerTask = tezUmbilical.getTask(containerContext2);
assertTrue(containerTask.shouldDie());
ContainerId containerId3 = createContainerId(appId, 3);
@@ -154,27 +169,30 @@ public class TestTaskAttemptListenerImplTezDag {
AMContainerTask amContainerTask2 = new AMContainerTask(taskSpec, null, null, false, 0);
taskAttemptListener.registerTaskAttempt(amContainerTask2, containerId3);
taskAttemptListener.unregisterRunningContainer(containerId3);
- containerTask = taskAttemptListener.getTask(containerContext3);
+ containerTask = tezUmbilical.getTask(containerContext3);
assertTrue(containerTask.shouldDie());
}
@Test(timeout = 5000)
public void testGetTaskMultiplePulls() throws IOException {
+ TezTaskCommunicatorImpl taskCommunicator = (TezTaskCommunicatorImpl)taskAttemptListener.getTaskCommunicator();
+ TezTaskUmbilicalProtocol tezUmbilical = taskCommunicator.getUmbilical();
+
ContainerId containerId1 = createContainerId(appId, 1);
doReturn(mock(AMContainer.class)).when(amContainerMap).get(containerId1);
ContainerContext containerContext1 = new ContainerContext(containerId1.toString());
taskAttemptListener.registerRunningContainer(containerId1);
- containerTask = taskAttemptListener.getTask(containerContext1);
+ containerTask = tezUmbilical.getTask(containerContext1);
assertNull(containerTask);
// Register task
taskAttemptListener.registerTaskAttempt(amContainerTask, containerId1);
- containerTask = taskAttemptListener.getTask(containerContext1);
+ containerTask = tezUmbilical.getTask(containerContext1);
assertFalse(containerTask.shouldDie());
assertEquals(taskSpec, containerTask.getTaskSpec());
// Try pulling again - simulates re-use pull
- containerTask = taskAttemptListener.getTask(containerContext1);
+ containerTask = tezUmbilical.getTask(containerContext1);
assertNull(containerTask);
}
@@ -266,13 +284,11 @@ public class TestTaskAttemptListenerImplTezDag {
return ContainerId.newInstance(appAttemptId, containerIdx);
}
- private static class TaskAttemptListenerImplForTest extends TaskAttemptListenerImpTezDag {
+ private static class TezTaskCommunicatorImplForTest extends TezTaskCommunicatorImpl {
- public TaskAttemptListenerImplForTest(AppContext context,
- TaskHeartbeatHandler thh,
- ContainerHeartbeatHandler chh,
- JobTokenSecretManager jobTokenSecretManager) {
- super(context, thh, chh, jobTokenSecretManager);
+ public TezTaskCommunicatorImplForTest(
+ TaskCommunicatorContext taskCommunicatorContext) {
+ super(taskCommunicatorContext);
}
@Override
[28/43] tez git commit: TEZ-2139. Update version to
0.7.0-TEZ-2003-SNAPSHOT. (sseth)
Posted by ss...@apache.org.
TEZ-2139. Update version to 0.7.0-TEZ-2003-SNAPSHOT. (sseth)
Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/6014c965
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/6014c965
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/6014c965
Branch: refs/heads/TEZ-2003
Commit: 6014c965aa25255d25a8a5538888a5ff9d01fbf5
Parents: 39295ca
Author: Siddharth Seth <ss...@apache.org>
Authored: Mon Feb 23 21:59:39 2015 -0800
Committer: Siddharth Seth <ss...@apache.org>
Committed: Fri May 8 14:41:30 2015 -0700
----------------------------------------------------------------------
TEZ-2003-CHANGES.txt | 1 +
docs/pom.xml | 2 +-
pom.xml | 2 +-
tez-api/pom.xml | 2 +-
tez-common/pom.xml | 2 +-
tez-dag/pom.xml | 2 +-
tez-dist/pom.xml | 2 +-
tez-examples/pom.xml | 2 +-
tez-ext-service-tests/pom.xml | 2 +-
tez-mapreduce/pom.xml | 2 +-
tez-plugins/pom.xml | 2 +-
tez-plugins/tez-mbeans-resource-calculator/pom.xml | 2 +-
tez-plugins/tez-yarn-timeline-history-with-acls/pom.xml | 2 +-
tez-plugins/tez-yarn-timeline-history/pom.xml | 2 +-
tez-runtime-internals/pom.xml | 2 +-
tez-runtime-library/pom.xml | 2 +-
tez-tests/pom.xml | 2 +-
tez-ui/pom.xml | 2 +-
18 files changed, 18 insertions(+), 17 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/tez/blob/6014c965/TEZ-2003-CHANGES.txt
----------------------------------------------------------------------
diff --git a/TEZ-2003-CHANGES.txt b/TEZ-2003-CHANGES.txt
index 4377f57..5c5fd8e 100644
--- a/TEZ-2003-CHANGES.txt
+++ b/TEZ-2003-CHANGES.txt
@@ -8,5 +8,6 @@ ALL CHANGES:
TEZ-2125. Create a task communicator for local mode. Allow tasks to run in the AM.
TEZ-2131. Add additional tests for tasks running in the AM.
TEZ-2138. Fix minor bugs in adding default scheduler, getting launchers.
+ TEZ-2139. Update tez version to 0.7.0-TEZ-2003-SNAPSHOT.
INCOMPATIBLE CHANGES:
http://git-wip-us.apache.org/repos/asf/tez/blob/6014c965/docs/pom.xml
----------------------------------------------------------------------
diff --git a/docs/pom.xml b/docs/pom.xml
index a475c58..ded04a8 100644
--- a/docs/pom.xml
+++ b/docs/pom.xml
@@ -27,7 +27,7 @@
<parent>
<groupId>org.apache.tez</groupId>
<artifactId>tez</artifactId>
- <version>0.8.0-SNAPSHOT</version>
+ <version>0.8.0-TEZ-2003-SNAPSHOT</version>
</parent>
<artifactId>tez-docs</artifactId>
<packaging>pom</packaging>
http://git-wip-us.apache.org/repos/asf/tez/blob/6014c965/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index ca9db11..8799f0a 100644
--- a/pom.xml
+++ b/pom.xml
@@ -19,7 +19,7 @@
<groupId>org.apache.tez</groupId>
<artifactId>tez</artifactId>
<packaging>pom</packaging>
- <version>0.8.0-SNAPSHOT</version>
+ <version>0.8.0-TEZ-2003-SNAPSHOT</version>
<name>tez</name>
<licenses>
http://git-wip-us.apache.org/repos/asf/tez/blob/6014c965/tez-api/pom.xml
----------------------------------------------------------------------
diff --git a/tez-api/pom.xml b/tez-api/pom.xml
index 0a5d570..46e8dd3 100644
--- a/tez-api/pom.xml
+++ b/tez-api/pom.xml
@@ -20,7 +20,7 @@
<parent>
<groupId>org.apache.tez</groupId>
<artifactId>tez</artifactId>
- <version>0.8.0-SNAPSHOT</version>
+ <version>0.8.0-TEZ-2003-SNAPSHOT</version>
</parent>
<artifactId>tez-api</artifactId>
http://git-wip-us.apache.org/repos/asf/tez/blob/6014c965/tez-common/pom.xml
----------------------------------------------------------------------
diff --git a/tez-common/pom.xml b/tez-common/pom.xml
index c6922be..5e0ceef 100644
--- a/tez-common/pom.xml
+++ b/tez-common/pom.xml
@@ -20,7 +20,7 @@
<parent>
<groupId>org.apache.tez</groupId>
<artifactId>tez</artifactId>
- <version>0.8.0-SNAPSHOT</version>
+ <version>0.8.0-TEZ-2003-SNAPSHOT</version>
</parent>
<artifactId>tez-common</artifactId>
http://git-wip-us.apache.org/repos/asf/tez/blob/6014c965/tez-dag/pom.xml
----------------------------------------------------------------------
diff --git a/tez-dag/pom.xml b/tez-dag/pom.xml
index cc005bb..4d2e9f5 100644
--- a/tez-dag/pom.xml
+++ b/tez-dag/pom.xml
@@ -19,7 +19,7 @@
<parent>
<groupId>org.apache.tez</groupId>
<artifactId>tez</artifactId>
- <version>0.8.0-SNAPSHOT</version>
+ <version>0.8.0-TEZ-2003-SNAPSHOT</version>
</parent>
<properties>
<tez.component>tez-dag</tez.component>
http://git-wip-us.apache.org/repos/asf/tez/blob/6014c965/tez-dist/pom.xml
----------------------------------------------------------------------
diff --git a/tez-dist/pom.xml b/tez-dist/pom.xml
index e78cb8e..e188fe2 100644
--- a/tez-dist/pom.xml
+++ b/tez-dist/pom.xml
@@ -21,7 +21,7 @@
<parent>
<groupId>org.apache.tez</groupId>
<artifactId>tez</artifactId>
- <version>0.8.0-SNAPSHOT</version>
+ <version>0.8.0-TEZ-2003-SNAPSHOT</version>
</parent>
<artifactId>tez-dist</artifactId>
http://git-wip-us.apache.org/repos/asf/tez/blob/6014c965/tez-examples/pom.xml
----------------------------------------------------------------------
diff --git a/tez-examples/pom.xml b/tez-examples/pom.xml
index cd2df5c..d15e966 100644
--- a/tez-examples/pom.xml
+++ b/tez-examples/pom.xml
@@ -20,7 +20,7 @@
<parent>
<groupId>org.apache.tez</groupId>
<artifactId>tez</artifactId>
- <version>0.8.0-SNAPSHOT</version>
+ <version>0.8.0-TEZ-2003-SNAPSHOT</version>
</parent>
<artifactId>tez-examples</artifactId>
http://git-wip-us.apache.org/repos/asf/tez/blob/6014c965/tez-ext-service-tests/pom.xml
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/pom.xml b/tez-ext-service-tests/pom.xml
index 37f68b1..1113341 100644
--- a/tez-ext-service-tests/pom.xml
+++ b/tez-ext-service-tests/pom.xml
@@ -20,7 +20,7 @@
<parent>
<artifactId>tez</artifactId>
<groupId>org.apache.tez</groupId>
- <version>0.7.0-SNAPSHOT</version>
+ <version>0.8.0-TEZ-2003-SNAPSHOT</version>
</parent>
<!-- TODO TEZ-2003 Merge this into the tez-tests module -->
http://git-wip-us.apache.org/repos/asf/tez/blob/6014c965/tez-mapreduce/pom.xml
----------------------------------------------------------------------
diff --git a/tez-mapreduce/pom.xml b/tez-mapreduce/pom.xml
index 300f781..af8bc8b 100644
--- a/tez-mapreduce/pom.xml
+++ b/tez-mapreduce/pom.xml
@@ -20,7 +20,7 @@
<parent>
<groupId>org.apache.tez</groupId>
<artifactId>tez</artifactId>
- <version>0.8.0-SNAPSHOT</version>
+ <version>0.8.0-TEZ-2003-SNAPSHOT</version>
</parent>
<artifactId>tez-mapreduce</artifactId>
http://git-wip-us.apache.org/repos/asf/tez/blob/6014c965/tez-plugins/pom.xml
----------------------------------------------------------------------
diff --git a/tez-plugins/pom.xml b/tez-plugins/pom.xml
index 9b2a4cb..0b148ec 100644
--- a/tez-plugins/pom.xml
+++ b/tez-plugins/pom.xml
@@ -21,7 +21,7 @@
<parent>
<groupId>org.apache.tez</groupId>
<artifactId>tez</artifactId>
- <version>0.8.0-SNAPSHOT</version>
+ <version>0.8.0-TEZ-2003-SNAPSHOT</version>
</parent>
<artifactId>tez-plugins</artifactId>
<packaging>pom</packaging>
http://git-wip-us.apache.org/repos/asf/tez/blob/6014c965/tez-plugins/tez-mbeans-resource-calculator/pom.xml
----------------------------------------------------------------------
diff --git a/tez-plugins/tez-mbeans-resource-calculator/pom.xml b/tez-plugins/tez-mbeans-resource-calculator/pom.xml
index 4be7f28..5c828a4 100644
--- a/tez-plugins/tez-mbeans-resource-calculator/pom.xml
+++ b/tez-plugins/tez-mbeans-resource-calculator/pom.xml
@@ -19,7 +19,7 @@
<parent>
<groupId>org.apache.tez</groupId>
<artifactId>tez-plugins</artifactId>
- <version>0.8.0-SNAPSHOT</version>
+ <version>0.8.0-TEZ-2003-SNAPSHOT</version>
</parent>
<artifactId>tez-mbeans-resource-calculator</artifactId>
http://git-wip-us.apache.org/repos/asf/tez/blob/6014c965/tez-plugins/tez-yarn-timeline-history-with-acls/pom.xml
----------------------------------------------------------------------
diff --git a/tez-plugins/tez-yarn-timeline-history-with-acls/pom.xml b/tez-plugins/tez-yarn-timeline-history-with-acls/pom.xml
index aeae5cf..761bc10 100644
--- a/tez-plugins/tez-yarn-timeline-history-with-acls/pom.xml
+++ b/tez-plugins/tez-yarn-timeline-history-with-acls/pom.xml
@@ -19,7 +19,7 @@
<parent>
<groupId>org.apache.tez</groupId>
<artifactId>tez-plugins</artifactId>
- <version>0.8.0-SNAPSHOT</version>
+ <version>0.8.0-TEZ-2003-SNAPSHOT</version>
</parent>
<artifactId>tez-yarn-timeline-history-with-acls</artifactId>
http://git-wip-us.apache.org/repos/asf/tez/blob/6014c965/tez-plugins/tez-yarn-timeline-history/pom.xml
----------------------------------------------------------------------
diff --git a/tez-plugins/tez-yarn-timeline-history/pom.xml b/tez-plugins/tez-yarn-timeline-history/pom.xml
index ca76988..1e40329 100644
--- a/tez-plugins/tez-yarn-timeline-history/pom.xml
+++ b/tez-plugins/tez-yarn-timeline-history/pom.xml
@@ -19,7 +19,7 @@
<parent>
<groupId>org.apache.tez</groupId>
<artifactId>tez-plugins</artifactId>
- <version>0.8.0-SNAPSHOT</version>
+ <version>0.8.0-TEZ-2003-SNAPSHOT</version>
</parent>
<artifactId>tez-yarn-timeline-history</artifactId>
http://git-wip-us.apache.org/repos/asf/tez/blob/6014c965/tez-runtime-internals/pom.xml
----------------------------------------------------------------------
diff --git a/tez-runtime-internals/pom.xml b/tez-runtime-internals/pom.xml
index 3756fa9..b6dee8c 100644
--- a/tez-runtime-internals/pom.xml
+++ b/tez-runtime-internals/pom.xml
@@ -20,7 +20,7 @@
<parent>
<groupId>org.apache.tez</groupId>
<artifactId>tez</artifactId>
- <version>0.8.0-SNAPSHOT</version>
+ <version>0.8.0-TEZ-2003-SNAPSHOT</version>
</parent>
<artifactId>tez-runtime-internals</artifactId>
http://git-wip-us.apache.org/repos/asf/tez/blob/6014c965/tez-runtime-library/pom.xml
----------------------------------------------------------------------
diff --git a/tez-runtime-library/pom.xml b/tez-runtime-library/pom.xml
index 03e0ec3..8b566df 100644
--- a/tez-runtime-library/pom.xml
+++ b/tez-runtime-library/pom.xml
@@ -20,7 +20,7 @@
<parent>
<groupId>org.apache.tez</groupId>
<artifactId>tez</artifactId>
- <version>0.8.0-SNAPSHOT</version>
+ <version>0.8.0-TEZ-2003-SNAPSHOT</version>
</parent>
<artifactId>tez-runtime-library</artifactId>
http://git-wip-us.apache.org/repos/asf/tez/blob/6014c965/tez-tests/pom.xml
----------------------------------------------------------------------
diff --git a/tez-tests/pom.xml b/tez-tests/pom.xml
index 91bc753..530ab77 100644
--- a/tez-tests/pom.xml
+++ b/tez-tests/pom.xml
@@ -20,7 +20,7 @@
<parent>
<groupId>org.apache.tez</groupId>
<artifactId>tez</artifactId>
- <version>0.8.0-SNAPSHOT</version>
+ <version>0.8.0-TEZ-2003-SNAPSHOT</version>
</parent>
<artifactId>tez-tests</artifactId>
http://git-wip-us.apache.org/repos/asf/tez/blob/6014c965/tez-ui/pom.xml
----------------------------------------------------------------------
diff --git a/tez-ui/pom.xml b/tez-ui/pom.xml
index bfc6f01..8ecf7d0 100644
--- a/tez-ui/pom.xml
+++ b/tez-ui/pom.xml
@@ -21,7 +21,7 @@
<parent>
<groupId>org.apache.tez</groupId>
<artifactId>tez</artifactId>
- <version>0.8.0-SNAPSHOT</version>
+ <version>0.8.0-TEZ-2003-SNAPSHOT</version>
</parent>
<artifactId>tez-ui</artifactId>
<packaging>war</packaging>
[32/43] tez git commit: TEZ-2131. Add additional tests for tasks
running in the AM. (sseth)
Posted by ss...@apache.org.
TEZ-2131. Add additional tests for tasks running in the AM. (sseth)
Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/67e1643c
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/67e1643c
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/67e1643c
Branch: refs/heads/TEZ-2003
Commit: 67e1643c727103bb9fc04d94909a0dfea980d22f
Parents: a45ef85
Author: Siddharth Seth <ss...@apache.org>
Authored: Fri Feb 20 17:23:18 2015 -0800
Committer: Siddharth Seth <ss...@apache.org>
Committed: Fri May 8 14:41:30 2015 -0700
----------------------------------------------------------------------
TEZ-2003-CHANGES.txt | 1 +
.../org/apache/tez/tests/TestExternalTezServices.java | 10 +++++++---
2 files changed, 8 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/tez/blob/67e1643c/TEZ-2003-CHANGES.txt
----------------------------------------------------------------------
diff --git a/TEZ-2003-CHANGES.txt b/TEZ-2003-CHANGES.txt
index 76496c9..4b0a12b 100644
--- a/TEZ-2003-CHANGES.txt
+++ b/TEZ-2003-CHANGES.txt
@@ -6,5 +6,6 @@ ALL CHANGES:
TEZ-2122. Setup pluggable components at AM/Vertex level.
TEZ-2123. Fix component managers to use pluggable components. (Enable hybrid mode)
TEZ-2125. Create a task communicator for local mode. Allow tasks to run in the AM.
+ TEZ-2131. Add additional tests for tasks running in the AM.
INCOMPATIBLE CHANGES:
http://git-wip-us.apache.org/repos/asf/tez/blob/67e1643c/tez-ext-service-tests/src/test/java/org/apache/tez/tests/TestExternalTezServices.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/tests/TestExternalTezServices.java b/tez-ext-service-tests/src/test/java/org/apache/tez/tests/TestExternalTezServices.java
index 01c2080..0ec972b 100644
--- a/tez-ext-service-tests/src/test/java/org/apache/tez/tests/TestExternalTezServices.java
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/tests/TestExternalTezServices.java
@@ -40,7 +40,6 @@ import org.apache.tez.service.MiniTezTestServiceCluster;
import org.apache.tez.test.MiniTezCluster;
import org.junit.AfterClass;
import org.junit.BeforeClass;
-import org.junit.Ignore;
import org.junit.Test;
public class TestExternalTezServices {
@@ -229,6 +228,13 @@ public class TestExternalTezServices {
}
@Test(timeout = 60000)
+ public void testAllInAM() throws Exception {
+ int expectedExternalSubmissions = 0; // All in AM
+ runJoinValidate("AllInAM", expectedExternalSubmissions, PROPS_IN_AM,
+ PROPS_IN_AM, PROPS_IN_AM);
+ }
+
+ @Test(timeout = 60000)
public void testMixed1() throws Exception { // M-ExtService, R-containers
int expectedExternalSubmissions = 4 + 0; //4 for 4 src files, 0 for num reducers.
runJoinValidate("Mixed1", expectedExternalSubmissions, PROPS_EXT_SERVICE_PUSH,
@@ -263,8 +269,6 @@ public class TestExternalTezServices {
PROPS_EXT_SERVICE_PUSH, PROPS_IN_AM);
}
-
- @Ignore // Re-activate this after the AM registers the shuffle token with the launcher.
@Test(timeout = 60000)
public void testMixed6() throws Exception { // M - AM, R - Service
int expectedExternalSubmissions = 0 + 3; // 3 for R in service
[08/43] tez git commit: TEZ-1970. Fix javadoc warnings in
SortMergeJoinExample. (Mit Desai via hitesh)
Posted by ss...@apache.org.
TEZ-1970. Fix javadoc warnings in SortMergeJoinExample. (Mit Desai via hitesh)
Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/d5a0f39d
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/d5a0f39d
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/d5a0f39d
Branch: refs/heads/TEZ-2003
Commit: d5a0f39d98be9ae06be9008c1deee36827c204e6
Parents: 78ca7af
Author: Hitesh Shah <hi...@apache.org>
Authored: Wed May 6 16:16:30 2015 -0700
Committer: Hitesh Shah <hi...@apache.org>
Committed: Wed May 6 16:16:30 2015 -0700
----------------------------------------------------------------------
CHANGES.txt | 1 +
.../main/java/org/apache/tez/examples/SortMergeJoinExample.java | 2 +-
2 files changed, 2 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/tez/blob/d5a0f39d/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 73f8fda..58648e4 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -9,6 +9,7 @@ INCOMPATIBLE CHANGES
ALL CHANGES:
TEZ-2419. Inputs/Outputs should inform the Processor about Interrupts when interrupted during a blocking Op.
TEZ-1752. Inputs / Outputs in the Runtime library should be interruptable.
+ TEZ-1970. Fix javadoc warnings in SortMergeJoinExample.
Release 0.7.0: Unreleased
http://git-wip-us.apache.org/repos/asf/tez/blob/d5a0f39d/tez-examples/src/main/java/org/apache/tez/examples/SortMergeJoinExample.java
----------------------------------------------------------------------
diff --git a/tez-examples/src/main/java/org/apache/tez/examples/SortMergeJoinExample.java b/tez-examples/src/main/java/org/apache/tez/examples/SortMergeJoinExample.java
index 1d59a29..5f4de18 100644
--- a/tez-examples/src/main/java/org/apache/tez/examples/SortMergeJoinExample.java
+++ b/tez-examples/src/main/java/org/apache/tez/examples/SortMergeJoinExample.java
@@ -138,7 +138,7 @@ public class SortMergeJoinExample extends TezExampleBase {
* @param inputPath2
* @param outPath
* @param numPartitions
- * @return
+ * @return dag
* @throws IOException
*/
private DAG createDag(TezConfiguration tezConf, Path inputPath1,
[25/43] tez git commit: TEZ-2123. Fix component managers to use
pluggable components. Enable hybrid mode. (sseth)
Posted by ss...@apache.org.
http://git-wip-us.apache.org/repos/asf/tez/blob/c47951ab/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestContainerReuse.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestContainerReuse.java b/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestContainerReuse.java
index 54b9adb..c1169ef 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestContainerReuse.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestContainerReuse.java
@@ -223,7 +223,7 @@ public class TestContainerReuse {
taskSchedulerEventHandler.handleEvent(
new AMSchedulerEventTAEnded(
- ta11, containerHost1.getId(), TaskAttemptState.SUCCEEDED));
+ ta11, containerHost1.getId(), TaskAttemptState.SUCCEEDED, 0));
drainableAppCallback.drain();
verify(taskScheduler).deallocateTask(eq(ta11), eq(true));
verify(taskSchedulerEventHandler, times(1)).taskAllocated(
@@ -235,7 +235,7 @@ public class TestContainerReuse {
taskSchedulerEventHandler.handleEvent(
new AMSchedulerEventTAEnded(ta21, containerHost2.getId(),
- TaskAttemptState.SUCCEEDED));
+ TaskAttemptState.SUCCEEDED, 0));
long currentTs = System.currentTimeMillis();
Throwable exception = null;
@@ -356,7 +356,7 @@ public class TestContainerReuse {
taskSchedulerEventHandler.handleEvent(
new AMSchedulerEventTAEnded(ta21, containerHost2.getId(),
- TaskAttemptState.SUCCEEDED));
+ TaskAttemptState.SUCCEEDED, 0));
drainableAppCallback.drain();
verify(taskScheduler).deallocateTask(eq(ta21), eq(true));
verify(taskSchedulerEventHandler, times(0)).taskAllocated(
@@ -459,7 +459,7 @@ public class TestContainerReuse {
verify(taskSchedulerEventHandler).taskAllocated(eq(ta11), any(Object.class), eq(container1));
// Task assigned to container completed successfully. Container should be re-used.
- taskSchedulerEventHandler.handleEvent(new AMSchedulerEventTAEnded(ta11, container1.getId(), TaskAttemptState.SUCCEEDED));
+ taskSchedulerEventHandler.handleEvent(new AMSchedulerEventTAEnded(ta11, container1.getId(), TaskAttemptState.SUCCEEDED, 0));
drainableAppCallback.drain();
verify(taskScheduler).deallocateTask(eq(ta11), eq(true));
verify(taskSchedulerEventHandler).taskAllocated(eq(ta12), any(Object.class), eq(container1));
@@ -469,7 +469,7 @@ public class TestContainerReuse {
// Task assigned to container completed successfully.
// Verify reuse across hosts.
- taskSchedulerEventHandler.handleEvent(new AMSchedulerEventTAEnded(ta12, container1.getId(), TaskAttemptState.SUCCEEDED));
+ taskSchedulerEventHandler.handleEvent(new AMSchedulerEventTAEnded(ta12, container1.getId(), TaskAttemptState.SUCCEEDED, 0));
drainableAppCallback.drain();
verify(taskScheduler).deallocateTask(eq(ta12), eq(true));
verify(taskSchedulerEventHandler).taskAllocated(eq(ta13), any(Object.class), eq(container1));
@@ -478,7 +478,7 @@ public class TestContainerReuse {
eventHandler.reset();
// Verify no re-use if a previous task fails.
- taskSchedulerEventHandler.handleEvent(new AMSchedulerEventTAEnded(ta13, container1.getId(), TaskAttemptState.FAILED));
+ taskSchedulerEventHandler.handleEvent(new AMSchedulerEventTAEnded(ta13, container1.getId(), TaskAttemptState.FAILED, 0));
drainableAppCallback.drain();
verify(taskSchedulerEventHandler, times(0)).taskAllocated(eq(ta14), any(Object.class), eq(container1));
verify(taskScheduler).deallocateTask(eq(ta13), eq(false));
@@ -496,7 +496,7 @@ public class TestContainerReuse {
verify(taskSchedulerEventHandler).taskAllocated(eq(ta14), any(Object.class), eq(container2));
// Task assigned to container completed successfully. No pending requests. Container should be released.
- taskSchedulerEventHandler.handleEvent(new AMSchedulerEventTAEnded(ta14, container2.getId(), TaskAttemptState.SUCCEEDED));
+ taskSchedulerEventHandler.handleEvent(new AMSchedulerEventTAEnded(ta14, container2.getId(), TaskAttemptState.SUCCEEDED, 0));
drainableAppCallback.drain();
verify(taskScheduler).deallocateTask(eq(ta14), eq(true));
verify(rmClient).releaseAssignedContainer(eq(container2.getId()));
@@ -607,7 +607,7 @@ public class TestContainerReuse {
// First task had profiling on. This container can not be reused further.
taskSchedulerEventHandler.handleEvent(
- new AMSchedulerEventTAEnded(ta11, container1.getId(), TaskAttemptState.SUCCEEDED));
+ new AMSchedulerEventTAEnded(ta11, container1.getId(), TaskAttemptState.SUCCEEDED, 0));
drainableAppCallback.drain();
verify(taskScheduler).deallocateTask(eq(ta11), eq(true));
verify(taskSchedulerEventHandler, times(0)).taskAllocated(eq(ta12), any(Object.class),
@@ -653,7 +653,7 @@ public class TestContainerReuse {
// Verify that the container can not be reused when profiling option is turned on
// Even for 2 tasks having same profiling option can have container reusability.
- taskSchedulerEventHandler.handleEvent(new AMSchedulerEventTAEnded(ta13, container2.getId(), TaskAttemptState.SUCCEEDED));
+ taskSchedulerEventHandler.handleEvent(new AMSchedulerEventTAEnded(ta13, container2.getId(), TaskAttemptState.SUCCEEDED, 0));
drainableAppCallback.drain();
verify(taskScheduler).deallocateTask(eq(ta13), eq(true));
verify(taskSchedulerEventHandler, times(0)).taskAllocated(eq(ta14), any(Object.class),
@@ -698,7 +698,7 @@ public class TestContainerReuse {
verify(taskSchedulerEventHandler).taskAllocated(eq(ta15), any(Object.class), eq(container3));
//Ensure task 6 (of vertex 1) is allocated to same container
- taskSchedulerEventHandler.handleEvent(new AMSchedulerEventTAEnded(ta15, container3.getId(), TaskAttemptState.SUCCEEDED));
+ taskSchedulerEventHandler.handleEvent(new AMSchedulerEventTAEnded(ta15, container3.getId(), TaskAttemptState.SUCCEEDED, 0));
drainableAppCallback.drain();
verify(taskScheduler).deallocateTask(eq(ta15), eq(true));
verify(taskSchedulerEventHandler).taskAllocated(eq(ta16), any(Object.class), eq(container3));
@@ -811,7 +811,7 @@ public class TestContainerReuse {
// until delay expires.
taskSchedulerEventHandler.handleEvent(
new AMSchedulerEventTAEnded(ta11, container1.getId(),
- TaskAttemptState.SUCCEEDED));
+ TaskAttemptState.SUCCEEDED, 0));
drainableAppCallback.drain();
verify(taskScheduler).deallocateTask(eq(ta11), eq(true));
verify(taskSchedulerEventHandler, times(0)).taskAllocated(
@@ -828,7 +828,7 @@ public class TestContainerReuse {
// TA12 completed.
taskSchedulerEventHandler.handleEvent(
new AMSchedulerEventTAEnded(ta12, container1.getId(),
- TaskAttemptState.SUCCEEDED));
+ TaskAttemptState.SUCCEEDED, 0));
drainableAppCallback.drain();
LOG.info("Sleeping to ensure that the scheduling loop runs");
Thread.sleep(3000l);
@@ -946,7 +946,7 @@ public class TestContainerReuse {
// Container should be assigned to task21.
taskSchedulerEventHandler.handleEvent(
new AMSchedulerEventTAEnded(ta11, container1.getId(),
- TaskAttemptState.SUCCEEDED));
+ TaskAttemptState.SUCCEEDED, 0));
drainableAppCallback.drain();
verify(taskScheduler).deallocateTask(eq(ta11), eq(true));
verify(taskSchedulerEventHandler).taskAllocated(
@@ -956,7 +956,7 @@ public class TestContainerReuse {
// Task 2 completes.
taskSchedulerEventHandler.handleEvent(
new AMSchedulerEventTAEnded(ta21, container1.getId(),
- TaskAttemptState.SUCCEEDED));
+ TaskAttemptState.SUCCEEDED, 0));
verify(rmClient, times(0)).releaseAssignedContainer(eq(container1.getId()));
LOG.info("Sleeping to ensure that the scheduling loop runs");
@@ -1065,7 +1065,7 @@ public class TestContainerReuse {
assertEquals(1, assignEvent.getRemoteTaskLocalResources().size());
// Task assigned to container completed successfully. Container should be re-used.
- taskSchedulerEventHandler.handleEvent(new AMSchedulerEventTAEnded(ta111, container1.getId(), TaskAttemptState.SUCCEEDED));
+ taskSchedulerEventHandler.handleEvent(new AMSchedulerEventTAEnded(ta111, container1.getId(), TaskAttemptState.SUCCEEDED, 0));
drainableAppCallback.drain();
verify(taskScheduler).deallocateTask(eq(ta111), eq(true));
verify(taskSchedulerEventHandler).taskAllocated(eq(ta112), any(Object.class), eq(container1));
@@ -1077,7 +1077,7 @@ public class TestContainerReuse {
// Task assigned to container completed successfully.
// Verify reuse across hosts.
- taskSchedulerEventHandler.handleEvent(new AMSchedulerEventTAEnded(ta112, container1.getId(), TaskAttemptState.SUCCEEDED));
+ taskSchedulerEventHandler.handleEvent(new AMSchedulerEventTAEnded(ta112, container1.getId(), TaskAttemptState.SUCCEEDED, 0));
drainableAppCallback.drain();
verify(taskScheduler).deallocateTask(eq(ta112), eq(true));
verify(rmClient, times(0)).releaseAssignedContainer(eq(container1.getId()));
@@ -1118,7 +1118,7 @@ public class TestContainerReuse {
assertEquals(2, assignEvent.getRemoteTaskLocalResources().size());
eventHandler.reset();
- taskSchedulerEventHandler.handleEvent(new AMSchedulerEventTAEnded(ta211, container1.getId(), TaskAttemptState.SUCCEEDED));
+ taskSchedulerEventHandler.handleEvent(new AMSchedulerEventTAEnded(ta211, container1.getId(), TaskAttemptState.SUCCEEDED, 0));
drainableAppCallback.drain();
verify(taskScheduler).deallocateTask(eq(ta211), eq(true));
verify(taskSchedulerEventHandler).taskAllocated(eq(ta212), any(Object.class), eq(container1));
http://git-wip-us.apache.org/repos/asf/tez/blob/c47951ab/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestLocalTaskScheduler.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestLocalTaskScheduler.java b/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestLocalTaskScheduler.java
index 60782e6..12390b2 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestLocalTaskScheduler.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestLocalTaskScheduler.java
@@ -59,7 +59,7 @@ public class TestLocalTaskScheduler {
TezConfiguration tezConf = new TezConfiguration();
tezConf.setInt(TezConfiguration.TEZ_AM_INLINE_TASK_EXECUTION_MAX_TASKS, MAX_TASKS);
- LocalContainerFactory containerFactory = new LocalContainerFactory(createMockAppContext());
+ LocalContainerFactory containerFactory = new LocalContainerFactory(createMockAppContext(), 1000);
HashMap<Object, Container> taskAllocations = new LinkedHashMap<Object, Container>();
PriorityBlockingQueue<TaskRequest> taskRequestQueue = new PriorityBlockingQueue<TaskRequest>();
TaskSchedulerAppCallback appClientDelegate = mock(TaskSchedulerAppCallback.class);
http://git-wip-us.apache.org/repos/asf/tez/blob/c47951ab/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestLocalTaskSchedulerService.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestLocalTaskSchedulerService.java b/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestLocalTaskSchedulerService.java
index 3cf4f6c..25cf4b5 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestLocalTaskSchedulerService.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestLocalTaskSchedulerService.java
@@ -22,6 +22,8 @@ import java.util.HashMap;
import java.util.concurrent.BlockingQueue;
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
+import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.api.records.Priority;
import org.apache.hadoop.yarn.api.records.Resource;
@@ -81,8 +83,12 @@ public class TestLocalTaskSchedulerService {
*/
@Test(timeout = 5000)
public void testDeallocationBeforeAllocation() {
+ AppContext appContext = mock(AppContext.class);
+ ApplicationAttemptId appAttemptId =
+ ApplicationAttemptId.newInstance(ApplicationId.newInstance(10000l, 1), 1);
+ doReturn(appAttemptId).when(appContext).getApplicationAttemptId();
MockLocalTaskSchedulerSerivce taskSchedulerService = new MockLocalTaskSchedulerSerivce
- (mock(TaskSchedulerAppCallback.class), mock(ContainerSignatureMatcher.class), "", 0, "", mock(AppContext.class));
+ (mock(TaskSchedulerAppCallback.class), mock(ContainerSignatureMatcher.class), "", 0, "", appContext);
taskSchedulerService.init(new Configuration());
taskSchedulerService.start();
@@ -105,8 +111,12 @@ public class TestLocalTaskSchedulerService {
*/
@Test(timeout = 5000)
public void testDeallocationAfterAllocation() {
+ AppContext appContext = mock(AppContext.class);
+ ApplicationAttemptId appAttemptId =
+ ApplicationAttemptId.newInstance(ApplicationId.newInstance(10000l, 1), 1);
+ doReturn(appAttemptId).when(appContext).getApplicationAttemptId();
MockLocalTaskSchedulerSerivce taskSchedulerService = new MockLocalTaskSchedulerSerivce
- (mock(TaskSchedulerAppCallback.class), mock(ContainerSignatureMatcher.class), "", 0, "", mock(AppContext.class));
+ (mock(TaskSchedulerAppCallback.class), mock(ContainerSignatureMatcher.class), "", 0, "", appContext);
taskSchedulerService.init(new Configuration());
taskSchedulerService.start();
@@ -132,13 +142,13 @@ public class TestLocalTaskSchedulerService {
String appHostName, int appHostPort, String appTrackingUrl,
AppContext appContext) {
super(appClient, containerSignatureMatcher, appHostName, appHostPort,
- appTrackingUrl, appContext);
+ appTrackingUrl, 10000l, appContext);
}
@Override
public AsyncDelegateRequestHandler createRequestHandler(Configuration conf) {
requestHandler = new MockAsyncDelegateRequestHandler(taskRequestQueue,
- new LocalContainerFactory(appContext),
+ new LocalContainerFactory(appContext, customContainerAppId),
taskAllocations,
appClientDelegate,
conf);
http://git-wip-us.apache.org/repos/asf/tez/blob/c47951ab/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestTaskSchedulerEventHandler.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestTaskSchedulerEventHandler.java b/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestTaskSchedulerEventHandler.java
index 291e786..4ee05cc 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestTaskSchedulerEventHandler.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestTaskSchedulerEventHandler.java
@@ -89,7 +89,7 @@ public class TestTaskSchedulerEventHandler {
public MockTaskSchedulerEventHandler(AppContext appContext,
DAGClientServer clientService, EventHandler eventHandler,
ContainerSignatureMatcher containerSignatureMatcher, WebUIService webUI) {
- super(appContext, clientService, eventHandler, containerSignatureMatcher, webUI, new String[] {});
+ super(appContext, clientService, eventHandler, containerSignatureMatcher, webUI, new String[] {}, false);
}
@Override
@@ -162,7 +162,7 @@ public class TestTaskSchedulerEventHandler {
AMSchedulerEventTALaunchRequest lr =
new AMSchedulerEventTALaunchRequest(mockAttemptId, resource, null, mockTaskAttempt, locHint,
- priority, containerContext);
+ priority, containerContext, 0, 0, 0);
schedulerHandler.taskAllocated(mockTaskAttempt, lr, container);
assertEquals(2, mockEventHandler.events.size());
assertTrue(mockEventHandler.events.get(1) instanceof AMContainerEventAssignTA);
@@ -249,9 +249,14 @@ public class TestTaskSchedulerEventHandler {
Configuration conf = new Configuration(false);
schedulerHandler.init(conf);
schedulerHandler.start();
-
+
+ AMContainer mockAmContainer = mock(AMContainer.class);
+ when(mockAmContainer.getTaskSchedulerIdentifier()).thenReturn(0);
+ when(mockAmContainer.getContainerLauncherIdentifier()).thenReturn(0);
+ when(mockAmContainer.getTaskCommunicatorIdentifier()).thenReturn(0);
ContainerId mockCId = mock(ContainerId.class);
verify(mockTaskScheduler, times(0)).deallocateContainer((ContainerId)any());
+ when(mockAMContainerMap.get(mockCId)).thenReturn(mockAmContainer);
schedulerHandler.preemptContainer(mockCId);
verify(mockTaskScheduler, times(1)).deallocateContainer(mockCId);
assertEquals(1, mockEventHandler.events.size());
http://git-wip-us.apache.org/repos/asf/tez/blob/c47951ab/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestTaskSchedulerHelpers.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestTaskSchedulerHelpers.java b/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestTaskSchedulerHelpers.java
index d775300..ffab769 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestTaskSchedulerHelpers.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestTaskSchedulerHelpers.java
@@ -127,7 +127,7 @@ class TestTaskSchedulerHelpers {
EventHandler eventHandler,
TezAMRMClientAsync<CookieContainerRequest> amrmClientAsync,
ContainerSignatureMatcher containerSignatureMatcher) {
- super(appContext, null, eventHandler, containerSignatureMatcher, null, new String[]{});
+ super(appContext, null, eventHandler, containerSignatureMatcher, null, new String[]{}, false);
this.amrmClientAsync = amrmClientAsync;
this.containerSignatureMatcher = containerSignatureMatcher;
}
http://git-wip-us.apache.org/repos/asf/tez/blob/c47951ab/tez-dag/src/test/java/org/apache/tez/dag/app/rm/container/TestAMContainer.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/rm/container/TestAMContainer.java b/tez-dag/src/test/java/org/apache/tez/dag/app/rm/container/TestAMContainer.java
index fafbba6..bdd0f61 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/rm/container/TestAMContainer.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/rm/container/TestAMContainer.java
@@ -63,6 +63,7 @@ import org.apache.hadoop.yarn.event.EventHandler;
import org.apache.hadoop.yarn.util.SystemClock;
import org.apache.tez.common.security.JobTokenIdentifier;
import org.apache.tez.common.security.TokenCache;
+import org.apache.tez.dag.api.TaskCommunicator;
import org.apache.tez.dag.app.AppContext;
import org.apache.tez.dag.app.ContainerHeartbeatHandler;
import org.apache.tez.dag.app.ContainerContext;
@@ -104,7 +105,7 @@ public class TestAMContainer {
wc.verifyState(AMContainerState.LAUNCHING);
// 1 Launch request.
wc.verifyCountAndGetOutgoingEvents(1);
- verify(wc.tal).registerRunningContainer(wc.containerID);
+ verify(wc.tal).registerRunningContainer(wc.containerID, 0);
assertNull(wc.amContainer.getCurrentTaskAttempt());
// Assign task.
@@ -121,7 +122,7 @@ public class TestAMContainer {
// Once for the previous NO_TASKS, one for the actual task.
verify(wc.chh).register(wc.containerID);
ArgumentCaptor<AMContainerTask> argumentCaptor = ArgumentCaptor.forClass(AMContainerTask.class);
- verify(wc.tal, times(1)).registerTaskAttempt(argumentCaptor.capture(), eq(wc.containerID));
+ verify(wc.tal, times(1)).registerTaskAttempt(argumentCaptor.capture(), eq(wc.containerID), eq(0));
assertEquals(1, argumentCaptor.getAllValues().size());
assertEquals(wc.taskAttemptID, argumentCaptor.getAllValues().get(0).getTask().getTaskAttemptID());
assertEquals(WrappedContainer.taskPriority, argumentCaptor.getAllValues().get(0).getPriority());
@@ -131,14 +132,14 @@ public class TestAMContainer {
wc.verifyState(AMContainerState.IDLE);
wc.verifyNoOutgoingEvents();
assertNull(wc.amContainer.getCurrentTaskAttempt());
- verify(wc.tal).unregisterTaskAttempt(wc.taskAttemptID);
+ verify(wc.tal).unregisterTaskAttempt(wc.taskAttemptID, 0);
// Container completed
wc.containerCompleted();
wc.verifyHistoryStopEvent();
wc.verifyState(AMContainerState.COMPLETED);
wc.verifyNoOutgoingEvents();
- verify(wc.tal).unregisterRunningContainer(wc.containerID);
+ verify(wc.tal).unregisterRunningContainer(wc.containerID, 0);
verify(wc.chh).unregister(wc.containerID);
assertEquals(1, wc.amContainer.getAllTaskAttempts().size());
@@ -157,7 +158,7 @@ public class TestAMContainer {
wc.verifyState(AMContainerState.LAUNCHING);
// 1 Launch request.
wc.verifyCountAndGetOutgoingEvents(1);
- verify(wc.tal).registerRunningContainer(wc.containerID);
+ verify(wc.tal).registerRunningContainer(wc.containerID, 0);
// Container Launched
wc.containerLaunched();
@@ -172,7 +173,7 @@ public class TestAMContainer {
wc.verifyNoOutgoingEvents();
assertEquals(wc.taskAttemptID, wc.amContainer.getCurrentTaskAttempt());
ArgumentCaptor<AMContainerTask> argumentCaptor = ArgumentCaptor.forClass(AMContainerTask.class);
- verify(wc.tal, times(1)).registerTaskAttempt(argumentCaptor.capture(), eq(wc.containerID));
+ verify(wc.tal, times(1)).registerTaskAttempt(argumentCaptor.capture(), eq(wc.containerID), eq(0));
assertEquals(1, argumentCaptor.getAllValues().size());
assertEquals(wc.taskAttemptID, argumentCaptor.getAllValues().get(0).getTask().getTaskAttemptID());
@@ -180,13 +181,13 @@ public class TestAMContainer {
wc.verifyState(AMContainerState.IDLE);
wc.verifyNoOutgoingEvents();
assertNull(wc.amContainer.getCurrentTaskAttempt());
- verify(wc.tal).unregisterTaskAttempt(wc.taskAttemptID);
+ verify(wc.tal).unregisterTaskAttempt(wc.taskAttemptID, 0);
wc.containerCompleted();
wc.verifyHistoryStopEvent();
wc.verifyState(AMContainerState.COMPLETED);
wc.verifyNoOutgoingEvents();
- verify(wc.tal).unregisterRunningContainer(wc.containerID);
+ verify(wc.tal).unregisterRunningContainer(wc.containerID, 0);
verify(wc.chh).unregister(wc.containerID);
assertEquals(1, wc.amContainer.getAllTaskAttempts().size());
@@ -205,7 +206,7 @@ public class TestAMContainer {
wc.verifyState(AMContainerState.LAUNCHING);
// 1 Launch request.
wc.verifyCountAndGetOutgoingEvents(1);
- verify(wc.tal).registerRunningContainer(wc.containerID);
+ verify(wc.tal).registerRunningContainer(wc.containerID, 0);
assertNull(wc.amContainer.getCurrentTaskAttempt());
// Assign task.
@@ -222,7 +223,7 @@ public class TestAMContainer {
// Once for the previous NO_TASKS, one for the actual task.
verify(wc.chh).register(wc.containerID);
ArgumentCaptor<AMContainerTask> argumentCaptor = ArgumentCaptor.forClass(AMContainerTask.class);
- verify(wc.tal, times(1)).registerTaskAttempt(argumentCaptor.capture(), eq(wc.containerID));
+ verify(wc.tal, times(1)).registerTaskAttempt(argumentCaptor.capture(), eq(wc.containerID), eq(0));
assertEquals(1, argumentCaptor.getAllValues().size());
assertEquals(wc.taskAttemptID, argumentCaptor.getAllValues().get(0).getTask().getTaskAttemptID());
@@ -231,13 +232,13 @@ public class TestAMContainer {
wc.verifyState(AMContainerState.IDLE);
wc.verifyNoOutgoingEvents();
assertNull(wc.amContainer.getCurrentTaskAttempt());
- verify(wc.tal).unregisterTaskAttempt(wc.taskAttemptID);
+ verify(wc.tal).unregisterTaskAttempt(wc.taskAttemptID, 0);
TezTaskAttemptID taId2 = TezTaskAttemptID.getInstance(wc.taskID, 2);
wc.assignTaskAttempt(taId2);
wc.verifyState(AMContainerState.RUNNING);
argumentCaptor = ArgumentCaptor.forClass(AMContainerTask.class);
- verify(wc.tal, times(2)).registerTaskAttempt(argumentCaptor.capture(), eq(wc.containerID));
+ verify(wc.tal, times(2)).registerTaskAttempt(argumentCaptor.capture(), eq(wc.containerID), eq(0));
assertEquals(2, argumentCaptor.getAllValues().size());
assertEquals(taId2, argumentCaptor.getAllValues().get(1).getTask().getTaskAttemptID());
@@ -246,14 +247,14 @@ public class TestAMContainer {
wc.verifyState(AMContainerState.IDLE);
wc.verifyNoOutgoingEvents();
assertNull(wc.amContainer.getCurrentTaskAttempt());
- verify(wc.tal).unregisterTaskAttempt(taId2);
+ verify(wc.tal).unregisterTaskAttempt(taId2, 0);
// Container completed
wc.containerCompleted();
wc.verifyHistoryStopEvent();
wc.verifyState(AMContainerState.COMPLETED);
wc.verifyNoOutgoingEvents();
- verify(wc.tal).unregisterRunningContainer(wc.containerID);
+ verify(wc.tal).unregisterRunningContainer(wc.containerID, 0);
verify(wc.chh).unregister(wc.containerID);
assertEquals(2, wc.amContainer.getAllTaskAttempts().size());
@@ -286,7 +287,7 @@ public class TestAMContainer {
wc.verifyHistoryStopEvent();
wc.verifyState(AMContainerState.COMPLETED);
wc.verifyNoOutgoingEvents();
- verify(wc.tal).unregisterRunningContainer(wc.containerID);
+ verify(wc.tal).unregisterRunningContainer(wc.containerID, 0);
verify(wc.chh).unregister(wc.containerID);
assertNull(wc.amContainer.getCurrentTaskAttempt());
@@ -323,7 +324,7 @@ public class TestAMContainer {
wc.verifyHistoryStopEvent();
wc.verifyState(AMContainerState.COMPLETED);
wc.verifyNoOutgoingEvents();
- verify(wc.tal).unregisterRunningContainer(wc.containerID);
+ verify(wc.tal).unregisterRunningContainer(wc.containerID, 0);
verify(wc.chh).unregister(wc.containerID);
assertNull(wc.amContainer.getCurrentTaskAttempt());
@@ -346,7 +347,7 @@ public class TestAMContainer {
wc.assignTaskAttempt(taID2);
wc.verifyState(AMContainerState.STOP_REQUESTED);
- verify(wc.tal).unregisterRunningContainer(wc.containerID);
+ verify(wc.tal).unregisterRunningContainer(wc.containerID, 0);
verify(wc.chh).unregister(wc.containerID);
// 1 for NM stop request. 2 TERMINATING to TaskAttempt.
outgoingEvents = wc.verifyCountAndGetOutgoingEvents(3);
@@ -378,13 +379,13 @@ public class TestAMContainer {
wc.launchContainer();
wc.assignTaskAttempt(wc.taskAttemptID);
wc.verifyState(AMContainerState.LAUNCHING);
- verify(wc.tal).registerRunningContainer(wc.containerID);
+ verify(wc.tal).registerRunningContainer(wc.containerID, 0);
TezTaskAttemptID taID2 = TezTaskAttemptID.getInstance(wc.taskID, 2);
wc.assignTaskAttempt(taID2);
wc.verifyState(AMContainerState.STOP_REQUESTED);
- verify(wc.tal).unregisterRunningContainer(wc.containerID);
+ verify(wc.tal).unregisterRunningContainer(wc.containerID, 0);
verify(wc.chh).unregister(wc.containerID);
// 1 for NM stop request. 2 TERMINATING to TaskAttempt.
outgoingEvents = wc.verifyCountAndGetOutgoingEvents(3);
@@ -420,7 +421,7 @@ public class TestAMContainer {
wc.containerTimedOut();
wc.verifyState(AMContainerState.STOP_REQUESTED);
- verify(wc.tal).unregisterRunningContainer(wc.containerID);
+ verify(wc.tal).unregisterRunningContainer(wc.containerID, 0);
verify(wc.chh).unregister(wc.containerID);
// 1 to TA, 1 for RM de-allocate.
outgoingEvents = wc.verifyCountAndGetOutgoingEvents(2);
@@ -454,7 +455,7 @@ public class TestAMContainer {
wc.stopRequest();
wc.verifyState(AMContainerState.STOP_REQUESTED);
- verify(wc.tal).unregisterRunningContainer(wc.containerID);
+ verify(wc.tal).unregisterRunningContainer(wc.containerID, 0);
verify(wc.chh).unregister(wc.containerID);
// 1 to TA, 1 for RM de-allocate.
outgoingEvents = wc.verifyCountAndGetOutgoingEvents(2);
@@ -484,11 +485,11 @@ public class TestAMContainer {
wc.launchContainer();
wc.assignTaskAttempt(wc.taskAttemptID);
wc.verifyState(AMContainerState.LAUNCHING);
- verify(wc.tal).registerRunningContainer(wc.containerID);
+ verify(wc.tal).registerRunningContainer(wc.containerID, 0);
wc.launchFailed();
wc.verifyState(AMContainerState.STOPPING);
- verify(wc.tal).registerRunningContainer(wc.containerID);
- verify(wc.tal).unregisterRunningContainer(wc.containerID);
+ verify(wc.tal).registerRunningContainer(wc.containerID, 0);
+ verify(wc.tal).unregisterRunningContainer(wc.containerID, 0);
outgoingEvents = wc.verifyCountAndGetOutgoingEvents(2);
verifyUnOrderedOutgoingEventTypes(outgoingEvents,
@@ -537,8 +538,8 @@ public class TestAMContainer {
wc.containerCompleted();
wc.verifyState(AMContainerState.COMPLETED);
- verify(wc.tal).registerRunningContainer(wc.containerID);
- verify(wc.tal).unregisterRunningContainer(wc.containerID);
+ verify(wc.tal).registerRunningContainer(wc.containerID, 0);
+ verify(wc.tal).unregisterRunningContainer(wc.containerID, 0);
outgoingEvents = wc.verifyCountAndGetOutgoingEvents(1);
verifyUnOrderedOutgoingEventTypes(outgoingEvents,
@@ -567,8 +568,8 @@ public class TestAMContainer {
wc.containerCompleted(ContainerExitStatus.DISKS_FAILED, TaskAttemptTerminationCause.NODE_DISK_ERROR);
wc.verifyState(AMContainerState.COMPLETED);
- verify(wc.tal).registerRunningContainer(wc.containerID);
- verify(wc.tal).unregisterRunningContainer(wc.containerID);
+ verify(wc.tal).registerRunningContainer(wc.containerID, 0);
+ verify(wc.tal).unregisterRunningContainer(wc.containerID, 0);
outgoingEvents = wc.verifyCountAndGetOutgoingEvents(1);
verifyUnOrderedOutgoingEventTypes(outgoingEvents,
@@ -598,8 +599,8 @@ public class TestAMContainer {
wc.containerCompleted(ContainerExitStatus.ABORTED, TaskAttemptTerminationCause.NODE_FAILED);
wc.verifyState(AMContainerState.COMPLETED);
- verify(wc.tal).registerRunningContainer(wc.containerID);
- verify(wc.tal).unregisterRunningContainer(wc.containerID);
+ verify(wc.tal).registerRunningContainer(wc.containerID, 0);
+ verify(wc.tal).unregisterRunningContainer(wc.containerID, 0);
outgoingEvents = wc.verifyCountAndGetOutgoingEvents(1);
verifyUnOrderedOutgoingEventTypes(outgoingEvents,
@@ -629,8 +630,8 @@ public class TestAMContainer {
wc.containerCompleted();
wc.verifyState(AMContainerState.COMPLETED);
- verify(wc.tal).registerRunningContainer(wc.containerID);
- verify(wc.tal).unregisterRunningContainer(wc.containerID);
+ verify(wc.tal).registerRunningContainer(wc.containerID, 0);
+ verify(wc.tal).unregisterRunningContainer(wc.containerID, 0);
verify(wc.chh).register(wc.containerID);
verify(wc.chh).unregister(wc.containerID);
@@ -658,8 +659,8 @@ public class TestAMContainer {
wc.containerCompleted();
wc.verifyState(AMContainerState.COMPLETED);
- verify(wc.tal).registerRunningContainer(wc.containerID);
- verify(wc.tal).unregisterRunningContainer(wc.containerID);
+ verify(wc.tal).registerRunningContainer(wc.containerID, 0);
+ verify(wc.tal).unregisterRunningContainer(wc.containerID, 0);
verify(wc.chh).register(wc.containerID);
verify(wc.chh).unregister(wc.containerID);
@@ -693,8 +694,8 @@ public class TestAMContainer {
wc.containerCompleted(ContainerExitStatus.PREEMPTED, TaskAttemptTerminationCause.EXTERNAL_PREEMPTION);
wc.verifyState(AMContainerState.COMPLETED);
- verify(wc.tal).registerRunningContainer(wc.containerID);
- verify(wc.tal).unregisterRunningContainer(wc.containerID);
+ verify(wc.tal).registerRunningContainer(wc.containerID, 0);
+ verify(wc.tal).unregisterRunningContainer(wc.containerID, 0);
verify(wc.chh).register(wc.containerID);
verify(wc.chh).unregister(wc.containerID);
@@ -730,8 +731,8 @@ public class TestAMContainer {
wc.containerCompleted(ContainerExitStatus.INVALID, TaskAttemptTerminationCause.INTERNAL_PREEMPTION);
wc.verifyState(AMContainerState.COMPLETED);
- verify(wc.tal).registerRunningContainer(wc.containerID);
- verify(wc.tal).unregisterRunningContainer(wc.containerID);
+ verify(wc.tal).registerRunningContainer(wc.containerID, 0);
+ verify(wc.tal).unregisterRunningContainer(wc.containerID, 0);
verify(wc.chh).register(wc.containerID);
verify(wc.chh).unregister(wc.containerID);
@@ -767,8 +768,8 @@ public class TestAMContainer {
wc.containerCompleted(ContainerExitStatus.DISKS_FAILED, TaskAttemptTerminationCause.NODE_DISK_ERROR);
wc.verifyState(AMContainerState.COMPLETED);
- verify(wc.tal).registerRunningContainer(wc.containerID);
- verify(wc.tal).unregisterRunningContainer(wc.containerID);
+ verify(wc.tal).registerRunningContainer(wc.containerID, 0);
+ verify(wc.tal).unregisterRunningContainer(wc.containerID, 0);
verify(wc.chh).register(wc.containerID);
verify(wc.chh).unregister(wc.containerID);
@@ -1011,7 +1012,7 @@ public class TestAMContainer {
wc.containerLaunched();
wc.assignTaskAttempt(wc.taskAttemptID);
ArgumentCaptor<AMContainerTask> argumentCaptor = ArgumentCaptor.forClass(AMContainerTask.class);
- verify(wc.tal, times(1)).registerTaskAttempt(argumentCaptor.capture(), eq(wc.containerID));
+ verify(wc.tal, times(1)).registerTaskAttempt(argumentCaptor.capture(), eq(wc.containerID), eq(0));
AMContainerTask task1 = argumentCaptor.getAllValues().get(0);
assertEquals(0, task1.getAdditionalResources().size());
wc.taskAttemptSucceeded(wc.taskAttemptID);
@@ -1024,7 +1025,7 @@ public class TestAMContainer {
TezTaskAttemptID taID2 = TezTaskAttemptID.getInstance(wc.taskID, 2);
wc.assignTaskAttempt(taID2, additionalResources, new Credentials());
argumentCaptor = ArgumentCaptor.forClass(AMContainerTask.class);
- verify(wc.tal, times(2)).registerTaskAttempt(argumentCaptor.capture(), eq(wc.containerID));
+ verify(wc.tal, times(2)).registerTaskAttempt(argumentCaptor.capture(), eq(wc.containerID), eq(0));
AMContainerTask task2 = argumentCaptor.getAllValues().get(1);
Map<String, LocalResource> pullTaskAdditionalResources = task2.getAdditionalResources();
assertEquals(2, pullTaskAdditionalResources.size());
@@ -1047,7 +1048,7 @@ public class TestAMContainer {
TezTaskAttemptID taID3 = TezTaskAttemptID.getInstance(wc.taskID, 3);
wc.assignTaskAttempt(taID3, new HashMap<String, LocalResource>(), new Credentials());
argumentCaptor = ArgumentCaptor.forClass(AMContainerTask.class);
- verify(wc.tal, times(3)).registerTaskAttempt(argumentCaptor.capture(), eq(wc.containerID));
+ verify(wc.tal, times(3)).registerTaskAttempt(argumentCaptor.capture(), eq(wc.containerID), eq(0));
AMContainerTask task3 = argumentCaptor.getAllValues().get(2);
assertEquals(0, task3.getAdditionalResources().size());
wc.taskAttemptSucceeded(taID3);
@@ -1100,7 +1101,7 @@ public class TestAMContainer {
wc.containerLaunched();
wc.assignTaskAttempt(attempt11, LRs, dag1Credentials);
argumentCaptor = ArgumentCaptor.forClass(AMContainerTask.class);
- verify(wc.tal, times(1)).registerTaskAttempt(argumentCaptor.capture(), eq(wc.containerID));
+ verify(wc.tal, times(1)).registerTaskAttempt(argumentCaptor.capture(), eq(wc.containerID), eq(0));
fetchedTask = argumentCaptor.getAllValues().get(0);
assertTrue(fetchedTask.haveCredentialsChanged());
assertNotNull(fetchedTask.getCredentials());
@@ -1109,7 +1110,7 @@ public class TestAMContainer {
wc.assignTaskAttempt(attempt12, LRs, dag1Credentials);
argumentCaptor = ArgumentCaptor.forClass(AMContainerTask.class);
- verify(wc.tal, times(2)).registerTaskAttempt(argumentCaptor.capture(), eq(wc.containerID));
+ verify(wc.tal, times(2)).registerTaskAttempt(argumentCaptor.capture(), eq(wc.containerID), eq(0));
fetchedTask = argumentCaptor.getAllValues().get(1);
assertFalse(fetchedTask.haveCredentialsChanged());
assertNull(fetchedTask.getCredentials());
@@ -1119,7 +1120,7 @@ public class TestAMContainer {
wc.setNewDAGID(dagID2);
wc.assignTaskAttempt(attempt21, LRs, null);
argumentCaptor = ArgumentCaptor.forClass(AMContainerTask.class);
- verify(wc.tal, times(3)).registerTaskAttempt(argumentCaptor.capture(), eq(wc.containerID));
+ verify(wc.tal, times(3)).registerTaskAttempt(argumentCaptor.capture(), eq(wc.containerID), eq(0));
fetchedTask = argumentCaptor.getAllValues().get(2);
assertTrue(fetchedTask.haveCredentialsChanged());
assertNull(fetchedTask.getCredentials());
@@ -1127,7 +1128,7 @@ public class TestAMContainer {
wc.assignTaskAttempt(attempt22, LRs, null);
argumentCaptor = ArgumentCaptor.forClass(AMContainerTask.class);
- verify(wc.tal, times(4)).registerTaskAttempt(argumentCaptor.capture(), eq(wc.containerID));
+ verify(wc.tal, times(4)).registerTaskAttempt(argumentCaptor.capture(), eq(wc.containerID), eq(0));
fetchedTask = argumentCaptor.getAllValues().get(3);
assertFalse(fetchedTask.haveCredentialsChanged());
assertNull(fetchedTask.getCredentials());
@@ -1137,7 +1138,7 @@ public class TestAMContainer {
wc.setNewDAGID(dagID3);
wc.assignTaskAttempt(attempt31, LRs , dag3Credentials);
argumentCaptor = ArgumentCaptor.forClass(AMContainerTask.class);
- verify(wc.tal, times(5)).registerTaskAttempt(argumentCaptor.capture(), eq(wc.containerID));
+ verify(wc.tal, times(5)).registerTaskAttempt(argumentCaptor.capture(), eq(wc.containerID), eq(0));
fetchedTask = argumentCaptor.getAllValues().get(4);
assertTrue(fetchedTask.haveCredentialsChanged());
assertNotNull(fetchedTask.getCredentials());
@@ -1147,7 +1148,7 @@ public class TestAMContainer {
wc.assignTaskAttempt(attempt32, LRs, dag1Credentials);
argumentCaptor = ArgumentCaptor.forClass(AMContainerTask.class);
- verify(wc.tal, times(6)).registerTaskAttempt(argumentCaptor.capture(), eq(wc.containerID));
+ verify(wc.tal, times(6)).registerTaskAttempt(argumentCaptor.capture(), eq(wc.containerID), eq(0));
fetchedTask = argumentCaptor.getAllValues().get(5);
assertFalse(fetchedTask.haveCredentialsChanged());
assertNull(fetchedTask.getCredentials());
@@ -1200,9 +1201,10 @@ public class TestAMContainer {
chh = mock(ContainerHeartbeatHandler.class);
- InetSocketAddress addr = new InetSocketAddress("localhost", 0);
tal = mock(TaskAttemptListener.class);
- doReturn(addr).when(tal).getAddress();
+ TaskCommunicator taskComm = mock(TaskCommunicator.class);
+ doReturn(new InetSocketAddress("localhost", 0)).when(taskComm).getAddress();
+ doReturn(taskComm).when(tal).getTaskCommunicator(0);
dagID = TezDAGID.getInstance(applicationID, 1);
vertexID = TezVertexID.getInstance(dagID, 1);
@@ -1228,7 +1230,7 @@ public class TestAMContainer {
doReturn(taskAttemptID).when(taskSpec).getTaskAttemptID();
amContainer = new AMContainerImpl(container, chh, tal,
- new ContainerContextMatcher(), appContext);
+ new ContainerContextMatcher(), appContext, 0, 0, 0);
}
public WrappedContainer() {
@@ -1278,7 +1280,7 @@ public class TestAMContainer {
Token<JobTokenIdentifier> jobToken = mock(Token.class);
TokenCache.setSessionToken(jobToken, credentials);
amContainer.handle(new AMContainerEventLaunchRequest(containerID, vertexID,
- new ContainerContext(localResources, credentials, new HashMap<String, String>(), "")));
+ new ContainerContext(localResources, credentials, new HashMap<String, String>(), ""), 0, 0));
}
public void assignTaskAttempt(TezTaskAttemptID taID) {
http://git-wip-us.apache.org/repos/asf/tez/blob/c47951ab/tez-dag/src/test/java/org/apache/tez/dag/app/rm/container/TestAMContainerMap.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/rm/container/TestAMContainerMap.java b/tez-dag/src/test/java/org/apache/tez/dag/app/rm/container/TestAMContainerMap.java
index 61371e8..dee4541 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/rm/container/TestAMContainerMap.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/rm/container/TestAMContainerMap.java
@@ -31,6 +31,7 @@ import org.apache.hadoop.yarn.api.records.NodeId;
import org.apache.hadoop.yarn.api.records.Priority;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.api.records.Token;
+import org.apache.tez.dag.api.TaskCommunicator;
import org.apache.tez.dag.app.AppContext;
import org.apache.tez.dag.app.ContainerHeartbeatHandler;
import org.apache.tez.dag.app.TaskAttemptListener;
@@ -43,8 +44,9 @@ public class TestAMContainerMap {
private TaskAttemptListener mockTaskAttemptListener() {
TaskAttemptListener tal = mock(TaskAttemptListener.class);
- InetSocketAddress socketAddr = new InetSocketAddress("localhost", 21000);
- doReturn(socketAddr).when(tal).getAddress();
+ TaskCommunicator taskComm = mock(TaskCommunicator.class);
+ doReturn(new InetSocketAddress("localhost", 21000)).when(taskComm).getAddress();
+ doReturn(taskComm).when(tal).getTaskCommunicator(0);
return tal;
}
http://git-wip-us.apache.org/repos/asf/tez/blob/c47951ab/tez-examples/src/main/java/org/apache/tez/examples/JoinValidate.java
----------------------------------------------------------------------
diff --git a/tez-examples/src/main/java/org/apache/tez/examples/JoinValidate.java b/tez-examples/src/main/java/org/apache/tez/examples/JoinValidate.java
index d7fc5ac..52643c5 100644
--- a/tez-examples/src/main/java/org/apache/tez/examples/JoinValidate.java
+++ b/tez-examples/src/main/java/org/apache/tez/examples/JoinValidate.java
@@ -19,6 +19,7 @@
package org.apache.tez.examples;
import java.io.IOException;
+import java.util.Map;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -130,7 +131,7 @@ public class JoinValidate extends TezExampleBase {
private DAG createDag(TezConfiguration tezConf, Path lhs, Path rhs, int numPartitions)
throws IOException {
- DAG dag = DAG.create("JoinValidate");
+ DAG dag = DAG.create(getDagName());
// Configuration for intermediate output - shared by Vertex1 and Vertex2
// This should only be setting selective keys from the underlying conf. Fix after there's a
@@ -147,15 +148,18 @@ public class JoinValidate extends TezExampleBase {
MRInput
.createConfigBuilder(new Configuration(tezConf), TextInputFormat.class,
lhs.toUri().toString()).groupSplits(!isDisableSplitGrouping()).build());
+ setVertexProperties(lhsVertex, getLhsVertexProperties());
Vertex rhsVertex = Vertex.create(RHS_INPUT_NAME, ProcessorDescriptor.create(
ForwardingProcessor.class.getName())).addDataSource("rhs",
MRInput
.createConfigBuilder(new Configuration(tezConf), TextInputFormat.class,
rhs.toUri().toString()).groupSplits(!isDisableSplitGrouping()).build());
+ setVertexProperties(rhsVertex, getRhsVertexProperties());
Vertex joinValidateVertex = Vertex.create("joinvalidate", ProcessorDescriptor.create(
JoinValidateProcessor.class.getName()), numPartitions);
+ setVertexProperties(joinValidateVertex, getValidateVertexProperties());
Edge e1 = Edge.create(lhsVertex, joinValidateVertex, edgeConf.createDefaultEdgeProperty());
Edge e2 = Edge.create(rhsVertex, joinValidateVertex, edgeConf.createDefaultEdgeProperty());
@@ -165,6 +169,30 @@ public class JoinValidate extends TezExampleBase {
return dag;
}
+ private void setVertexProperties(Vertex vertex, Map<String, String> properties) {
+ if (properties != null) {
+ for (Map.Entry<String, String> entry : properties.entrySet()) {
+ vertex.setConf(entry.getKey(), entry.getValue());
+ }
+ }
+ }
+
+ protected Map<String, String> getLhsVertexProperties() {
+ return null;
+ }
+
+ protected Map<String, String> getRhsVertexProperties() {
+ return null;
+ }
+
+ protected Map<String, String> getValidateVertexProperties() {
+ return null;
+ }
+
+ protected String getDagName() {
+ return "JoinValidate";
+ }
+
public static class JoinValidateProcessor extends SimpleProcessor {
private static final Logger LOG = LoggerFactory.getLogger(JoinValidateProcessor.class);
http://git-wip-us.apache.org/repos/asf/tez/blob/c47951ab/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/launcher/TezTestServiceContainerLauncher.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/launcher/TezTestServiceContainerLauncher.java b/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/launcher/TezTestServiceContainerLauncher.java
index e83165b..27356bc 100644
--- a/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/launcher/TezTestServiceContainerLauncher.java
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/launcher/TezTestServiceContainerLauncher.java
@@ -14,6 +14,8 @@
package org.apache.tez.dag.app.launcher;
+import java.net.InetSocketAddress;
+
import com.google.common.base.Preconditions;
import com.google.protobuf.ByteString;
import org.apache.commons.logging.Log;
@@ -124,7 +126,8 @@ public class TezTestServiceContainerLauncher extends AbstractService implements
private RunContainerRequestProto constructRunContainerRequest(NMCommunicatorLaunchRequestEvent event) {
RunContainerRequestProto.Builder builder = RunContainerRequestProto.newBuilder();
- builder.setAmHost(tal.getAddress().getHostName()).setAmPort(tal.getAddress().getPort());
+ InetSocketAddress address = tal.getTaskCommunicator(event.getTaskCommId()).getAddress();
+ builder.setAmHost(address.getHostName()).setAmPort(address.getPort());
builder.setAppAttemptNumber(event.getContainer().getId().getApplicationAttemptId().getAttemptId());
builder.setApplicationIdString(
event.getContainer().getId().getApplicationAttemptId().getApplicationId().toString());
http://git-wip-us.apache.org/repos/asf/tez/blob/c47951ab/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/rm/TezTestServiceTaskSchedulerService.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/rm/TezTestServiceTaskSchedulerService.java b/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/rm/TezTestServiceTaskSchedulerService.java
index e3c18bf..5657f86 100644
--- a/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/rm/TezTestServiceTaskSchedulerService.java
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/rm/TezTestServiceTaskSchedulerService.java
@@ -14,7 +14,6 @@
package org.apache.tez.dag.app.rm;
-import java.io.IOException;
import java.util.Arrays;
import java.util.LinkedList;
import java.util.List;
@@ -32,25 +31,17 @@ import com.google.common.util.concurrent.ThreadFactoryBuilder;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterResponse;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
+import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.api.records.ContainerId;
-import org.apache.hadoop.yarn.api.records.ContainerStatus;
import org.apache.hadoop.yarn.api.records.NodeId;
-import org.apache.hadoop.yarn.api.records.NodeReport;
import org.apache.hadoop.yarn.api.records.Priority;
import org.apache.hadoop.yarn.api.records.Resource;
-import org.apache.hadoop.yarn.client.api.AMRMClient;
-import org.apache.hadoop.yarn.client.api.async.AMRMClientAsync;
-import org.apache.hadoop.yarn.exceptions.YarnException;
-import org.apache.tez.dag.api.TezUncheckedException;
import org.apache.tez.dag.app.AppContext;
import org.apache.tez.service.TezTestServiceConfConstants;
-// TODO Registration with RM - so that the AM is considered dead and restarted in the expiry interval - 10 minutes.
-
public class TezTestServiceTaskSchedulerService extends TaskSchedulerService {
private static final Log LOG = LogFactory.getLog(TezTestServiceTaskSchedulerService.class);
@@ -71,7 +62,7 @@ public class TezTestServiceTaskSchedulerService extends TaskSchedulerService {
private final ConcurrentMap<Object, ContainerId> runningTasks =
new ConcurrentHashMap<Object, ContainerId>();
- private final AMRMClientAsync<AMRMClient.ContainerRequest> amRmClient;
+ // AppIdIdentifier to avoid conflicts with other containers in the system.
// Per instance
private final int memoryPerInstance;
@@ -82,10 +73,13 @@ public class TezTestServiceTaskSchedulerService extends TaskSchedulerService {
private final Resource resourcePerContainer;
+ // Not registering with the RM. Assuming the main TezScheduler will always run (except local mode),
+ // and take care of YARN registration.
public TezTestServiceTaskSchedulerService(TaskSchedulerAppCallback appClient,
AppContext appContext,
String clientHostname, int clientPort,
String trackingUrl,
+ long customAppIdIdentifier,
Configuration conf) {
// Accepting configuration here to allow setting up fields as final
super(TezTestServiceTaskSchedulerService.class.getName());
@@ -93,7 +87,7 @@ public class TezTestServiceTaskSchedulerService extends TaskSchedulerService {
this.appClientDelegate = createAppCallbackDelegate(appClient);
this.appContext = appContext;
this.serviceHosts = new LinkedList<String>();
- this.containerFactory = new ContainerFactory(appContext);
+ this.containerFactory = new ContainerFactory(appContext, customAppIdIdentifier);
this.memoryPerInstance = conf
.getInt(TezTestServiceConfConstants.TEZ_TEST_SERVICE_MEMORY_PER_INSTANCE_MB, -1);
@@ -123,7 +117,6 @@ public class TezTestServiceTaskSchedulerService extends TaskSchedulerService {
int memoryPerContainer = (int) (memoryPerInstance / (float) executorsPerInstance);
int coresPerContainer = (int) (coresPerInstance / (float) executorsPerInstance);
this.resourcePerContainer = Resource.newInstance(memoryPerContainer, coresPerContainer);
- this.amRmClient = TezAMRMClientAsync.createAMRMClientAsync(5000, new FakeAmRmCallbackHandler());
String[] hosts = conf.getTrimmedStrings(TezTestServiceConfConstants.TEZ_TEST_SERVICE_HOSTS);
if (hosts == null || hosts.length == 0) {
@@ -143,36 +136,8 @@ public class TezTestServiceTaskSchedulerService extends TaskSchedulerService {
}
@Override
- public void serviceInit(Configuration conf) {
- amRmClient.init(conf);
- }
-
- @Override
- public void serviceStart() {
- amRmClient.start();
- RegisterApplicationMasterResponse response;
- try {
- amRmClient.registerApplicationMaster(clientHostname, clientPort, trackingUrl);
- } catch (YarnException e) {
- throw new TezUncheckedException(e);
- } catch (IOException e) {
- throw new TezUncheckedException(e);
- }
- }
-
- @Override
public void serviceStop() {
if (!this.isStopped.getAndSet(true)) {
-
- try {
- TaskSchedulerAppCallback.AppFinalStatus status = appClientDelegate.getFinalAppStatus();
- amRmClient.unregisterApplicationMaster(status.exitStatus, status.exitMessage,
- status.postCompletionTrackingUrl);
- } catch (YarnException e) {
- throw new TezUncheckedException(e);
- } catch (IOException e) {
- throw new TezUncheckedException(e);
- }
appCallbackExecutor.shutdownNow();
}
}
@@ -264,7 +229,7 @@ public class TezTestServiceTaskSchedulerService extends TaskSchedulerService {
private ExecutorService createAppCallbackExecutorService() {
return Executors.newSingleThreadExecutor(new ThreadFactoryBuilder()
- .setNameFormat("TaskSchedulerAppCaller #%d").setDaemon(true).build());
+ .setNameFormat("TezTestTaskSchedulerAppCaller").setDaemon(true).build());
}
private TaskSchedulerAppCallback createAppCallbackDelegate(
@@ -274,7 +239,7 @@ public class TezTestServiceTaskSchedulerService extends TaskSchedulerService {
}
private String selectHost(String[] requestedHosts) {
- String host = null;
+ String host;
if (requestedHosts != null && requestedHosts.length > 0) {
Arrays.sort(requestedHosts);
host = requestedHosts[0];
@@ -287,17 +252,19 @@ public class TezTestServiceTaskSchedulerService extends TaskSchedulerService {
}
static class ContainerFactory {
- final AppContext appContext;
AtomicInteger nextId;
-
- public ContainerFactory(AppContext appContext) {
- this.appContext = appContext;
- this.nextId = new AtomicInteger(2);
+ final ApplicationAttemptId customAppAttemptId;
+
+ public ContainerFactory(AppContext appContext, long appIdLong) {
+ this.nextId = new AtomicInteger(1);
+ ApplicationId appId = ApplicationId
+ .newInstance(appIdLong, appContext.getApplicationAttemptId().getApplicationId().getId());
+ this.customAppAttemptId = ApplicationAttemptId
+ .newInstance(appId, appContext.getApplicationAttemptId().getAttemptId());
}
public Container createContainer(Resource capability, Priority priority, String hostname, int port) {
- ApplicationAttemptId appAttemptId = appContext.getApplicationAttemptId();
- ContainerId containerId = ContainerId.newInstance(appAttemptId, nextId.getAndIncrement());
+ ContainerId containerId = ContainerId.newInstance(customAppAttemptId, nextId.getAndIncrement());
NodeId nodeId = NodeId.newInstance(hostname, port);
String nodeHttpAddress = "hostname:0";
@@ -311,37 +278,4 @@ public class TezTestServiceTaskSchedulerService extends TaskSchedulerService {
return container;
}
}
-
- private static class FakeAmRmCallbackHandler implements AMRMClientAsync.CallbackHandler {
-
- @Override
- public void onContainersCompleted(List<ContainerStatus> statuses) {
-
- }
-
- @Override
- public void onContainersAllocated(List<Container> containers) {
-
- }
-
- @Override
- public void onShutdownRequest() {
-
- }
-
- @Override
- public void onNodesUpdated(List<NodeReport> updatedNodes) {
-
- }
-
- @Override
- public float getProgress() {
- return 0;
- }
-
- @Override
- public void onError(Throwable e) {
-
- }
- }
}
http://git-wip-us.apache.org/repos/asf/tez/blob/c47951ab/tez-ext-service-tests/src/test/java/org/apache/tez/examples/JoinValidateConfigured.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/examples/JoinValidateConfigured.java b/tez-ext-service-tests/src/test/java/org/apache/tez/examples/JoinValidateConfigured.java
new file mode 100644
index 0000000..e5d2e3b
--- /dev/null
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/examples/JoinValidateConfigured.java
@@ -0,0 +1,53 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.examples;
+
+import java.util.Map;
+
+public class JoinValidateConfigured extends JoinValidate {
+
+ private final Map<String, String> lhsProps;
+ private final Map<String, String> rhsProps;
+ private final Map<String, String> validateProps;
+ private final String dagNameSuffix;
+
+ public JoinValidateConfigured(Map<String, String> lhsProps, Map<String, String> rhsProps,
+ Map<String, String> validateProps, String dagNameSuffix) {
+ this.lhsProps = lhsProps;
+ this.rhsProps = rhsProps;
+ this.validateProps = validateProps;
+ this.dagNameSuffix = dagNameSuffix;
+ }
+
+ @Override
+ protected Map<String, String> getLhsVertexProperties() {
+ return this.lhsProps;
+ }
+
+ @Override
+ protected Map<String, String> getRhsVertexProperties() {
+ return this.rhsProps;
+ }
+
+ @Override
+ protected Map<String, String> getValidateVertexProperties() {
+ return this.validateProps;
+ }
+
+ @Override
+ protected String getDagName() {
+ return "JoinValidate_" + dagNameSuffix;
+ }
+}
http://git-wip-us.apache.org/repos/asf/tez/blob/c47951ab/tez-ext-service-tests/src/test/java/org/apache/tez/tests/TestExternalTezServices.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/tests/TestExternalTezServices.java b/tez-ext-service-tests/src/test/java/org/apache/tez/tests/TestExternalTezServices.java
index ae7e7f8..9c149c6 100644
--- a/tez-ext-service-tests/src/test/java/org/apache/tez/tests/TestExternalTezServices.java
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/tests/TestExternalTezServices.java
@@ -15,11 +15,11 @@
package org.apache.tez.tests;
import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
import java.io.IOException;
import java.util.Map;
+import com.google.common.collect.Maps;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
@@ -28,13 +28,14 @@ import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.tez.client.TezClient;
import org.apache.tez.dag.api.TezConfiguration;
+import org.apache.tez.dag.api.TezConstants;
import org.apache.tez.dag.api.TezException;
import org.apache.tez.dag.app.launcher.TezTestServiceNoOpContainerLauncher;
import org.apache.tez.dag.app.rm.TezTestServiceTaskSchedulerService;
import org.apache.tez.dag.app.taskcomm.TezTestServiceTaskCommunicatorImpl;
import org.apache.tez.examples.HashJoinExample;
import org.apache.tez.examples.JoinDataGen;
-import org.apache.tez.examples.JoinValidate;
+import org.apache.tez.examples.JoinValidateConfigured;
import org.apache.tez.service.MiniTezTestServiceCluster;
import org.apache.tez.test.MiniTezCluster;
import org.junit.AfterClass;
@@ -47,23 +48,31 @@ public class TestExternalTezServices {
private static final String EXT_PUSH_ENTITY_NAME = "ExtServiceTestPush";
- private static MiniTezCluster tezCluster;
- private static MiniDFSCluster dfsCluster;
- private static MiniTezTestServiceCluster tezTestServiceCluster;
+ private static volatile MiniTezCluster tezCluster;
+ private static volatile MiniDFSCluster dfsCluster;
+ private static volatile MiniTezTestServiceCluster tezTestServiceCluster;
- private static Configuration clusterConf = new Configuration();
- private static Configuration confForJobs;
+ private static volatile Configuration clusterConf = new Configuration();
+ private static volatile Configuration confForJobs;
- private static FileSystem remoteFs;
- private static FileSystem localFs;
+ private static volatile FileSystem remoteFs;
+ private static volatile FileSystem localFs;
- private static TezClient sharedTezClient;
+ private static volatile TezClient sharedTezClient;
+
+ private static final Path SRC_DATA_DIR = new Path("/tmp/" + TestExternalTezServices.class.getSimpleName());
+ private static final Path HASH_JOIN_EXPECTED_RESULT_PATH = new Path(SRC_DATA_DIR, "expectedOutputPath");
+ private static final Path HASH_JOIN_OUTPUT_PATH = new Path(SRC_DATA_DIR, "outPath");
+
+ private static final Map<String, String> PROPS_EXT_SERVICE_PUSH = Maps.newHashMap();
+ private static final Map<String, String> PROPS_REGULAR_CONTAINERS = Maps.newHashMap();
+ private static final Map<String, String> PROPS_IN_AM = Maps.newHashMap();
private static String TEST_ROOT_DIR = "target" + Path.SEPARATOR + TestExternalTezServices.class.getName()
+ "-tmpDir";
@BeforeClass
- public static void setup() throws IOException, TezException, InterruptedException {
+ public static void setup() throws Exception {
localFs = FileSystem.getLocal(clusterConf);
@@ -108,27 +117,79 @@ public class TestExternalTezServices {
remoteFs.mkdirs(stagingDirPath);
// This is currently configured to push tasks into the Service, and then use the standard RPC
confForJobs.set(TezConfiguration.TEZ_AM_STAGING_DIR, stagingDirPath.toString());
- confForJobs.set(TezConfiguration.TEZ_AM_TASK_SCHEDULERS,
+
+ confForJobs.setStrings(TezConfiguration.TEZ_AM_TASK_SCHEDULERS,
+ TezConstants.TEZ_AM_SERVICE_PLUGINS_NAME_DEFAULT,
+// TezConstants.TEZ_AM_SERVICE_PLUGINS_LOCAL_MODE_NAME_DEFAULT,
EXT_PUSH_ENTITY_NAME + ":" + TezTestServiceTaskSchedulerService.class.getName());
- confForJobs.set(TezConfiguration.TEZ_AM_CONTAINER_LAUNCHERS,
- EXT_PUSH_ENTITY_NAME + ":" + TezTestServiceNoOpContainerLauncher.class.getName());
- confForJobs.set(TezConfiguration.TEZ_AM_TASK_COMMUNICATORS,
- EXT_PUSH_ENTITY_NAME + ":" + TezTestServiceTaskCommunicatorImpl.class.getName());
- confForJobs.set(TezConfiguration.TEZ_AM_VERTEX_TASK_SCHEDULER_NAME, EXT_PUSH_ENTITY_NAME);
- confForJobs.set(TezConfiguration.TEZ_AM_VERTEX_CONTAINER_LAUNCHER_NAME, EXT_PUSH_ENTITY_NAME);
- confForJobs.set(TezConfiguration.TEZ_AM_VERTEX_TASK_COMMUNICATOR_NAME, EXT_PUSH_ENTITY_NAME);
+ confForJobs.setStrings(TezConfiguration.TEZ_AM_CONTAINER_LAUNCHERS,
+ TezConstants.TEZ_AM_SERVICE_PLUGINS_NAME_DEFAULT,
+// TezConstants.TEZ_AM_SERVICE_PLUGINS_LOCAL_MODE_NAME_DEFAULT,
+ EXT_PUSH_ENTITY_NAME + ":" + TezTestServiceNoOpContainerLauncher.class.getName());
+ confForJobs.setStrings(TezConfiguration.TEZ_AM_TASK_COMMUNICATORS,
+ TezConstants.TEZ_AM_SERVICE_PLUGINS_NAME_DEFAULT,
+// TezConstants.TEZ_AM_SERVICE_PLUGINS_LOCAL_MODE_NAME_DEFAULT,
+ EXT_PUSH_ENTITY_NAME + ":" + TezTestServiceTaskCommunicatorImpl.class.getName());
- TezConfiguration tezConf = new TezConfiguration(confForJobs);
+ // Default all jobs to run in regular containers. Individual tests override this on a per vertex/dag level.
+ confForJobs.set(TezConfiguration.TEZ_AM_VERTEX_TASK_SCHEDULER_NAME,
+ TezConstants.TEZ_AM_SERVICE_PLUGINS_NAME_DEFAULT);
+ confForJobs.set(TezConfiguration.TEZ_AM_VERTEX_CONTAINER_LAUNCHER_NAME,
+ TezConstants.TEZ_AM_SERVICE_PLUGINS_NAME_DEFAULT);
+ confForJobs.set(TezConfiguration.TEZ_AM_VERTEX_TASK_COMMUNICATOR_NAME,
+ TezConstants.TEZ_AM_SERVICE_PLUGINS_NAME_DEFAULT);
+
+ // Setup various executor sets
+ PROPS_REGULAR_CONTAINERS.put(TezConfiguration.TEZ_AM_VERTEX_TASK_SCHEDULER_NAME,
+ TezConstants.TEZ_AM_SERVICE_PLUGINS_NAME_DEFAULT);
+ PROPS_REGULAR_CONTAINERS.put(TezConfiguration.TEZ_AM_VERTEX_CONTAINER_LAUNCHER_NAME,
+ TezConstants.TEZ_AM_SERVICE_PLUGINS_NAME_DEFAULT);
+ PROPS_REGULAR_CONTAINERS.put(TezConfiguration.TEZ_AM_VERTEX_TASK_COMMUNICATOR_NAME,
+ TezConstants.TEZ_AM_SERVICE_PLUGINS_NAME_DEFAULT);
+
+ PROPS_EXT_SERVICE_PUSH.put(TezConfiguration.TEZ_AM_VERTEX_TASK_SCHEDULER_NAME, EXT_PUSH_ENTITY_NAME);
+ PROPS_EXT_SERVICE_PUSH.put(TezConfiguration.TEZ_AM_VERTEX_CONTAINER_LAUNCHER_NAME, EXT_PUSH_ENTITY_NAME);
+ PROPS_EXT_SERVICE_PUSH.put(TezConfiguration.TEZ_AM_VERTEX_TASK_COMMUNICATOR_NAME, EXT_PUSH_ENTITY_NAME);
+
+ PROPS_IN_AM.put(TezConfiguration.TEZ_AM_VERTEX_TASK_SCHEDULER_NAME,
+ TezConstants.TEZ_AM_SERVICE_PLUGINS_LOCAL_MODE_NAME_DEFAULT);
+ PROPS_IN_AM.put(TezConfiguration.TEZ_AM_VERTEX_CONTAINER_LAUNCHER_NAME,
+ TezConstants.TEZ_AM_SERVICE_PLUGINS_LOCAL_MODE_NAME_DEFAULT);
+ PROPS_IN_AM.put(TezConfiguration.TEZ_AM_VERTEX_TASK_COMMUNICATOR_NAME,
+ TezConstants.TEZ_AM_SERVICE_PLUGINS_LOCAL_MODE_NAME_DEFAULT);
+
+
+ // Create a session to use for all tests.
+ TezConfiguration tezClientConf = new TezConfiguration(confForJobs);
sharedTezClient = TezClient.create(TestExternalTezServices.class.getSimpleName() + "_session",
- tezConf, true);
+ tezClientConf, true);
sharedTezClient.start();
LOG.info("Shared TezSession started");
sharedTezClient.waitTillReady();
LOG.info("Shared TezSession ready for submission");
+ // Generate the join data set used for each run.
+ // Can a timeout be enforced here ?
+ remoteFs.mkdirs(SRC_DATA_DIR);
+ Path dataPath1 = new Path(SRC_DATA_DIR, "inPath1");
+ Path dataPath2 = new Path(SRC_DATA_DIR, "inPath2");
+ TezConfiguration tezConf = new TezConfiguration(confForJobs);
+ // Generate join data - with 2 tasks.
+ JoinDataGen dataGen = new JoinDataGen();
+ String[] dataGenArgs = new String[]{
+ dataPath1.toString(), "1048576", dataPath2.toString(), "524288",
+ HASH_JOIN_EXPECTED_RESULT_PATH.toString(), "2"};
+ assertEquals(0, dataGen.run(tezConf, dataGenArgs, sharedTezClient));
+ // Run the actual join - with 2 reducers
+ HashJoinExample joinExample = new HashJoinExample();
+ String[] args = new String[]{
+ dataPath1.toString(), dataPath2.toString(), "2", HASH_JOIN_OUTPUT_PATH.toString()};
+ assertEquals(0, joinExample.run(tezConf, args, sharedTezClient));
+
+ LOG.info("Completed generating Data - Expected Hash Result and Actual Join Result");
}
@AfterClass
@@ -156,35 +217,50 @@ public class TestExternalTezServices {
@Test(timeout = 60000)
- public void test1() throws Exception {
- Path testDir = new Path("/tmp/testHashJoinExample");
+ public void testAllInService() throws Exception {
+ int expectedExternalSubmissions = 4 + 3; //4 for 4 src files, 3 for num reducers.
+ runJoinValidate("AllInService", expectedExternalSubmissions, PROPS_EXT_SERVICE_PUSH,
+ PROPS_EXT_SERVICE_PUSH, PROPS_EXT_SERVICE_PUSH);
+ }
- remoteFs.mkdirs(testDir);
+ @Test(timeout = 60000)
+ public void testAllInContainers() throws Exception {
+ int expectedExternalSubmissions = 0; // All in containers
+ runJoinValidate("AllInContainers", expectedExternalSubmissions, PROPS_REGULAR_CONTAINERS,
+ PROPS_REGULAR_CONTAINERS, PROPS_REGULAR_CONTAINERS);
+ }
- Path dataPath1 = new Path(testDir, "inPath1");
- Path dataPath2 = new Path(testDir, "inPath2");
- Path expectedOutputPath = new Path(testDir, "expectedOutputPath");
- Path outPath = new Path(testDir, "outPath");
+ @Test(timeout = 60000)
+ public void testMixed1() throws Exception { // M-ExtService, R-containers
+ int expectedExternalSubmissions = 4 + 0; //4 for 4 src files, 0 for reducers (they run in containers).
+ runJoinValidate("Mixed1", expectedExternalSubmissions, PROPS_EXT_SERVICE_PUSH,
+ PROPS_EXT_SERVICE_PUSH, PROPS_REGULAR_CONTAINERS);
+ }
- TezConfiguration tezConf = new TezConfiguration(confForJobs);
+ @Test(timeout = 60000)
+ public void testMixed2() throws Exception { // M-Containers, R-ExtService
+ int expectedExternalSubmissions = 0 + 3; //0 for src files (they run in containers), 3 for num reducers.
+ runJoinValidate("Mixed2", expectedExternalSubmissions, PROPS_REGULAR_CONTAINERS,
+ PROPS_REGULAR_CONTAINERS, PROPS_EXT_SERVICE_PUSH);
+ }
- JoinDataGen dataGen = new JoinDataGen();
- String[] dataGenArgs = new String[]{
- dataPath1.toString(), "1048576", dataPath2.toString(), "524288",
- expectedOutputPath.toString(), "2"};
- assertEquals(0, dataGen.run(tezConf, dataGenArgs, sharedTezClient));
- HashJoinExample joinExample = new HashJoinExample();
- String[] args = new String[]{
- dataPath1.toString(), dataPath2.toString(), "2", outPath.toString()};
- assertEquals(0, joinExample.run(tezConf, args, sharedTezClient));
+ private void runJoinValidate(String name, int extExpectedCount, Map<String, String> lhsProps,
+ Map<String, String> rhsProps,
+ Map<String, String> validateProps) throws
+ Exception {
+ int externalSubmissionCount = tezTestServiceCluster.getNumSubmissions();
- JoinValidate joinValidate = new JoinValidate();
- String[] validateArgs = new String[]{
- expectedOutputPath.toString(), outPath.toString(), "3"};
+ TezConfiguration tezConf = new TezConfiguration(confForJobs);
+ JoinValidateConfigured joinValidate =
+ new JoinValidateConfigured(lhsProps, rhsProps,
+ validateProps, name);
+ String[] validateArgs = new String[]{"-disableSplitGrouping",
+ HASH_JOIN_EXPECTED_RESULT_PATH.toString(), HASH_JOIN_OUTPUT_PATH.toString(), "3"};
assertEquals(0, joinValidate.run(tezConf, validateArgs, sharedTezClient));
// Ensure this was actually submitted to the external cluster
- assertTrue(tezTestServiceCluster.getNumSubmissions() > 0);
+ assertEquals(extExpectedCount,
+ (tezTestServiceCluster.getNumSubmissions() - externalSubmissionCount));
}
}
[15/43] tez git commit: TEZ-2410. VertexGroupCommitFinishedEvent &
VertexCommitStartedEvent is not logged correctly (zjffdu)
Posted by ss...@apache.org.
TEZ-2410. VertexGroupCommitFinishedEvent & VertexCommitStartedEvent is not logged correctly (zjffdu)
Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/4a6808ce
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/4a6808ce
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/4a6808ce
Branch: refs/heads/TEZ-2003
Commit: 4a6808ce4c99458653bbe4328dfcad24649a48fb
Parents: 05f77fe
Author: Jeff Zhang <zj...@apache.org>
Authored: Fri May 8 12:42:46 2015 +0800
Committer: Jeff Zhang <zj...@apache.org>
Committed: Fri May 8 12:42:46 2015 +0800
----------------------------------------------------------------------
CHANGES.txt | 1 +
.../apache/tez/dag/app/dag/impl/DAGImpl.java | 61 ++-
.../apache/tez/dag/app/dag/impl/VertexImpl.java | 1 -
.../apache/tez/dag/app/dag/impl/TestCommit.java | 454 ++++++++++++++++++-
4 files changed, 477 insertions(+), 40 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/tez/blob/4a6808ce/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index ba8e9d8..3520768 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -20,6 +20,7 @@ INCOMPATIBLE CHANGES
Default max limit increased. Should not affect existing users.
ALL CHANGES:
+ TEZ-2410. VertexGroupCommitFinishedEvent & VertexCommitStartedEvent is not logged correctly
TEZ-776. Reduce AM mem usage caused by storing TezEvents
TEZ-2423. Tez UI: Remove Attempt Index column from task->attempts page
TEZ-2416. Tez UI: Make tooltips display faster.
http://git-wip-us.apache.org/repos/asf/tez/blob/4a6808ce/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/DAGImpl.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/DAGImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/DAGImpl.java
index f769565..1726c18 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/DAGImpl.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/DAGImpl.java
@@ -212,7 +212,6 @@ public class DAGImpl implements org.apache.tez.dag.app.dag.DAG,
@VisibleForTesting
Map<OutputKey, ListenableFuture<Void>> commitFutures
= new HashMap<OutputKey, ListenableFuture<Void>>();
- private Set<OutputKey> succeededCommits = new HashSet<OutputKey>();
private static final DiagnosticsUpdateTransition
DIAGNOSTIC_UPDATE_TRANSITION = new DiagnosticsUpdateTransition();
@@ -457,7 +456,9 @@ public class DAGImpl implements org.apache.tez.dag.app.dag.DAG,
Set<String> outputs;
Map<String, InputDescriptor> edgeMergedInputs;
int successfulMembers;
- boolean committed;
+ int successfulCommits;
+ boolean commitStarted;
+
VertexGroupInfo(PlanVertexGroupInfo groupInfo) {
groupName = groupInfo.getGroupName();
groupMembers = Sets.newHashSet(groupInfo.getGroupMembersList());
@@ -468,10 +469,20 @@ public class DAGImpl implements org.apache.tez.dag.app.dag.DAG,
}
outputs = Sets.newHashSet(groupInfo.getOutputsList());
successfulMembers = 0;
- committed = false;
+ successfulCommits = 0;
+ commitStarted = false;
+ }
+
+ public boolean isInCommitting() {
+ return commitStarted && successfulCommits < outputs.size();
+ }
+
+ public boolean isCommitted() {
+ return commitStarted && successfulCommits == outputs.size();
}
}
+
public DAGImpl(TezDAGID dagId,
Configuration amConf,
DAGPlan jobPlan,
@@ -962,7 +973,7 @@ public class DAGImpl implements org.apache.tez.dag.app.dag.DAG,
// commit all shared outputs
for (final VertexGroupInfo groupInfo : vertexGroups.values()) {
if (!groupInfo.outputs.isEmpty()) {
- groupInfo.committed = true;
+ groupInfo.commitStarted = true;
final Vertex v = getVertex(groupInfo.groupMembers.iterator().next());
for (final String outputName : groupInfo.outputs) {
final OutputKey outputKey = new OutputKey(outputName, groupInfo.groupName, true);
@@ -1920,7 +1931,7 @@ public class DAGImpl implements org.apache.tez.dag.app.dag.DAG,
+ " data, groupName=" + groupInfo.groupName);
continue;
}
- groupInfo.committed = true;
+ groupInfo.commitStarted = true;
final Vertex v = getVertex(groupInfo.groupMembers.iterator().next());
try {
appContext.getHistoryHandler().handleCriticalEvent(new DAGHistoryEvent(getID(),
@@ -1966,11 +1977,19 @@ public class DAGImpl implements org.apache.tez.dag.app.dag.DAG,
+ ", vertexId=" + vertex.getVertexId());
if (!commitAllOutputsOnSuccess) {
- // partial output may already have been committed. fail if so
+ // partial output may already be committing or have been committed. fail if so
List<VertexGroupInfo> groupList = vertexGroupInfo.get(vertex.getName());
if (groupList != null) {
for (VertexGroupInfo groupInfo : groupList) {
- if (groupInfo.committed) {
+ if (groupInfo.isInCommitting()) {
+ String msg = "Aborting job as committing vertex: "
+ + vertex.getLogIdentifier() + " is re-running";
+ LOG.info(msg);
+ addDiagnostic(msg);
+ enactKill(DAGTerminationCause.VERTEX_RERUN_IN_COMMITTING,
+ VertexTerminationCause.VERTEX_RERUN_IN_COMMITTING);
+ return true;
+ } else if (groupInfo.isCommitted()) {
String msg = "Aborting job as committed vertex: "
+ vertex.getLogIdentifier() + " is re-running";
LOG.info(msg);
@@ -2091,17 +2110,23 @@ public class DAGImpl implements org.apache.tez.dag.app.dag.DAG,
boolean recoveryFailed = false;
if (commitCompletedEvent.isSucceeded()) {
LOG.info("Commit succeeded for output:" + commitCompletedEvent.getOutputKey());
- succeededCommits.add(commitCompletedEvent.getOutputKey());
- if (!commitAllOutputsOnSuccess) {
- try {
- appContext.getHistoryHandler().handleCriticalEvent(new DAGHistoryEvent(getID(),
- new VertexGroupCommitFinishedEvent(getID(), commitCompletedEvent.getOutputKey().getEntityName(),
- clock.getTime())));
- } catch (IOException e) {
- String diag = "Failed to send commit recovery event to handler, " + ExceptionUtils.getStackTrace(e);
- addDiagnostic(diag);
- LOG.error(diag);
- recoveryFailed = true;
+ OutputKey outputKey = commitCompletedEvent.getOutputKey();
+ if (outputKey.isVertexGroupOutput){
+ VertexGroupInfo vertexGroup = vertexGroups.get(outputKey.getEntityName());
+ vertexGroup.successfulCommits++;
+ if (vertexGroup.isCommitted()) {
+ if (!commitAllOutputsOnSuccess) {
+ try {
+ appContext.getHistoryHandler().handleCriticalEvent(new DAGHistoryEvent(getID(),
+ new VertexGroupCommitFinishedEvent(getID(), commitCompletedEvent.getOutputKey().getEntityName(),
+ clock.getTime())));
+ } catch (IOException e) {
+ String diag = "Failed to send commit recovery event to handler, " + ExceptionUtils.getStackTrace(e);
+ addDiagnostic(diag);
+ LOG.error(diag);
+ recoveryFailed = true;
+ }
+ }
}
}
} else {
http://git-wip-us.apache.org/repos/asf/tez/blob/4a6808ce/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexImpl.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexImpl.java
index a16ee0a..3a9558d 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexImpl.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexImpl.java
@@ -1901,7 +1901,6 @@ public class VertexImpl implements org.apache.tez.dag.app.dag.Vertex, EventHandl
vertex.trySetTerminationCause(VertexTerminationCause.RECOVERY_ERROR);
return vertex.finished(VertexState.FAILED);
}
- } else {
firstCommit = false;
}
VertexCommitCallback commitCallback = new VertexCommitCallback(vertex, outputName);
http://git-wip-us.apache.org/repos/asf/tez/blob/4a6808ce/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestCommit.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestCommit.java b/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestCommit.java
index 0df8a4f..8fc29c2 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestCommit.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestCommit.java
@@ -26,9 +26,9 @@ import java.io.DataOutput;
import java.io.DataOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
+import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
-import java.util.Map;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
@@ -63,7 +63,6 @@ import org.apache.tez.dag.api.OutputDescriptor;
import org.apache.tez.dag.api.ProcessorDescriptor;
import org.apache.tez.dag.api.TezConfiguration;
import org.apache.tez.dag.api.UserPayload;
-import org.apache.tez.dag.api.VertexManagerPlugin;
import org.apache.tez.dag.api.VertexManagerPluginContext;
import org.apache.tez.dag.api.VertexManagerPluginDescriptor;
import org.apache.tez.dag.api.client.VertexStatus;
@@ -96,11 +95,13 @@ import org.apache.tez.dag.app.dag.event.VertexEventTaskReschedule;
import org.apache.tez.dag.app.dag.event.VertexEventType;
import org.apache.tez.dag.app.dag.impl.DAGImpl.OutputKey;
import org.apache.tez.dag.history.DAGHistoryEvent;
+import org.apache.tez.dag.history.HistoryEvent;
import org.apache.tez.dag.history.HistoryEventHandler;
import org.apache.tez.dag.history.HistoryEventType;
+import org.apache.tez.dag.history.events.*;
import org.apache.tez.dag.records.TezDAGID;
import org.apache.tez.dag.records.TezTaskID;
-import org.apache.tez.runtime.api.Event;
+import org.apache.tez.dag.records.TezVertexID;
import org.apache.tez.runtime.api.OutputCommitter;
import org.apache.tez.runtime.api.OutputCommitterContext;
import org.apache.tez.runtime.api.events.VertexManagerEvent;
@@ -144,7 +145,7 @@ public class TestCommit {
private TaskHeartbeatHandler thh;
private Clock clock = new SystemClock();
private DAGFinishEventHandler dagFinishEventHandler;
- private HistoryEventHandler historyEventHandler;
+ private MockHistoryEventHandler historyEventHandler;
private TaskAttemptEventDispatcher taskAttemptEventDispatcher;
private ExecutorService rawExecutor;
@@ -305,7 +306,7 @@ public class TestCommit {
execService = MoreExecutors.listeningDecorator(rawExecutor);
doReturn(execService).when(appContext).getExecService();
- historyEventHandler = mock(HistoryEventHandler.class);
+ historyEventHandler = new MockHistoryEventHandler(appContext);
aclManager = new ACLManager("amUser");
doReturn(conf).when(appContext).getAMConf();
doReturn(appAttemptId).when(appContext).getApplicationAttemptId();
@@ -441,6 +442,63 @@ public class TestCommit {
return dag.createDag(conf, null, null, null, true);
}
+ // v1->v3
+ // v2->v3
+ // vertex_group (v1, v2) has 2 shared outputs
+ private DAGPlan createDAGPlanWith2VertexGroupOutputs(boolean vertexGroupCommitSucceeded1,
+ boolean vertexGroupCommitSucceeded2, boolean v3CommitSucceeded) throws Exception {
+ LOG.info("Setting up group dag plan");
+ int dummyTaskCount = 1;
+ Resource dummyTaskResource = Resource.newInstance(1, 1);
+ org.apache.tez.dag.api.Vertex v1 = org.apache.tez.dag.api.Vertex.create(
+ "vertex1", ProcessorDescriptor.create("Processor"), dummyTaskCount,
+ dummyTaskResource);
+ org.apache.tez.dag.api.Vertex v2 = org.apache.tez.dag.api.Vertex.create(
+ "vertex2", ProcessorDescriptor.create("Processor"), dummyTaskCount,
+ dummyTaskResource);
+ org.apache.tez.dag.api.Vertex v3 = org.apache.tez.dag.api.Vertex.create(
+ "vertex3", ProcessorDescriptor.create("Processor"), dummyTaskCount,
+ dummyTaskResource);
+
+ DAG dag = DAG.create("testDag");
+ String groupName1 = "uv12";
+ OutputCommitterDescriptor ocd1 = OutputCommitterDescriptor.create(
+ CountingOutputCommitter.class.getName()).setUserPayload(
+ UserPayload.create(ByteBuffer
+ .wrap(new CountingOutputCommitter.CountingOutputCommitterConfig(
+ !vertexGroupCommitSucceeded1, true).toUserPayload())));
+ OutputCommitterDescriptor ocd2 = OutputCommitterDescriptor.create(
+ CountingOutputCommitter.class.getName()).setUserPayload(
+ UserPayload.create(ByteBuffer
+ .wrap(new CountingOutputCommitter.CountingOutputCommitterConfig(
+ !vertexGroupCommitSucceeded2, true).toUserPayload())));
+ OutputCommitterDescriptor ocd3 = OutputCommitterDescriptor.create(
+ CountingOutputCommitter.class.getName()).setUserPayload(
+ UserPayload.create(ByteBuffer
+ .wrap(new CountingOutputCommitter.CountingOutputCommitterConfig(
+ !v3CommitSucceeded, true).toUserPayload())));
+
+ org.apache.tez.dag.api.VertexGroup uv12 = dag.createVertexGroup(groupName1,
+ v1, v2);
+ OutputDescriptor outDesc = OutputDescriptor.create("output.class");
+ uv12.addDataSink("v12Out1", DataSinkDescriptor.create(outDesc, ocd1, null));
+ uv12.addDataSink("v12Out2", DataSinkDescriptor.create(outDesc, ocd2, null));
+ v3.addDataSink("v3Out", DataSinkDescriptor.create(outDesc, ocd3, null));
+
+ GroupInputEdge e1 = GroupInputEdge.create(uv12, v3, EdgeProperty.create(
+ DataMovementType.SCATTER_GATHER, DataSourceType.PERSISTED,
+ SchedulingType.SEQUENTIAL,
+ OutputDescriptor.create("dummy output class"),
+ InputDescriptor.create("dummy input class")), InputDescriptor
+ .create("merge.class"));
+
+ dag.addVertex(v1);
+ dag.addVertex(v2);
+ dag.addVertex(v3);
+ dag.addEdge(e1);
+ return dag.createDag(conf, null, null, null, true);
+ }
+
private DAGPlan createDAGPlan_SingleVertexWith2Committer(
boolean commit1Succeed, boolean commit2Succeed) throws IOException {
return createDAGPlan_SingleVertexWith2Committer(commit1Succeed, commit2Succeed, false);
@@ -493,7 +551,7 @@ public class TestCommit {
}
@Test(timeout = 5000)
- public void testVertexSucceedWithoutCommit() throws Exception {
+ public void testVertexCommit_OnDAGSuccess() throws Exception {
conf.setBoolean(TezConfiguration.TEZ_AM_COMMIT_ALL_OUTPUTS_ON_DAG_SUCCESS,
true);
setupDAG(createDAGPlan_SingleVertexWith2Committer(true, true));
@@ -509,6 +567,9 @@ public class TestCommit {
.getOutputCommitter("v1Out_1");
CountingOutputCommitter v1OutputCommitter_2 = (CountingOutputCommitter) v1
.getOutputCommitter("v1Out_2");
+ historyEventHandler.verifyVertexCommitStartedEvent(v1.getVertexId(), 0);
+ historyEventHandler.verifyVertexFinishedEvent(v1.getVertexId(), 1);
+
Assert.assertEquals(1, v1OutputCommitter_1.initCounter);
Assert.assertEquals(1, v1OutputCommitter_1.setupCounter);
Assert.assertEquals(0, v1OutputCommitter_1.commitCounter);
@@ -544,6 +605,8 @@ public class TestCommit {
waitUntil(v1, VertexState.SUCCEEDED);
Assert.assertNull(v1.getTerminationCause());
Assert.assertTrue(v1.commitFutures.isEmpty());
+ historyEventHandler.verifyVertexCommitStartedEvent(v1.getVertexId(), 1);
+ historyEventHandler.verifyVertexFinishedEvent(v1.getVertexId(), 1);
Assert.assertEquals(1, v1OutputCommitter_1.initCounter);
Assert.assertEquals(1, v1OutputCommitter_1.setupCounter);
@@ -576,8 +639,11 @@ public class TestCommit {
Assert.assertEquals(VertexTerminationCause.COMMIT_FAILURE,
v1.getTerminationCause());
Assert.assertTrue(v1.commitFutures.isEmpty());
+ historyEventHandler.verifyVertexCommitStartedEvent(v1.getVertexId(), 1);
+ historyEventHandler.verifyVertexFinishedEvent(v1.getVertexId(), 1);
CountingOutputCommitter v1OutputCommitter_2 = (CountingOutputCommitter) v1
.getOutputCommitter("v1Out_2");
+
Assert.assertEquals(1, v1OutputCommitter_1.initCounter);
Assert.assertEquals(1, v1OutputCommitter_1.setupCounter);
Assert.assertEquals(1, v1OutputCommitter_1.commitCounter);
@@ -606,11 +672,13 @@ public class TestCommit {
v1OutputCommitter_1.unblockCommit();
waitForCommitCompleted(v1, "v1Out_1");
Assert.assertEquals(VertexState.COMMITTING, v1.getState());
-
+
CountingOutputCommitter v1OutputCommitter_2 = (CountingOutputCommitter) v1
.getOutputCommitter("v1Out_2");
v1OutputCommitter_2.unblockCommit();
waitUntil(v1, VertexState.FAILED);
+ historyEventHandler.verifyVertexCommitStartedEvent(v1.getVertexId(), 1);
+ historyEventHandler.verifyVertexFinishedEvent(v1.getVertexId(), 1);
Assert.assertEquals(VertexTerminationCause.COMMIT_FAILURE,
v1.getTerminationCause());
@@ -647,6 +715,8 @@ public class TestCommit {
Assert.assertEquals(DAGState.KILLED, dag.getState());
Assert
.assertEquals(DAGTerminationCause.DAG_KILL, dag.getTerminationCause());
+ historyEventHandler.verifyVertexCommitStartedEvent(v1.getVertexId(), 1);
+ historyEventHandler.verifyVertexFinishedEvent(v1.getVertexId(), 1);
CountingOutputCommitter v1OutputCommitter_1 = (CountingOutputCommitter) v1
.getOutputCommitter("v1Out_1");
@@ -685,6 +755,8 @@ public class TestCommit {
Assert.assertEquals(DAGState.FAILED, dag.getState());
Assert.assertEquals(DAGTerminationCause.VERTEX_FAILURE,
dag.getTerminationCause());
+ historyEventHandler.verifyVertexCommitStartedEvent(v1.getVertexId(), 1);
+ historyEventHandler.verifyVertexFinishedEvent(v1.getVertexId(), 1);
CountingOutputCommitter v1OutputCommitter_1 = (CountingOutputCommitter) v1
.getOutputCommitter("v1Out_1");
@@ -727,6 +799,8 @@ public class TestCommit {
Assert.assertEquals(DAGState.FAILED, dag.getState());
Assert.assertEquals(DAGTerminationCause.VERTEX_FAILURE,
dag.getTerminationCause());
+ historyEventHandler.verifyVertexCommitStartedEvent(v1.getVertexId(), 1);
+ historyEventHandler.verifyVertexFinishedEvent(v1.getVertexId(), 1);
CountingOutputCommitter v1OutputCommitter_1 = (CountingOutputCommitter) v1
.getOutputCommitter("v1Out_1");
@@ -764,6 +838,8 @@ public class TestCommit {
Assert.assertEquals(DAGState.ERROR, dag.getState());
Assert.assertEquals(DAGTerminationCause.INTERNAL_ERROR,
dag.getTerminationCause());
+ historyEventHandler.verifyVertexCommitStartedEvent(v1.getVertexId(), 1);
+ historyEventHandler.verifyVertexFinishedEvent(v1.getVertexId(), 1);
CountingOutputCommitter v1OutputCommitter_1 = (CountingOutputCommitter) v1
.getOutputCommitter("v1Out_1");
@@ -811,6 +887,20 @@ public class TestCommit {
waitUntil(dag, DAGState.SUCCEEDED);
Assert.assertTrue(dag.commitFutures.isEmpty());
Assert.assertNull(dag.getTerminationCause());
+ historyEventHandler.verifyVertexGroupCommitStartedEvent("uv12", 0);
+ historyEventHandler.verifyVertexGroupCommitFinishedEvent("uv12", 0);
+ historyEventHandler.verifyVertexGroupCommitStartedEvent("v1", 0);
+ historyEventHandler.verifyVertexGroupCommitFinishedEvent("v1", 0);
+ historyEventHandler.verifyVertexGroupCommitStartedEvent("v3", 0);
+ historyEventHandler.verifyVertexGroupCommitFinishedEvent("v3", 0);
+ historyEventHandler.verifyVertexCommitStartedEvent(v1.getVertexId(), 0);
+ historyEventHandler.verifyVertexFinishedEvent(v1.getVertexId(), 1);
+ historyEventHandler.verifyVertexCommitStartedEvent(v2.getVertexId(), 0);
+ historyEventHandler.verifyVertexFinishedEvent(v2.getVertexId(), 1);
+ historyEventHandler.verifyVertexCommitStartedEvent(v3.getVertexId(), 0);
+ historyEventHandler.verifyVertexFinishedEvent(v3.getVertexId(), 1);
+ historyEventHandler.verifyDAGCommitStartedEvent(dag.getID(), 1);
+ historyEventHandler.verifyDAGFinishedEvent(dag.getID(), 1);
Assert.assertEquals(1, v12OutputCommitter.initCounter);
Assert.assertEquals(1, v12OutputCommitter.setupCounter);
@@ -857,10 +947,21 @@ public class TestCommit {
Assert.assertEquals(VertexState.SUCCEEDED, v1.getState());
Assert.assertEquals(VertexState.SUCCEEDED, v2.getState());
Assert.assertEquals(VertexState.SUCCEEDED, v3.getState());
-
Assert.assertEquals(DAGTerminationCause.COMMIT_FAILURE,
dag.getTerminationCause());
Assert.assertTrue(dag.commitFutures.isEmpty());
+ historyEventHandler.verifyVertexGroupCommitStartedEvent("uv12", 0);
+ historyEventHandler.verifyVertexGroupCommitFinishedEvent("uv12", 0);
+ historyEventHandler.verifyVertexGroupCommitStartedEvent("v1", 0);
+ historyEventHandler.verifyVertexGroupCommitFinishedEvent("v1", 0);
+ historyEventHandler.verifyVertexCommitStartedEvent(v1.getVertexId(), 0);
+ historyEventHandler.verifyVertexFinishedEvent(v1.getVertexId(), 1);
+ historyEventHandler.verifyVertexCommitStartedEvent(v2.getVertexId(), 0);
+ historyEventHandler.verifyVertexFinishedEvent(v2.getVertexId(), 1);
+ historyEventHandler.verifyVertexCommitStartedEvent(v3.getVertexId(), 0);
+ historyEventHandler.verifyVertexFinishedEvent(v3.getVertexId(), 1);
+ historyEventHandler.verifyDAGCommitStartedEvent(dag.getID(), 1);
+ historyEventHandler.verifyDAGFinishedEvent(dag.getID(), 1);
Assert.assertEquals(1, v12OutputCommitter.initCounter);
Assert.assertEquals(1, v12OutputCommitter.setupCounter);
@@ -899,6 +1000,8 @@ public class TestCommit {
.getOutputCommitter("v3Out");
v12OutputCommitter.unblockCommit();
waitUntil(dag, DAGState.FAILED);
+ historyEventHandler.verifyVertexGroupCommitStartedEvent("uv12", 0);
+ historyEventHandler.verifyVertexGroupCommitFinishedEvent("uv12", 0);
Assert.assertEquals(VertexState.SUCCEEDED, v1.getState());
Assert.assertEquals(VertexState.SUCCEEDED, v2.getState());
@@ -907,6 +1010,18 @@ public class TestCommit {
Assert.assertEquals(DAGTerminationCause.COMMIT_FAILURE,
dag.getTerminationCause());
Assert.assertTrue(dag.commitFutures.isEmpty());
+ historyEventHandler.verifyVertexGroupCommitStartedEvent("uv12", 0);
+ historyEventHandler.verifyVertexGroupCommitFinishedEvent("uv12", 0);
+ historyEventHandler.verifyVertexGroupCommitStartedEvent("v1", 0);
+ historyEventHandler.verifyVertexGroupCommitFinishedEvent("v1", 0);
+ historyEventHandler.verifyVertexCommitStartedEvent(v1.getVertexId(), 0);
+ historyEventHandler.verifyVertexFinishedEvent(v1.getVertexId(), 1);
+ historyEventHandler.verifyVertexCommitStartedEvent(v2.getVertexId(), 0);
+ historyEventHandler.verifyVertexFinishedEvent(v2.getVertexId(), 1);
+ historyEventHandler.verifyVertexCommitStartedEvent(v3.getVertexId(), 0);
+ historyEventHandler.verifyVertexFinishedEvent(v3.getVertexId(), 1);
+ historyEventHandler.verifyDAGCommitStartedEvent(dag.getID(), 1);
+ historyEventHandler.verifyDAGFinishedEvent(dag.getID(), 1);
Assert.assertEquals(1, v12OutputCommitter.initCounter);
Assert.assertEquals(1, v12OutputCommitter.setupCounter);
@@ -953,6 +1068,18 @@ public class TestCommit {
v12OutputCommitter.unblockCommit();
waitUntil(dag, DAGState.SUCCEEDED);
Assert.assertTrue(dag.commitFutures.isEmpty());
+ historyEventHandler.verifyVertexGroupCommitStartedEvent("uv12", 1);
+ historyEventHandler.verifyVertexGroupCommitFinishedEvent("uv12", 1);
+ historyEventHandler.verifyVertexGroupCommitStartedEvent("v1", 0);
+ historyEventHandler.verifyVertexGroupCommitFinishedEvent("v1", 0);
+ historyEventHandler.verifyVertexCommitStartedEvent(v1.getVertexId(), 0);
+ historyEventHandler.verifyVertexFinishedEvent(v1.getVertexId(), 1);
+ historyEventHandler.verifyVertexCommitStartedEvent(v2.getVertexId(), 0);
+ historyEventHandler.verifyVertexFinishedEvent(v2.getVertexId(), 1);
+ historyEventHandler.verifyVertexCommitStartedEvent(v3.getVertexId(), 1);
+ historyEventHandler.verifyVertexFinishedEvent(v3.getVertexId(), 1);
+ historyEventHandler.verifyDAGCommitStartedEvent(dag.getID(), 0);
+ historyEventHandler.verifyDAGFinishedEvent(dag.getID(), 1);
Assert.assertEquals(1, v12OutputCommitter.initCounter);
Assert.assertEquals(1, v12OutputCommitter.setupCounter);
@@ -1001,6 +1128,18 @@ public class TestCommit {
v3OutputCommitter.unblockCommit();
waitUntil(dag, DAGState.SUCCEEDED);
Assert.assertTrue(dag.commitFutures.isEmpty());
+ historyEventHandler.verifyVertexGroupCommitStartedEvent("uv12", 1);
+ historyEventHandler.verifyVertexGroupCommitFinishedEvent("uv12", 1);
+ historyEventHandler.verifyVertexGroupCommitStartedEvent("v1", 0);
+ historyEventHandler.verifyVertexGroupCommitFinishedEvent("v1", 0);
+ historyEventHandler.verifyVertexCommitStartedEvent(v1.getVertexId(), 0);
+ historyEventHandler.verifyVertexFinishedEvent(v1.getVertexId(), 1);
+ historyEventHandler.verifyVertexCommitStartedEvent(v2.getVertexId(), 0);
+ historyEventHandler.verifyVertexFinishedEvent(v2.getVertexId(), 1);
+ historyEventHandler.verifyVertexCommitStartedEvent(v3.getVertexId(), 1);
+ historyEventHandler.verifyVertexFinishedEvent(v3.getVertexId(), 1);
+ historyEventHandler.verifyDAGCommitStartedEvent(dag.getID(), 0);
+ historyEventHandler.verifyDAGFinishedEvent(dag.getID(), 1);
Assert.assertEquals(1, v12OutputCommitter.initCounter);
Assert.assertEquals(1, v12OutputCommitter.setupCounter);
@@ -1013,6 +1152,71 @@ public class TestCommit {
Assert.assertEquals(0, v3OutputCommitter.abortCounter);
}
+ // test DAGCommitSucceeded when vertex group has multiple shared outputs
+ @Test(timeout = 5000)
+ public void testDAGCommitSucceeded3_OnVertexSuccess() throws Exception {
+ conf.setBoolean(TezConfiguration.TEZ_AM_COMMIT_ALL_OUTPUTS_ON_DAG_SUCCESS,
+ false);
+ setupDAG(createDAGPlanWith2VertexGroupOutputs(true, true, true));
+ initDAG(dag);
+ startDAG(dag);
+ VertexImpl v1 = (VertexImpl) dag.getVertex("vertex1");
+ VertexImpl v2 = (VertexImpl) dag.getVertex("vertex2");
+ VertexImpl v3 = (VertexImpl) dag.getVertex("vertex3");
+
+ v1.handle(new VertexEventTaskCompleted(v1.getTask(0).getTaskId(),
+ TaskState.SUCCEEDED));
+ v2.handle(new VertexEventTaskCompleted(v2.getTask(0).getTaskId(),
+ TaskState.SUCCEEDED));
+ v3.handle(new VertexEventTaskCompleted(v3.getTask(0).getTaskId(),
+ TaskState.SUCCEEDED));
+ Assert.assertEquals(VertexState.SUCCEEDED, v1.getState());
+ Assert.assertEquals(VertexState.SUCCEEDED, v2.getState());
+ Assert.assertEquals(VertexState.COMMITTING, v3.getState());
+ Assert.assertEquals(DAGState.RUNNING, dag.getState());
+
+ CountingOutputCommitter v12OutputCommitter1 = (CountingOutputCommitter) v1
+ .getOutputCommitter("v12Out1");
+ v12OutputCommitter1.unblockCommit();
+ CountingOutputCommitter v12OutputCommitter2 = (CountingOutputCommitter) v1
+ .getOutputCommitter("v12Out2");
+ v12OutputCommitter2.unblockCommit();
+ Assert.assertEquals(DAGState.RUNNING, dag.getState());
+
+ CountingOutputCommitter v3OutputCommitter = (CountingOutputCommitter) v3
+ .getOutputCommitter("v3Out");
+ v3OutputCommitter.unblockCommit();
+ waitUntil(dag, DAGState.SUCCEEDED);
+ Assert.assertTrue(dag.commitFutures.isEmpty());
+ historyEventHandler.verifyVertexGroupCommitStartedEvent("uv12", 1);
+ historyEventHandler.verifyVertexGroupCommitFinishedEvent("uv12", 1);
+ historyEventHandler.verifyVertexGroupCommitStartedEvent("v1", 0);
+ historyEventHandler.verifyVertexGroupCommitFinishedEvent("v1", 0);
+ historyEventHandler.verifyVertexCommitStartedEvent(v1.getVertexId(), 0);
+ historyEventHandler.verifyVertexFinishedEvent(v1.getVertexId(), 1);
+ historyEventHandler.verifyVertexCommitStartedEvent(v2.getVertexId(), 0);
+ historyEventHandler.verifyVertexFinishedEvent(v2.getVertexId(), 1);
+ historyEventHandler.verifyVertexCommitStartedEvent(v3.getVertexId(), 1);
+ historyEventHandler.verifyVertexFinishedEvent(v3.getVertexId(), 1);
+ historyEventHandler.verifyDAGCommitStartedEvent(dag.getID(), 0);
+ historyEventHandler.verifyDAGFinishedEvent(dag.getID(), 1);
+
+ Assert.assertEquals(1, v12OutputCommitter1.initCounter);
+ Assert.assertEquals(1, v12OutputCommitter1.setupCounter);
+ Assert.assertEquals(1, v12OutputCommitter1.commitCounter);
+ Assert.assertEquals(0, v12OutputCommitter1.abortCounter);
+
+ Assert.assertEquals(1, v12OutputCommitter2.initCounter);
+ Assert.assertEquals(1, v12OutputCommitter2.setupCounter);
+ Assert.assertEquals(1, v12OutputCommitter2.commitCounter);
+ Assert.assertEquals(0, v12OutputCommitter2.abortCounter);
+
+ Assert.assertEquals(1, v3OutputCommitter.initCounter);
+ Assert.assertEquals(1, v3OutputCommitter.setupCounter);
+ Assert.assertEquals(1, v3OutputCommitter.commitCounter);
+ Assert.assertEquals(0, v3OutputCommitter.abortCounter);
+ }
+
// commit of vertex group(v1,v2) fail and commit of v3 is not completed
@Test(timeout = 5000)
public void testDAGCommitFail1_OnVertexSuccess() throws Exception {
@@ -1048,6 +1252,16 @@ public class TestCommit {
Assert.assertEquals(DAGTerminationCause.COMMIT_FAILURE,
dag.getTerminationCause());
Assert.assertTrue(dag.commitFutures.isEmpty());
+ historyEventHandler.verifyVertexGroupCommitStartedEvent("uv12", 1);
+ historyEventHandler.verifyVertexGroupCommitFinishedEvent("uv12", 0);
+ historyEventHandler.verifyVertexCommitStartedEvent(v1.getVertexId(), 0);
+ historyEventHandler.verifyVertexFinishedEvent(v1.getVertexId(), 1);
+ historyEventHandler.verifyVertexCommitStartedEvent(v2.getVertexId(), 0);
+ historyEventHandler.verifyVertexFinishedEvent(v2.getVertexId(), 1);
+ historyEventHandler.verifyVertexCommitStartedEvent(v3.getVertexId(), 1);
+ historyEventHandler.verifyVertexFinishedEvent(v3.getVertexId(), 1);
+ historyEventHandler.verifyDAGCommitStartedEvent(dag.getID(), 0);
+ historyEventHandler.verifyDAGFinishedEvent(dag.getID(), 1);
Assert.assertEquals(1, v12OutputCommitter.initCounter);
Assert.assertEquals(1, v12OutputCommitter.setupCounter);
@@ -1097,6 +1311,16 @@ public class TestCommit {
Assert.assertEquals(DAGTerminationCause.VERTEX_FAILURE,
dag.getTerminationCause());
Assert.assertTrue(dag.commitFutures.isEmpty());
+ historyEventHandler.verifyVertexGroupCommitStartedEvent("uv12", 1);
+ historyEventHandler.verifyVertexGroupCommitFinishedEvent("uv12", 0);
+ historyEventHandler.verifyVertexCommitStartedEvent(v1.getVertexId(), 0);
+ historyEventHandler.verifyVertexFinishedEvent(v1.getVertexId(), 1);
+ historyEventHandler.verifyVertexCommitStartedEvent(v2.getVertexId(), 0);
+ historyEventHandler.verifyVertexFinishedEvent(v2.getVertexId(), 1);
+ historyEventHandler.verifyVertexCommitStartedEvent(v3.getVertexId(), 1);
+ historyEventHandler.verifyVertexFinishedEvent(v3.getVertexId(), 1);
+ historyEventHandler.verifyDAGCommitStartedEvent(dag.getID(), 0);
+ historyEventHandler.verifyDAGFinishedEvent(dag.getID(), 1);
CountingOutputCommitter v12OutputCommitter = (CountingOutputCommitter) v1
.getOutputCommitter("v12Out");
@@ -1152,6 +1376,16 @@ public class TestCommit {
Assert.assertEquals(DAGTerminationCause.VERTEX_FAILURE,
dag.getTerminationCause());
Assert.assertTrue(dag.commitFutures.isEmpty());
+ historyEventHandler.verifyVertexGroupCommitStartedEvent("uv12", 1);
+ historyEventHandler.verifyVertexGroupCommitFinishedEvent("uv12", 1);
+ historyEventHandler.verifyVertexCommitStartedEvent(v1.getVertexId(), 0);
+ historyEventHandler.verifyVertexFinishedEvent(v1.getVertexId(), 1);
+ historyEventHandler.verifyVertexCommitStartedEvent(v2.getVertexId(), 0);
+ historyEventHandler.verifyVertexFinishedEvent(v2.getVertexId(), 1);
+ historyEventHandler.verifyVertexCommitStartedEvent(v3.getVertexId(), 1);
+ historyEventHandler.verifyVertexFinishedEvent(v3.getVertexId(), 1);
+ historyEventHandler.verifyDAGCommitStartedEvent(dag.getID(), 0);
+ historyEventHandler.verifyDAGFinishedEvent(dag.getID(), 1);
Assert.assertEquals(1, v12OutputCommitter.initCounter);
Assert.assertEquals(1, v12OutputCommitter.setupCounter);
@@ -1202,6 +1436,16 @@ public class TestCommit {
Assert.assertEquals(DAGTerminationCause.COMMIT_FAILURE,
dag.getTerminationCause());
Assert.assertTrue(dag.commitFutures.isEmpty());
+ historyEventHandler.verifyVertexGroupCommitStartedEvent("uv12", 1);
+ historyEventHandler.verifyVertexGroupCommitFinishedEvent("uv12", 0);
+ historyEventHandler.verifyVertexCommitStartedEvent(v1.getVertexId(), 0);
+ historyEventHandler.verifyVertexFinishedEvent(v1.getVertexId(), 1);
+ historyEventHandler.verifyVertexCommitStartedEvent(v2.getVertexId(), 0);
+ historyEventHandler.verifyVertexFinishedEvent(v2.getVertexId(), 1);
+ historyEventHandler.verifyVertexCommitStartedEvent(v3.getVertexId(), 1);
+ historyEventHandler.verifyVertexFinishedEvent(v3.getVertexId(), 1);
+ historyEventHandler.verifyDAGCommitStartedEvent(dag.getID(), 0);
+ historyEventHandler.verifyDAGFinishedEvent(dag.getID(), 1);
Assert.assertEquals(1, v12OutputCommitter.initCounter);
Assert.assertEquals(1, v12OutputCommitter.setupCounter);
@@ -1215,7 +1459,7 @@ public class TestCommit {
}
@Test (timeout = 5000)
- public void testDAGInternalErrorWhileCommiting() throws Exception {
+ public void testDAGInternalErrorWhileCommiting_OnDAGSuccess() throws Exception {
conf.setBoolean(TezConfiguration.TEZ_AM_COMMIT_ALL_OUTPUTS_ON_DAG_SUCCESS,
true);
setupDAG(createDAGPlan(true, true));
@@ -1236,6 +1480,17 @@ public class TestCommit {
waitUntil(dag, DAGState.ERROR);
Assert.assertEquals(DAGTerminationCause.INTERNAL_ERROR, dag.getTerminationCause());
+ historyEventHandler.verifyVertexGroupCommitStartedEvent("uv12", 0);
+ historyEventHandler.verifyVertexGroupCommitFinishedEvent("uv12", 0);
+ historyEventHandler.verifyVertexCommitStartedEvent(v1.getVertexId(), 0);
+ historyEventHandler.verifyVertexFinishedEvent(v1.getVertexId(), 1);
+ historyEventHandler.verifyVertexCommitStartedEvent(v2.getVertexId(), 0);
+ historyEventHandler.verifyVertexFinishedEvent(v2.getVertexId(), 1);
+ historyEventHandler.verifyVertexCommitStartedEvent(v3.getVertexId(), 0);
+ historyEventHandler.verifyVertexFinishedEvent(v3.getVertexId(), 1);
+ historyEventHandler.verifyDAGCommitStartedEvent(dag.getID(), 1);
+ historyEventHandler.verifyDAGFinishedEvent(dag.getID(), 1);
+
CountingOutputCommitter v12OutputCommitter = (CountingOutputCommitter) v1
.getOutputCommitter("v12Out");
CountingOutputCommitter v3OutputCommitter = (CountingOutputCommitter) v3
@@ -1282,6 +1537,16 @@ public class TestCommit {
Assert
.assertEquals(DAGTerminationCause.DAG_KILL, dag.getTerminationCause());
Assert.assertTrue(dag.commitFutures.isEmpty());
+ historyEventHandler.verifyVertexGroupCommitStartedEvent("uv12", 0);
+ historyEventHandler.verifyVertexGroupCommitFinishedEvent("uv12", 0);
+ historyEventHandler.verifyVertexCommitStartedEvent(v1.getVertexId(), 0);
+ historyEventHandler.verifyVertexFinishedEvent(v1.getVertexId(), 1);
+ historyEventHandler.verifyVertexCommitStartedEvent(v2.getVertexId(), 0);
+ historyEventHandler.verifyVertexFinishedEvent(v2.getVertexId(), 1);
+ historyEventHandler.verifyVertexCommitStartedEvent(v3.getVertexId(), 0);
+ historyEventHandler.verifyVertexFinishedEvent(v3.getVertexId(), 1);
+ historyEventHandler.verifyDAGCommitStartedEvent(dag.getID(), 1);
+ historyEventHandler.verifyDAGFinishedEvent(dag.getID(), 1);
CountingOutputCommitter v12OutputCommitter = (CountingOutputCommitter) v1
.getOutputCommitter("v12Out");
@@ -1335,6 +1600,16 @@ public class TestCommit {
Assert
.assertEquals(DAGTerminationCause.DAG_KILL, dag.getTerminationCause());
Assert.assertTrue(dag.commitFutures.isEmpty());
+ historyEventHandler.verifyVertexGroupCommitStartedEvent("uv12", 1);
+ historyEventHandler.verifyVertexGroupCommitFinishedEvent("uv12", 0);
+ historyEventHandler.verifyVertexCommitStartedEvent(v1.getVertexId(), 0);
+ historyEventHandler.verifyVertexFinishedEvent(v1.getVertexId(), 1);
+ historyEventHandler.verifyVertexCommitStartedEvent(v2.getVertexId(), 0);
+ historyEventHandler.verifyVertexFinishedEvent(v2.getVertexId(), 1);
+ historyEventHandler.verifyVertexCommitStartedEvent(v3.getVertexId(), 1);
+ historyEventHandler.verifyVertexFinishedEvent(v3.getVertexId(), 1);
+ historyEventHandler.verifyDAGCommitStartedEvent(dag.getID(), 0);
+ historyEventHandler.verifyDAGFinishedEvent(dag.getID(), 1);
CountingOutputCommitter v12OutputCommitter = (CountingOutputCommitter) v1
.getOutputCommitter("v12Out");
@@ -1383,7 +1658,16 @@ public class TestCommit {
Assert
.assertEquals(DAGTerminationCause.DAG_KILL, dag.getTerminationCause());
Assert.assertTrue(dag.commitFutures.isEmpty());
-
+ historyEventHandler.verifyVertexGroupCommitStartedEvent("uv12", 1);
+ historyEventHandler.verifyVertexGroupCommitFinishedEvent("uv12", 0);
+ historyEventHandler.verifyVertexCommitStartedEvent(v1.getVertexId(), 0);
+ historyEventHandler.verifyVertexFinishedEvent(v1.getVertexId(), 1);
+ historyEventHandler.verifyVertexCommitStartedEvent(v2.getVertexId(), 0);
+ historyEventHandler.verifyVertexFinishedEvent(v2.getVertexId(), 1);
+ historyEventHandler.verifyVertexCommitStartedEvent(v3.getVertexId(), 1);
+ historyEventHandler.verifyVertexFinishedEvent(v3.getVertexId(), 1);
+ historyEventHandler.verifyDAGCommitStartedEvent(dag.getID(), 0);
+ historyEventHandler.verifyDAGFinishedEvent(dag.getID(), 1);
CountingOutputCommitter v12OutputCommitter = (CountingOutputCommitter) v1
.getOutputCommitter("v12Out");
@@ -1424,6 +1708,17 @@ public class TestCommit {
Assert.assertEquals(DAGTerminationCause.VERTEX_RERUN_IN_COMMITTING, dag.getTerminationCause());
Assert.assertTrue(dag.commitFutures.isEmpty());
+ historyEventHandler.verifyVertexGroupCommitStartedEvent("uv12", 0);
+ historyEventHandler.verifyVertexGroupCommitFinishedEvent("uv12", 0);
+ historyEventHandler.verifyVertexCommitStartedEvent(v1.getVertexId(), 0);
+ historyEventHandler.verifyVertexFinishedEvent(v1.getVertexId(), 1);
+ historyEventHandler.verifyVertexCommitStartedEvent(v2.getVertexId(), 0);
+ historyEventHandler.verifyVertexFinishedEvent(v2.getVertexId(), 1);
+ historyEventHandler.verifyVertexCommitStartedEvent(v3.getVertexId(), 0);
+ historyEventHandler.verifyVertexFinishedEvent(v3.getVertexId(), 1);
+ historyEventHandler.verifyDAGCommitStartedEvent(dag.getID(), 1);
+ historyEventHandler.verifyDAGFinishedEvent(dag.getID(), 1);
+
CountingOutputCommitter v12OutputCommitter = (CountingOutputCommitter) v1
.getOutputCommitter("v12Out");
CountingOutputCommitter v3OutputCommitter = (CountingOutputCommitter) v3
@@ -1461,6 +1756,17 @@ public class TestCommit {
waitUntil(dag, DAGState.ERROR);
Assert.assertEquals(DAGTerminationCause.INTERNAL_ERROR, dag.getTerminationCause());
+ historyEventHandler.verifyVertexGroupCommitStartedEvent("uv12", 0);
+ historyEventHandler.verifyVertexGroupCommitFinishedEvent("uv12", 0);
+ historyEventHandler.verifyVertexCommitStartedEvent(v1.getVertexId(), 0);
+ historyEventHandler.verifyVertexFinishedEvent(v1.getVertexId(), 1);
+ historyEventHandler.verifyVertexCommitStartedEvent(v2.getVertexId(), 0);
+ historyEventHandler.verifyVertexFinishedEvent(v2.getVertexId(), 1);
+ historyEventHandler.verifyVertexCommitStartedEvent(v3.getVertexId(), 0);
+ historyEventHandler.verifyVertexFinishedEvent(v3.getVertexId(), 1);
+ historyEventHandler.verifyDAGCommitStartedEvent(dag.getID(), 1);
+ historyEventHandler.verifyDAGFinishedEvent(dag.getID(), 1);
+
CountingOutputCommitter v12OutputCommitter = (CountingOutputCommitter) v1
.getOutputCommitter("v12Out");
CountingOutputCommitter v3OutputCommitter = (CountingOutputCommitter) v3
@@ -1476,14 +1782,12 @@ public class TestCommit {
Assert.assertEquals(1, v3OutputCommitter.abortCounter);
}
- @Test(timeout = 5000)
- public void testVertexGroupCommitFinishedEventFail() throws Exception {
+ @Test (timeout = 5000)
+ public void testVertexGroupCommitFinishedEventFail_OnVertexSuccess() throws Exception {
conf.setBoolean(TezConfiguration.TEZ_AM_COMMIT_ALL_OUTPUTS_ON_DAG_SUCCESS,
false);
setupDAG(createDAGPlan(true, true));
- MockHistoryEventHandler mockHistoryEventHandler = new MockHistoryEventHandler(appContext);
- doReturn(mockHistoryEventHandler).when(appContext).getHistoryHandler();
- mockHistoryEventHandler.failVertexGroupCommitFinishedEvent = true;
+ historyEventHandler.failVertexGroupCommitFinishedEvent = true;
initDAG(dag);
startDAG(dag);
@@ -1503,6 +1807,16 @@ public class TestCommit {
.getOutputCommitter("v3Out");
v12OutputCommitter.unblockCommit();
waitUntil(dag, DAGState.FAILED);
+ historyEventHandler.verifyVertexGroupCommitStartedEvent("uv12", 1);
+ historyEventHandler.verifyVertexGroupCommitFinishedEvent("uv12", 0);
+ historyEventHandler.verifyVertexCommitStartedEvent(v1.getVertexId(), 0);
+ historyEventHandler.verifyVertexFinishedEvent(v1.getVertexId(), 1);
+ historyEventHandler.verifyVertexCommitStartedEvent(v2.getVertexId(), 0);
+ historyEventHandler.verifyVertexFinishedEvent(v2.getVertexId(), 1);
+ historyEventHandler.verifyVertexCommitStartedEvent(v3.getVertexId(), 1);
+ historyEventHandler.verifyVertexFinishedEvent(v3.getVertexId(), 1);
+ historyEventHandler.verifyDAGCommitStartedEvent(dag.getID(), 0);
+ historyEventHandler.verifyDAGFinishedEvent(dag.getID(), 1);
Assert.assertEquals(DAGState.FAILED, dag.getState());
Assert.assertEquals(DAGTerminationCause.RECOVERY_FAILURE,
@@ -1524,13 +1838,11 @@ public class TestCommit {
}
@Test(timeout = 5000)
- public void testDAGCommitStartedEventFail() throws Exception {
+ public void testDAGCommitStartedEventFail_OnDAGSuccess() throws Exception {
conf.setBoolean(TezConfiguration.TEZ_AM_COMMIT_ALL_OUTPUTS_ON_DAG_SUCCESS,
true);
setupDAG(createDAGPlan(true, true));
- MockHistoryEventHandler mockHistoryEventHandler = new MockHistoryEventHandler(appContext);
- doReturn(mockHistoryEventHandler).when(appContext).getHistoryHandler();
- mockHistoryEventHandler.failDAGCommitStartedEvent = true;
+ historyEventHandler.failDAGCommitStartedEvent = true;
initDAG(dag);
startDAG(dag);
@@ -1547,6 +1859,17 @@ public class TestCommit {
waitUntil(dag, DAGState.FAILED);
Assert.assertEquals(DAGTerminationCause.RECOVERY_FAILURE, dag.getTerminationCause());
Assert.assertTrue(dag.commitFutures.isEmpty());
+ historyEventHandler.verifyVertexGroupCommitStartedEvent("uv12", 0);
+ historyEventHandler.verifyVertexGroupCommitFinishedEvent("uv12", 0);
+ historyEventHandler.verifyVertexCommitStartedEvent(v1.getVertexId(), 0);
+ historyEventHandler.verifyVertexFinishedEvent(v1.getVertexId(), 1);
+ historyEventHandler.verifyVertexCommitStartedEvent(v2.getVertexId(), 0);
+ historyEventHandler.verifyVertexFinishedEvent(v2.getVertexId(), 1);
+ historyEventHandler.verifyVertexCommitStartedEvent(v3.getVertexId(), 0);
+ historyEventHandler.verifyVertexFinishedEvent(v3.getVertexId(), 1);
+ historyEventHandler.verifyDAGCommitStartedEvent(dag.getID(), 0);
+ historyEventHandler.verifyDAGFinishedEvent(dag.getID(), 1);
+
CountingOutputCommitter v12OutputCommitter = (CountingOutputCommitter) v1
.getOutputCommitter("v12Out");
CountingOutputCommitter v3OutputCommitter = (CountingOutputCommitter) v3
@@ -1567,7 +1890,7 @@ public class TestCommit {
// test commit will be canceled no matter it is started or still in the threadpool
// ControlledThreadPoolExecutor is used for to not schedule the commits
@Test(timeout = 5000)
- public void testCommitCanceled() throws Exception {
+ public void testCommitCanceled_OnDAGSuccess() throws Exception {
conf.setBoolean(TezConfiguration.TEZ_AM_COMMIT_ALL_OUTPUTS_ON_DAG_SUCCESS,
true);
setupDAG(createDAGPlan(true, true));
@@ -1598,6 +1921,16 @@ public class TestCommit {
Assert.assertEquals(DAGTerminationCause.DAG_KILL, dag.getTerminationCause());
// mean the commits have been canceled
Assert.assertTrue(dag.commitFutures.isEmpty());
+ historyEventHandler.verifyVertexGroupCommitStartedEvent("uv12", 0);
+ historyEventHandler.verifyVertexGroupCommitFinishedEvent("uv12", 0);
+ historyEventHandler.verifyVertexCommitStartedEvent(v1.getVertexId(), 0);
+ historyEventHandler.verifyVertexFinishedEvent(v1.getVertexId(), 1);
+ historyEventHandler.verifyVertexCommitStartedEvent(v2.getVertexId(), 0);
+ historyEventHandler.verifyVertexFinishedEvent(v2.getVertexId(), 1);
+ historyEventHandler.verifyVertexCommitStartedEvent(v3.getVertexId(), 0);
+ historyEventHandler.verifyVertexFinishedEvent(v3.getVertexId(), 1);
+ historyEventHandler.verifyDAGCommitStartedEvent(dag.getID(), 1);
+ historyEventHandler.verifyDAGFinishedEvent(dag.getID(), 1);
CountingOutputCommitter v12OutputCommitter = (CountingOutputCommitter) v1
.getOutputCommitter("v12Out");
@@ -1635,7 +1968,7 @@ public class TestCommit {
public boolean failVertexGroupCommitFinishedEvent = false;
public boolean failDAGCommitStartedEvent = false;
-
+ public List<HistoryEvent> historyEvents = new ArrayList<HistoryEvent>();
public MockHistoryEventHandler(AppContext context) {
super(context);
}
@@ -1650,6 +1983,85 @@ public class TestCommit {
&& failDAGCommitStartedEvent) {
throw new IOException("fail DAGCommitStartedEvent");
}
+ historyEvents.add(event.getHistoryEvent());
+ }
+
+ public void verifyVertexGroupCommitStartedEvent(String groupName, int expectedTimes) {
+ int actualTimes = 0;
+ for (HistoryEvent event : historyEvents) {
+ if (event.getEventType() == HistoryEventType.VERTEX_GROUP_COMMIT_STARTED) {
+ VertexGroupCommitStartedEvent startedEvent = (VertexGroupCommitStartedEvent)event;
+ if (startedEvent.getVertexGroupName().equals(groupName)) {
+ actualTimes ++;
+ }
+ }
+ }
+ Assert.assertEquals(expectedTimes, actualTimes);
+ }
+
+ public void verifyVertexGroupCommitFinishedEvent(String groupName, int expectedTimes) {
+ int actualTimes = 0;
+ for (HistoryEvent event : historyEvents) {
+ if (event.getEventType() == HistoryEventType.VERTEX_GROUP_COMMIT_FINISHED) {
+ VertexGroupCommitFinishedEvent finishedEvent = (VertexGroupCommitFinishedEvent)event;
+ if (finishedEvent.getVertexGroupName().equals(groupName)) {
+ actualTimes ++;
+ }
+ }
+ }
+ Assert.assertEquals(expectedTimes, actualTimes);
+ }
+
+ public void verifyVertexCommitStartedEvent(TezVertexID vertexId, int expectedTimes) {
+ int actualTimes = 0;
+ for (HistoryEvent event : historyEvents) {
+ if (event.getEventType() == HistoryEventType.VERTEX_COMMIT_STARTED) {
+ VertexCommitStartedEvent startedEvent = (VertexCommitStartedEvent)event;
+ if (startedEvent.getVertexID().equals(vertexId)) {
+ actualTimes ++;
+ }
+ }
+ }
+ Assert.assertEquals(expectedTimes, actualTimes);
+ }
+
+ public void verifyVertexFinishedEvent(TezVertexID vertexId, int expectedTimes) {
+ int actualTimes = 0;
+ for (HistoryEvent event : historyEvents) {
+ if (event.getEventType() == HistoryEventType.VERTEX_FINISHED) {
+ VertexFinishedEvent finishedEvent = (VertexFinishedEvent)event;
+ if (finishedEvent.getVertexID().equals(vertexId)) {
+ actualTimes ++;
+ }
+ }
+ }
+ Assert.assertEquals(expectedTimes, actualTimes);
+ }
+
+ public void verifyDAGCommitStartedEvent(TezDAGID dagId, int expectedTimes) {
+ int actualTimes = 0;
+ for (HistoryEvent event : historyEvents) {
+ if (event.getEventType() == HistoryEventType.DAG_COMMIT_STARTED) {
+ DAGCommitStartedEvent startedEvent = (DAGCommitStartedEvent)event;
+ if (startedEvent.getDagID().equals(dagId)) {
+ actualTimes ++;
+ }
+ }
+ }
+ Assert.assertEquals(expectedTimes, actualTimes);
+ }
+
+ public void verifyDAGFinishedEvent(TezDAGID dagId, int expectedTimes) {
+ int actualTimes = 0;
+ for (HistoryEvent event : historyEvents) {
+ if (event.getEventType() == HistoryEventType.DAG_FINISHED) {
+ DAGFinishedEvent startedEvent = (DAGFinishedEvent)event;
+ if (startedEvent.getDagID().equals(dagId)) {
+ actualTimes ++;
+ }
+ }
+ }
+ Assert.assertEquals(expectedTimes, actualTimes);
}
}
[42/43] tez git commit: TEZ-2414. LogicalIOProcessorRuntimeTask,
RuntimeTask,
TezTaskRunner should handle interrupts & carry out necessary cleanups.
Contributed by Rajesh Balamohan.
Posted by ss...@apache.org.
TEZ-2414. LogicalIOProcessorRuntimeTask, RuntimeTask, TezTaskRunner should handle interrupts & carry out necessary cleanups. Contributed by Rajesh Balamohan.
Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/ea972acd
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/ea972acd
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/ea972acd
Branch: refs/heads/TEZ-2003
Commit: ea972acdc2a48c763596d34acb53577279eb00f0
Parents: fdb9177
Author: Siddharth Seth <ss...@apache.org>
Authored: Wed May 6 00:39:46 2015 -0700
Committer: Siddharth Seth <ss...@apache.org>
Committed: Fri May 8 14:46:10 2015 -0700
----------------------------------------------------------------------
TEZ-2003-CHANGES.txt | 1 +
.../runtime/LogicalIOProcessorRuntimeTask.java | 141 +++++++++++++++++++
.../org/apache/tez/runtime/RuntimeTask.java | 5 +
.../apache/tez/runtime/task/TezTaskRunner.java | 71 +++++++++-
4 files changed, 216 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/tez/blob/ea972acd/TEZ-2003-CHANGES.txt
----------------------------------------------------------------------
diff --git a/TEZ-2003-CHANGES.txt b/TEZ-2003-CHANGES.txt
index 9fc9ed3..f8a71e8 100644
--- a/TEZ-2003-CHANGES.txt
+++ b/TEZ-2003-CHANGES.txt
@@ -20,5 +20,6 @@ ALL CHANGES:
TEZ-2361. Propagate dag completion to TaskCommunicator.
TEZ-2381. Fixes after rebase 04/28.
TEZ-2388. Send dag identifier as part of the fetcher request string.
+ TEZ-2414. LogicalIOProcessorRuntimeTask, RuntimeTask, TezTaskRunner should handle interrupts & carry out necessary cleanups.
INCOMPATIBLE CHANGES:
http://git-wip-us.apache.org/repos/asf/tez/blob/ea972acd/tez-runtime-internals/src/main/java/org/apache/tez/runtime/LogicalIOProcessorRuntimeTask.java
----------------------------------------------------------------------
diff --git a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/LogicalIOProcessorRuntimeTask.java b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/LogicalIOProcessorRuntimeTask.java
index 1cfe538..48c972c 100644
--- a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/LogicalIOProcessorRuntimeTask.java
+++ b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/LogicalIOProcessorRuntimeTask.java
@@ -20,10 +20,14 @@ package org.apache.tez.runtime;
import java.io.Closeable;
import java.io.IOException;
+import java.lang.management.ManagementFactory;
+import java.lang.management.ThreadInfo;
+import java.lang.management.ThreadMXBean;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
+import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
@@ -38,6 +42,7 @@ import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.LinkedBlockingQueue;
+import com.google.common.base.Throwables;
import org.apache.commons.lang.exception.ExceptionUtils;
import org.apache.tez.runtime.api.TaskContext;
import org.apache.tez.runtime.api.impl.TezProcessorContextImpl;
@@ -111,6 +116,10 @@ public class LogicalIOProcessorRuntimeTask extends RuntimeTask {
final List<GroupInputSpec> groupInputSpecs;
ConcurrentHashMap<String, MergedLogicalInput> groupInputsMap;
+ final ConcurrentHashMap<String, LogicalInput> initializedInputs;
+ final ConcurrentHashMap<String, LogicalOutput> initializedOutputs;
+
+ private boolean processorClosed;
final ProcessorDescriptor processorDescriptor;
AbstractLogicalIOProcessor processor;
ProcessorContext processorContext;
@@ -163,6 +172,9 @@ public class LogicalIOProcessorRuntimeTask extends RuntimeTask {
this.runInputMap = new LinkedHashMap<String, LogicalInput>();
this.runOutputMap = new LinkedHashMap<String, LogicalOutput>();
+ this.initializedInputs = new ConcurrentHashMap<String, LogicalInput>();
+ this.initializedOutputs = new ConcurrentHashMap<String, LogicalOutput>();
+
this.processorDescriptor = taskSpec.getProcessorDescriptor();
this.serviceConsumerMetadata = serviceConsumerMetadata;
this.envMap = envMap;
@@ -341,11 +353,13 @@ public class LogicalIOProcessorRuntimeTask extends RuntimeTask {
this.state.set(State.CLOSED);
// Close the Processor.
+ processorClosed = true;
processor.close();
// Close the Inputs.
for (InputSpec inputSpec : inputSpecs) {
String srcVertexName = inputSpec.getSourceVertexName();
+ initializedInputs.remove(srcVertexName);
List<Event> closeInputEvents = ((InputFrameworkInterface)inputsMap.get(srcVertexName)).close();
sendTaskGeneratedEvents(closeInputEvents,
EventProducerConsumerType.INPUT, taskSpec.getVertexName(),
@@ -355,6 +369,7 @@ public class LogicalIOProcessorRuntimeTask extends RuntimeTask {
// Close the Outputs.
for (OutputSpec outputSpec : outputSpecs) {
String destVertexName = outputSpec.getDestinationVertexName();
+ initializedOutputs.remove(destVertexName);
List<Event> closeOutputEvents = ((LogicalOutputFrameworkInterface)outputsMap.get(destVertexName)).close();
sendTaskGeneratedEvents(closeOutputEvents,
EventProducerConsumerType.OUTPUT, taskSpec.getVertexName(),
@@ -402,6 +417,7 @@ public class LogicalIOProcessorRuntimeTask extends RuntimeTask {
inputContext.getTaskVertexName(), inputContext.getSourceVertexName(),
taskSpec.getTaskAttemptID());
LOG.info("Initialized Input with src edge: " + edgeName);
+ initializedInputs.put(edgeName, input);
return null;
}
}
@@ -450,6 +466,7 @@ public class LogicalIOProcessorRuntimeTask extends RuntimeTask {
outputContext.getTaskVertexName(),
outputContext.getDestinationVertexName(), taskSpec.getTaskAttemptID());
LOG.info("Initialized Output with dest edge: " + edgeName);
+ initializedOutputs.put(edgeName, output);
return null;
}
}
@@ -675,6 +692,13 @@ public class LogicalIOProcessorRuntimeTask extends RuntimeTask {
eventsToBeProcessed.addAll(events);
}
+ @Override
+ public synchronized void abortTask() throws Exception {
+ if (processor != null) {
+ processor.abort();
+ }
+ }
+
private void startRouterThread() {
eventRouterThread = new Thread(new RunnableWithNdc() {
public void runInternal() {
@@ -694,6 +718,7 @@ public class LogicalIOProcessorRuntimeTask extends RuntimeTask {
if (!isTaskDone()) {
LOG.warn("Event Router thread interrupted. Returning.");
}
+ Thread.currentThread().interrupt();
return;
}
}
@@ -705,6 +730,12 @@ public class LogicalIOProcessorRuntimeTask extends RuntimeTask {
eventRouterThread.start();
}
+ private void maybeResetInterruptStatus() {
+ if (!Thread.currentThread().isInterrupted()) {
+ Thread.currentThread().interrupt();
+ }
+ }
+
private void closeContexts() throws IOException {
closeContext(inputContextMap);
closeContext(outputContextMap);
@@ -742,6 +773,102 @@ public class LogicalIOProcessorRuntimeTask extends RuntimeTask {
}
eventRouterThread = null;
}
+
+ /**
+ * Clean up any IPO that has not been closed. If a regular close() has already happened for
+ * an IPO, it is no longer among the IPOs to be cleaned, so this is safe.
+ *
+ * e.g. whenever an input is closed in the normal way, it is automatically removed from the
+ * initializedInputs map.
+ *
+ * If an exception happens during processor close or IO close, the entry is not removed from
+ * the initialized IO data structures, and this is the chance to close it and release its
+ * resources.
+ *
+ */
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Processor closed={}", processorClosed);
+ LOG.debug("Num of inputs to be closed={}", initializedInputs.size());
+ LOG.debug("Num of outputs to be closed={}", initializedOutputs.size());
+ }
+ if (!processorClosed) {
+ try {
+ processorClosed = true;
+ processor.close();
+
+ LOG.info("Closed processor for vertex={}, index={}, interruptedStatus={}",
+ processor
+ .getContext().getTaskVertexName(),
+ processor.getContext().getTaskVertexIndex(),
+ Thread.currentThread().isInterrupted());
+
+ maybeResetInterruptStatus();
+ } catch (InterruptedException ie) {
+ //reset the status
+ LOG.info("Resetting interrupt for processor");
+ Thread.currentThread().interrupt();
+ } catch (Throwable e) {
+ LOG.warn("Exception when closing processor", e);
+ }
+ }
+ // Close the remaining inited Inputs.
+ Iterator<String> srcVertexItr = initializedInputs.keySet().iterator();
+ while (srcVertexItr.hasNext()) {
+ String srcVertexName = srcVertexItr.next();
+ try {
+ srcVertexItr.remove();
+
+ ((InputFrameworkInterface) initializedInputs.get(srcVertexName)).close();
+ initializedInputs.remove(srcVertexName);
+
+ maybeResetInterruptStatus();
+ } catch (InterruptedException ie) {
+ //reset the status
+ LOG.info("Resetting interrupt status for input with srcVertexName={}",
+ srcVertexName);
+ Thread.currentThread().interrupt();
+ } catch (Throwable e) {
+ LOG.warn("Exception when closing input in cleanup(interrupted)", e);
+ } finally {
+ LOG.info("Close input for vertex={}, sourceVertex={}, interruptedStatus={}", processor
+ .getContext().getTaskVertexName(), srcVertexName, Thread.currentThread()
+ .isInterrupted());
+ }
+ }
+
+ // Close the remaining inited Outputs.
+ try {
+ Iterator<String> outVertexItr = initializedOutputs.keySet().iterator();
+ while (outVertexItr.hasNext()) {
+ String destVertexName = outVertexItr.next();
+ try {
+ outVertexItr.remove();
+
+ ((OutputFrameworkInterface) initializedOutputs.get(destVertexName)).close();
+ initializedOutputs.remove(destVertexName);
+
+ maybeResetInterruptStatus();
+ } catch (InterruptedException ie) {
+ //reset the status
+ LOG.info("Resetting interrupt status for output with destVertexName={}",
+ destVertexName);
+ Thread.currentThread().interrupt();
+ } catch (Throwable e) {
+ LOG.warn("Exception when closing output in cleanup(interrupted)", e);
+ } finally {
+ LOG.info("Close input for vertex={}, sourceVertex={}, interruptedStatus={}", processor
+ .getContext().getTaskVertexName(), destVertexName, Thread.currentThread()
+ .isInterrupted());
+ }
+ }
+ } catch (Throwable e) {
+ LOG.warn(Throwables.getStackTraceAsString(e));
+ }
+
+ if (LOG.isDebugEnabled()) {
+ printThreads();
+ }
+
try {
closeContexts();
// Cleanup references which may be held by misbehaved tasks.
@@ -785,6 +912,20 @@ public class LogicalIOProcessorRuntimeTask extends RuntimeTask {
inputReadyTracker = null;
objectRegistry = null;
}
+
+
+ /**
+ * Print all threads in JVM (only for debugging)
+ */
+ void printThreads() {
+ //Print the status of all threads in JVM
+ ThreadMXBean threadMXBean = ManagementFactory.getThreadMXBean();
+ long[] threadIds = threadMXBean.getAllThreadIds();
+ for (Long id : threadIds) {
+ ThreadInfo threadInfo = threadMXBean.getThreadInfo(id);
+ LOG.info("ThreadId : " + id + ", name=" + threadInfo.getThreadName());
+ }
+ }
@Private
@VisibleForTesting
http://git-wip-us.apache.org/repos/asf/tez/blob/ea972acd/tez-runtime-internals/src/main/java/org/apache/tez/runtime/RuntimeTask.java
----------------------------------------------------------------------
diff --git a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/RuntimeTask.java b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/RuntimeTask.java
index 921095c..7b09455 100644
--- a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/RuntimeTask.java
+++ b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/RuntimeTask.java
@@ -74,6 +74,10 @@ public abstract class RuntimeTask {
protected final AtomicReference<State> state = new AtomicReference<State>();
+ public boolean isRunning() {
+ return (state.get() == State.RUNNING);
+ }
+
public TezCounters addAndGetTezCounter(String name) {
TezCounters counter = new TezCounters();
counterMap.put(name, counter);
@@ -153,4 +157,5 @@ public abstract class RuntimeTask {
taskDone.set(true);
}
+ public abstract void abortTask() throws Exception;
}
http://git-wip-us.apache.org/repos/asf/tez/blob/ea972acd/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TezTaskRunner.java
----------------------------------------------------------------------
diff --git a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TezTaskRunner.java b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TezTaskRunner.java
index 33a7f4a..7238d5e 100644
--- a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TezTaskRunner.java
+++ b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TezTaskRunner.java
@@ -25,8 +25,13 @@ import java.security.PrivilegedExceptionAction;
import java.util.Collection;
import java.util.Map;
import java.util.concurrent.ExecutionException;
+import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.concurrent.locks.Condition;
+import java.util.concurrent.locks.Lock;
+import java.util.concurrent.locks.ReentrantLock;
+import com.google.common.base.Throwables;
import org.apache.commons.lang.exception.ExceptionUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSError;
@@ -35,6 +40,7 @@ import org.apache.tez.common.CallableWithNdc;
import org.apache.tez.dag.api.TezException;
import org.apache.tez.dag.records.TezTaskAttemptID;
import org.apache.tez.runtime.LogicalIOProcessorRuntimeTask;
+import org.apache.tez.runtime.RuntimeTask;
import org.apache.tez.runtime.api.ExecutionContext;
import org.apache.tez.runtime.api.ObjectRegistry;
import org.apache.tez.runtime.api.impl.EventMetaData;
@@ -61,6 +67,7 @@ public class TezTaskRunner implements TezUmbilical, ErrorReporter {
private final ListeningExecutorService executor;
private volatile ListenableFuture<Void> taskFuture;
private volatile Thread waitingThread;
+ private volatile Thread taskRunner;
private volatile Throwable firstException;
// Effectively a duplicate check, since hadFatalError does the same thing.
@@ -96,7 +103,10 @@ public class TezTaskRunner implements TezUmbilical, ErrorReporter {
taskReporter.registerTask(task, this);
TaskRunnerCallable callable = new TaskRunnerCallable();
Throwable failureCause = null;
- taskFuture = executor.submit(callable);
+ if (!Thread.currentThread().isInterrupted()) {
+ taskFuture = executor.submit(callable);
+ return isShutdownRequested();
+ }
try {
taskFuture.get();
@@ -158,6 +168,10 @@ public class TezTaskRunner implements TezUmbilical, ErrorReporter {
}
}
}
+ return isShutdownRequested();
+ }
+
+ private boolean isShutdownRequested() {
if (shutdownRequested.get()) {
LOG.info("Shutdown requested... returning");
return false;
@@ -173,11 +187,14 @@ public class TezTaskRunner implements TezUmbilical, ErrorReporter {
@Override
public Void run() throws Exception {
try {
+ taskRunner = Thread.currentThread();
LOG.info("Initializing task" + ", taskAttemptId=" + task.getTaskAttemptID());
task.initialize();
if (!Thread.currentThread().isInterrupted() && firstException == null) {
LOG.info("Running task, taskAttemptId=" + task.getTaskAttemptID());
task.run();
+ maybeInterruptWaitingThread();
+
LOG.info("Closing task, taskAttemptId=" + task.getTaskAttemptID());
task.close();
task.setFrameworkCounters();
@@ -199,6 +216,12 @@ public class TezTaskRunner implements TezUmbilical, ErrorReporter {
}
return null;
} catch (Throwable cause) {
+ if (Thread.currentThread().isInterrupted()) {
+ LOG.info("TaskRunnerCallable interrupted=" + Thread.currentThread().isInterrupted()
+ + ", shutdownRequest=" + shutdownRequested.get());
+ Thread.currentThread().interrupt();
+ return null;
+ }
if (cause instanceof FSError) {
// Not immediately fatal, this is an error reported by Hadoop FileSystem
maybeRegisterFirstException(cause);
@@ -255,6 +278,17 @@ public class TezTaskRunner implements TezUmbilical, ErrorReporter {
taskRunning.set(false);
}
}
+
+ private void maybeInterruptWaitingThread() {
+ /*
+ * The processor may swallow the InterruptedException triggered by taskRunner.interrupt().
+ * In that case, interrupt the waitingThread when the shutdownRequested flag is set, so that
+ * the entire task gets cancelled.
+ */
+ if (shutdownRequested.get()) {
+ waitingThread.interrupt();
+ }
+ }
}
// should wait until all messages are sent to AM before TezChild shutdown
@@ -353,10 +387,43 @@ public class TezTaskRunner implements TezUmbilical, ErrorReporter {
}
}
+ /**
+ * Aborts the task after a shutdown request and interrupts the thread that needs to notice it.
+ * When the runner is not marked as running, only the waiting thread is interrupted.
+ */
+ private void abortRunningTask() {
+ if (!taskRunning.get()) {
+ LOG.info("Task is not running");
+ waitingThread.interrupt();
+ return;
+ }
+
+ // Second read of the flag; NOTE(review): presumably guards against it being cleared
+ // concurrently after the check above - confirm.
+ if (taskRunning.get()) {
+ try {
+ task.abortTask();
+ } catch (Exception e) {
+ LOG.warn("Error when aborting the task", e);
+ try {
+ sendFailure(e, "Error when aborting the task");
+ } catch (Exception ignored) {
+ // Best effort: the abort error was already logged above, so a reporting failure is dropped.
+ }
+ }
+ }
+ //Interrupt the relevant threads. TaskRunner should be interrupted preferably.
+ if (isTaskRunning()) {
+ LOG.info("Interrupting taskRunner=" + taskRunner.getName());
+ taskRunner.interrupt();
+ } else {
+ LOG.info("Interrupting waitingThread=" + waitingThread.getName());
+ waitingThread.interrupt();
+ }
+ }
+
+ /** Returns true only while the runner is flagged as running and the task itself reports running. */
+ private boolean isTaskRunning() {
+ if (!taskRunning.get()) {
+ return false;
+ }
+ return task.isRunning();
+ }
+
@Override
public void shutdownRequested() {
shutdownRequested.set(true);
- waitingThread.interrupt();
+ abortRunningTask();
}
private String getTaskDiagnosticsString(Throwable t, String message) {
[23/43] tez git commit: TEZ-2090. Add tests for jobs running in
external services. (sseth)
Posted by ss...@apache.org.
http://git-wip-us.apache.org/repos/asf/tez/blob/aadd0492/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/ContainerRunnerImpl.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/ContainerRunnerImpl.java b/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/ContainerRunnerImpl.java
new file mode 100644
index 0000000..4a6ce33
--- /dev/null
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/ContainerRunnerImpl.java
@@ -0,0 +1,512 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.service.impl;
+
+import java.io.File;
+import java.io.IOException;
+import java.net.InetSocketAddress;
+import java.nio.ByteBuffer;
+import java.security.PrivilegedExceptionAction;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.concurrent.Callable;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.atomic.AtomicLong;
+import java.util.concurrent.atomic.AtomicReference;
+
+import com.google.common.base.Preconditions;
+import com.google.common.base.Stopwatch;
+import com.google.common.collect.HashMultimap;
+import com.google.common.collect.Multimap;
+import com.google.common.util.concurrent.FutureCallback;
+import com.google.common.util.concurrent.Futures;
+import com.google.common.util.concurrent.ListenableFuture;
+import com.google.common.util.concurrent.ListeningExecutorService;
+import com.google.common.util.concurrent.MoreExecutors;
+import com.google.common.util.concurrent.ThreadFactoryBuilder;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.DataInputBuffer;
+import org.apache.hadoop.ipc.RPC;
+import org.apache.hadoop.net.NetUtils;
+import org.apache.hadoop.security.Credentials;
+import org.apache.hadoop.security.SecurityUtil;
+import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.hadoop.security.token.Token;
+import org.apache.hadoop.service.AbstractService;
+import org.apache.hadoop.yarn.api.ApplicationConstants;
+import org.apache.hadoop.yarn.util.AuxiliaryServiceHelper;
+import org.apache.log4j.Logger;
+import org.apache.tez.common.TezCommonUtils;
+import org.apache.tez.common.TezTaskUmbilicalProtocol;
+import org.apache.tez.common.security.JobTokenIdentifier;
+import org.apache.tez.common.security.TokenCache;
+import org.apache.tez.dag.api.TezConfiguration;
+import org.apache.tez.dag.api.TezException;
+import org.apache.tez.runtime.task.TaskReporter;
+import org.apache.tez.runtime.task.TezTaskRunner;
+import org.apache.tez.service.ContainerRunner;
+import org.apache.tez.dag.api.TezConstants;
+import org.apache.tez.runtime.api.ExecutionContext;
+import org.apache.tez.runtime.api.impl.ExecutionContextImpl;
+import org.apache.tez.runtime.common.objectregistry.ObjectRegistryImpl;
+import org.apache.tez.runtime.task.TezChild;
+import org.apache.tez.runtime.task.TezChild.ContainerExecutionResult;
+import org.apache.tez.shufflehandler.ShuffleHandler;
+import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos.RunContainerRequestProto;
+import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos.SubmitWorkRequestProto;
+import org.apache.tez.util.ProtoConverters;
+
+public class ContainerRunnerImpl extends AbstractService implements ContainerRunner {
+
+ private static final Logger LOG = Logger.getLogger(ContainerRunnerImpl.class);
+
+ private final ListeningExecutorService executorService;
+ private final AtomicReference<InetSocketAddress> localAddress;
+ private final String[] localDirsBase;
+ private final Map<String, String> localEnv = new HashMap<String, String>();
+ private volatile FileSystem localFs;
+ private final long memoryPerExecutor;
+ // TODO Support for removing queued containers, interrupting / killing specific containers - when preemption is supported
+
+
+
+
+ /**
+ * Creates the runner with a fixed-size pool of executor threads.
+ *
+ * @param numExecutors number of executor threads; must be greater than 0
+ * @param localDirsBase base local directories under which per-application directories are created
+ * @param localAddress holder for the address of this service; read each time work is submitted
+ * @param totalMemoryAvailableBytes total memory to divide between the executors
+ * @throws IllegalStateException if numExecutors is not positive
+ */
+ public ContainerRunnerImpl(int numExecutors, String[] localDirsBase,
+ AtomicReference<InetSocketAddress> localAddress,
+ long totalMemoryAvailableBytes) {
+ super("ContainerRunnerImpl");
+ Preconditions.checkState(numExecutors > 0,
+ "Invalid number of executors: " + numExecutors + ". Must be > 0");
+ this.localDirsBase = localDirsBase;
+ this.localAddress = localAddress;
+
+ ExecutorService raw = Executors.newFixedThreadPool(numExecutors,
+ new ThreadFactoryBuilder().setNameFormat("ContainerExecutor %d").build());
+ this.executorService = MoreExecutors.listeningDecorator(raw);
+
+
+ // 80% of memory considered for accounted buffers. Rest for objects.
+ // TODO Tune this based on the available size.
+ this.memoryPerExecutor = (long)(totalMemoryAvailableBytes * 0.8 / (float) numExecutors);
+
+ LOG.info("ContainerRunnerImpl config: " +
+ "memoryPerExecutorDerived=" + memoryPerExecutor +
+ ", numExecutors=" + numExecutors
+ );
+ }
+
+ /**
+ * Obtains the local filesystem instance that is later used to create the per-application
+ * directories.
+ *
+ * @throws RuntimeException if the local filesystem cannot be obtained
+ */
+ @Override
+ public void serviceInit(Configuration conf) {
+ try {
+ localFs = FileSystem.getLocal(conf);
+ } catch (IOException e) {
+ throw new RuntimeException("Failed to setup local filesystem instance", e);
+ }
+ }
+
+ @Override
+ public void serviceStart() {
+ }
+
+ /**
+ * Stores the shuffle port as service data for the Tez shuffle handler in the local environment
+ * map, which is copied into the environment of every submitted container or work unit.
+ *
+ * @param shufflePort port the shuffle handler is listening on
+ */
+ public void setShufflePort(int shufflePort) {
+ AuxiliaryServiceHelper.setServiceDataIntoEnv(
+ TezConstants.TEZ_SHUFFLE_HANDLER_SERVICE_ID,
+ ByteBuffer.allocate(4).putInt(shufflePort), localEnv);
+ }
+
+ @Override
+ protected void serviceStop() throws Exception {
+ super.serviceStop();
+ }
+
+ // TODO Move this into a utilities class
+ /** Builds baseDir/usercache/user/appcache/applicationId using the platform file separator. */
+ private static String createAppSpecificLocalDir(String baseDir, String applicationIdString,
+ String user) {
+ StringBuilder dir = new StringBuilder();
+ dir.append(baseDir);
+ dir.append(File.separator).append("usercache");
+ dir.append(File.separator).append(user);
+ dir.append(File.separator).append("appcache");
+ dir.append(File.separator).append(applicationIdString);
+ return dir.toString();
+ }
+
+ /**
+ * Submit a container which is ready for running.
+ * The regular pull mechanism will be used to fetch work from the AM.
+ * Creates the application-specific local directories, registers the application with the
+ * ShuffleHandler and hands a ContainerRunnerCallable to the executor service.
+ *
+ * @param request the container to run, including user, application id and serialized credentials
+ * @throws IOException if the local directories cannot be created or the credentials cannot be read
+ */
+ @Override
+ public void queueContainer(RunContainerRequestProto request) throws IOException {
+ LOG.info("Queuing container for execution: " + request);
+
+ Map<String, String> env = new HashMap<String, String>();
+ env.putAll(localEnv);
+ env.put(ApplicationConstants.Environment.USER.name(), request.getUser());
+
+ String[] localDirs = new String[localDirsBase.length];
+
+ // Set up local dirs to be application specific, and create them.
+ for (int i = 0; i < localDirsBase.length; i++) {
+ localDirs[i] = createAppSpecificLocalDir(localDirsBase[i], request.getApplicationIdString(),
+ request.getUser());
+ localFs.mkdirs(new Path(localDirs[i]));
+ }
+ LOG.info("DEBUG: Dirs are: " + Arrays.toString(localDirs));
+
+
+ // Set up workingDir. This is otherwise set up as Environment.PWD
+ // Used for re-localization, to add the user specified configuration (conf_pb_binary_stream)
+ String workingDir = localDirs[0];
+
+ // Deserialize the credentials shipped with the request.
+ Credentials credentials = new Credentials();
+ DataInputBuffer dib = new DataInputBuffer();
+ byte[] tokenBytes = request.getCredentialsBinary().toByteArray();
+ dib.reset(tokenBytes, tokenBytes.length);
+ credentials.readTokenStorageStream(dib);
+
+ Token<JobTokenIdentifier> jobToken = TokenCache.getSessionToken(credentials);
+
+ // TODO Unregistering does not happen at the moment, since there are no signals on when an app completes.
+ LOG.info("DEBUG: Registering request with the ShuffleHandler");
+ ShuffleHandler.get().registerApplication(request.getApplicationIdString(), jobToken, request.getUser());
+
+
+ // Each callable gets its own copy of the service configuration.
+ ContainerRunnerCallable callable = new ContainerRunnerCallable(request, new Configuration(getConfig()),
+ new ExecutionContextImpl(localAddress.get().getHostName()), env, localDirs,
+ workingDir, credentials, memoryPerExecutor);
+ ListenableFuture<ContainerExecutionResult> future = executorService
+ .submit(callable);
+ Futures.addCallback(future, new ContainerRunnerCallback(request, callable));
+ }
+
+ /**
+ * Submit an entire work unit - containerId + TaskSpec.
+ * This is intended for a task push from the AM.
+ * Creates the application-specific local directories, registers the application with the
+ * ShuffleHandler and hands a TaskRunnerCallable to the executor service.
+ *
+ * @param request the work unit to run, including user, application id and serialized credentials
+ * @throws IOException if the local directories cannot be created or the credentials cannot be read
+ */
+ @Override
+ public void submitWork(SubmitWorkRequestProto request) throws
+ IOException {
+ LOG.info("Queuing work for execution: " + request);
+
+ Map<String, String> env = new HashMap<String, String>();
+ env.putAll(localEnv);
+ env.put(ApplicationConstants.Environment.USER.name(), request.getUser());
+
+ String[] localDirs = new String[localDirsBase.length];
+
+ // Set up local dirs to be application specific, and create them.
+ for (int i = 0; i < localDirsBase.length; i++) {
+ localDirs[i] = createAppSpecificLocalDir(localDirsBase[i], request.getApplicationIdString(),
+ request.getUser());
+ localFs.mkdirs(new Path(localDirs[i]));
+ }
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Dirs are: " + Arrays.toString(localDirs));
+ }
+
+ // Set up workingDir. This is otherwise set up as Environment.PWD
+ // Used for re-localization, to add the user specified configuration (conf_pb_binary_stream)
+ String workingDir = localDirs[0];
+
+ // Deserialize the credentials shipped with the request.
+ Credentials credentials = new Credentials();
+ DataInputBuffer dib = new DataInputBuffer();
+ byte[] tokenBytes = request.getCredentialsBinary().toByteArray();
+ dib.reset(tokenBytes, tokenBytes.length);
+ credentials.readTokenStorageStream(dib);
+
+ Token<JobTokenIdentifier> jobToken = TokenCache.getSessionToken(credentials);
+
+ // TODO Unregistering does not happen at the moment, since there are no signals on when an app completes.
+ LOG.info("DEBUG: Registering request with the ShuffleHandler");
+ ShuffleHandler.get().registerApplication(request.getApplicationIdString(), jobToken, request.getUser());
+ // Each callable gets its own copy of the service configuration.
+ TaskRunnerCallable callable = new TaskRunnerCallable(request, new Configuration(getConfig()),
+ new ExecutionContextImpl(localAddress.get().getHostName()), env, localDirs,
+ workingDir, credentials, memoryPerExecutor);
+ ListenableFuture<ContainerExecutionResult> future = executorService.submit(callable);
+ Futures.addCallback(future, new TaskRunnerCallback(request, callable));
+ }
+
+
+ /**
+ * Runs a single container request by constructing a TezChild and invoking its run() method.
+ * The TezChild is created inside call(), so getTezChild() returns null until call() has started.
+ */
+ static class ContainerRunnerCallable implements Callable<ContainerExecutionResult> {
+
+ private final RunContainerRequestProto request;
+ private final Configuration conf;
+ private final String workingDir;
+ private final String[] localDirs;
+ private final Map<String, String> envMap;
+ // Always null here; passed through to TezChild as the pid argument.
+ private final String pid = null;
+ private final ObjectRegistryImpl objectRegistry;
+ private final ExecutionContext executionContext;
+ private final Credentials credentials;
+ private final long memoryAvailable;
+ // Written by the executor thread in call(), read by the completion callback.
+ private volatile TezChild tezChild;
+
+
+ ContainerRunnerCallable(RunContainerRequestProto request, Configuration conf,
+ ExecutionContext executionContext, Map<String, String> envMap,
+ String[] localDirs, String workingDir, Credentials credentials,
+ long memoryAvailable) {
+ this.request = request;
+ this.conf = conf;
+ this.executionContext = executionContext;
+ this.envMap = envMap;
+ this.workingDir = workingDir;
+ this.localDirs = localDirs;
+ this.objectRegistry = new ObjectRegistryImpl();
+ this.credentials = credentials;
+ this.memoryAvailable = memoryAvailable;
+
+ }
+
+ /**
+ * Creates the TezChild for this request, runs it and logs the elapsed time in milliseconds.
+ *
+ * @return the result reported by TezChild.run()
+ */
+ @Override
+ public ContainerExecutionResult call() throws Exception {
+ Stopwatch sw = new Stopwatch().start();
+ tezChild =
+ new TezChild(conf, request.getAmHost(), request.getAmPort(),
+ request.getContainerIdString(),
+ request.getTokenIdentifier(), request.getAppAttemptNumber(), workingDir, localDirs,
+ envMap, objectRegistry, pid,
+ executionContext, credentials, memoryAvailable, request.getUser());
+ ContainerExecutionResult result = tezChild.run();
+ LOG.info("ExecutionTime for Container: " + request.getContainerIdString() + "=" +
+ sw.stop().elapsedMillis());
+ return result;
+ }
+
+ /** Returns the TezChild created by call(), or null if call() has not created it yet. */
+ public TezChild getTezChild() {
+ return this.tezChild;
+ }
+ }
+
+
+ final class ContainerRunnerCallback implements FutureCallback<ContainerExecutionResult> {
+
+ private final RunContainerRequestProto request;
+ private final ContainerRunnerCallable containerRunnerCallable;
+
+ ContainerRunnerCallback(RunContainerRequestProto request,
+ ContainerRunnerCallable containerRunnerCallable) {
+ this.request = request;
+ this.containerRunnerCallable = containerRunnerCallable;
+ }
+
+ // TODO Proper error handling
+ @Override
+ public void onSuccess(ContainerExecutionResult result) {
+ switch (result.getExitStatus()) {
+ case SUCCESS:
+ LOG.info("Successfully finished: " + request.getApplicationIdString() + ", containerId=" +
+ request.getContainerIdString());
+ break;
+ case EXECUTION_FAILURE:
+ LOG.info("Failed to run: " + request.getApplicationIdString() + ", containerId=" +
+ request.getContainerIdString(), result.getThrowable());
+ break;
+ case INTERRUPTED:
+ LOG.info(
+ "Interrupted while running: " + request.getApplicationIdString() + ", containerId=" +
+ request.getContainerIdString(), result.getThrowable());
+ break;
+ case ASKED_TO_DIE:
+ LOG.info(
+ "Asked to die while running: " + request.getApplicationIdString() + ", containerId=" +
+ request.getContainerIdString());
+ break;
+ }
+ }
+
+ @Override
+ public void onFailure(Throwable t) {
+ LOG.error(
+ "TezChild execution failed for : " + request.getApplicationIdString() + ", containerId=" +
+ request.getContainerIdString(), t);
+ TezChild tezChild = containerRunnerCallable.getTezChild();
+ if (tezChild != null) {
+ tezChild.shutdown();
+ }
+ }
+ }
+
+ /**
+ * Runs a single pushed work unit: sets up the umbilical proxy and task reporter, then executes
+ * the task through a TezTaskRunner on a dedicated single-thread daemon executor.
+ * shutdown() releases the executor, the reporter and the proxy.
+ */
+ static class TaskRunnerCallable implements Callable<ContainerExecutionResult> {
+
+ private final SubmitWorkRequestProto request;
+ private final Configuration conf;
+ private final String workingDir;
+ private final String[] localDirs;
+ private final Map<String, String> envMap;
+ // Always null here; passed through to TezTaskRunner as the pid argument.
+ private final String pid = null;
+ private final ObjectRegistryImpl objectRegistry;
+ private final ExecutionContext executionContext;
+ private final Credentials credentials;
+ private final long memoryAvailable;
+ private final ListeningExecutorService executor;
+ private volatile TezTaskRunner taskRunner;
+ private volatile TaskReporter taskReporter;
+ private TezTaskUmbilicalProtocol umbilical;
+
+
+ TaskRunnerCallable(SubmitWorkRequestProto request, Configuration conf,
+ ExecutionContext executionContext, Map<String, String> envMap,
+ String[] localDirs, String workingDir, Credentials credentials,
+ long memoryAvailable) {
+ this.request = request;
+ this.conf = conf;
+ this.executionContext = executionContext;
+ this.envMap = envMap;
+ this.workingDir = workingDir;
+ this.localDirs = localDirs;
+ this.objectRegistry = new ObjectRegistryImpl();
+ this.credentials = credentials;
+ this.memoryAvailable = memoryAvailable;
+ // TODO This executor seems unnecessary. Here and TezChild
+ ExecutorService executorReal = Executors.newFixedThreadPool(1, new ThreadFactoryBuilder()
+ .setDaemon(true)
+ .setNameFormat("TezTaskRunner_" + request.getTaskSpec().getTaskAttemptIdString()).build());
+ executor = MoreExecutors.listeningDecorator(executorReal);
+ }
+
+ /**
+ * Creates the umbilical proxy, the task reporter and the task runner, then runs the task.
+ *
+ * @return SUCCESS when the task completes or the runner reports it should die;
+ * EXECUTION_FAILURE when the runner throws an IOException or TezException
+ */
+ @Override
+ public ContainerExecutionResult call() throws Exception {
+
+ // TODO Consolidate this code with TezChild.
+ Stopwatch sw = new Stopwatch().start();
+ UserGroupInformation taskUgi = UserGroupInformation.createRemoteUser(request.getUser());
+ taskUgi.addCredentials(credentials);
+
+ Token<JobTokenIdentifier> jobToken = TokenCache.getSessionToken(credentials);
+ Map<String, ByteBuffer> serviceConsumerMetadata = new HashMap<String, ByteBuffer>();
+ serviceConsumerMetadata.put(TezConstants.TEZ_SHUFFLE_HANDLER_SERVICE_ID,
+ TezCommonUtils.convertJobTokenToBytes(jobToken));
+ Multimap<String, String> startedInputsMap = HashMultimap.create();
+
+ // The umbilical proxy is created as a remote user named by the request's token identifier,
+ // carrying the job token bound to the AM address.
+ UserGroupInformation taskOwner =
+ UserGroupInformation.createRemoteUser(request.getTokenIdentifier());
+ final InetSocketAddress address =
+ NetUtils.createSocketAddrForHost(request.getAmHost(), request.getAmPort());
+ SecurityUtil.setTokenService(jobToken, address);
+ taskOwner.addToken(jobToken);
+ umbilical = taskOwner.doAs(new PrivilegedExceptionAction<TezTaskUmbilicalProtocol>() {
+ @Override
+ public TezTaskUmbilicalProtocol run() throws Exception {
+ return RPC.getProxy(TezTaskUmbilicalProtocol.class,
+ TezTaskUmbilicalProtocol.versionID, address, conf);
+ }
+ });
+ // TODO Stop reading this on each request.
+ taskReporter = new TaskReporter(
+ umbilical,
+ conf.getInt(TezConfiguration.TEZ_TASK_AM_HEARTBEAT_INTERVAL_MS,
+ TezConfiguration.TEZ_TASK_AM_HEARTBEAT_INTERVAL_MS_DEFAULT),
+ conf.getLong(
+ TezConfiguration.TEZ_TASK_AM_HEARTBEAT_COUNTER_INTERVAL_MS,
+ TezConfiguration.TEZ_TASK_AM_HEARTBEAT_COUNTER_INTERVAL_MS_DEFAULT),
+ conf.getInt(TezConfiguration.TEZ_TASK_MAX_EVENTS_PER_HEARTBEAT,
+ TezConfiguration.TEZ_TASK_MAX_EVENTS_PER_HEARTBEAT_DEFAULT),
+ new AtomicLong(0),
+ request.getContainerIdString());
+
+ taskRunner = new TezTaskRunner(conf, taskUgi, localDirs,
+ ProtoConverters.getTaskSpecfromProto(request.getTaskSpec()), umbilical,
+ request.getAppAttemptNumber(),
+ serviceConsumerMetadata, envMap, startedInputsMap, taskReporter, executor, objectRegistry,
+ pid,
+ executionContext, memoryAvailable);
+
+ boolean shouldDie;
+ try {
+ // A false return value from run() is treated as a request to die.
+ shouldDie = !taskRunner.run();
+ if (shouldDie) {
+ LOG.info("Got a shouldDie notification via hearbeats. Shutting down");
+ // NOTE(review): reported as SUCCESS rather than ASKED_TO_DIE - confirm this is intended.
+ return new ContainerExecutionResult(ContainerExecutionResult.ExitStatus.SUCCESS, null,
+ "Asked to die by the AM");
+ }
+ } catch (IOException e) {
+ return new ContainerExecutionResult(ContainerExecutionResult.ExitStatus.EXECUTION_FAILURE,
+ e, "TaskExecutionFailure: " + e.getMessage());
+ } catch (TezException e) {
+ return new ContainerExecutionResult(ContainerExecutionResult.ExitStatus.EXECUTION_FAILURE,
+ e, "TaskExecutionFailure: " + e.getMessage());
+ } finally {
+ // Runs on every exit path, including the early returns above.
+ FileSystem.closeAllForUGI(taskUgi);
+ }
+ LOG.info("ExecutionTime for Container: " + request.getContainerIdString() + "=" +
+ sw.stop().elapsedMillis());
+ return new ContainerExecutionResult(ContainerExecutionResult.ExitStatus.SUCCESS, null,
+ null);
+ }
+
+ /**
+ * Stops the single-thread executor, then the task reporter and the umbilical proxy if they
+ * were created.
+ */
+ public void shutdown() {
+ executor.shutdownNow();
+ if (taskReporter != null) {
+ taskReporter.shutdown();
+ }
+ if (umbilical != null) {
+ RPC.stopProxy(umbilical);
+ }
+ }
+ }
+
+
+ /**
+ * Logs the outcome of a pushed work unit and shuts down the TaskRunnerCallable afterwards,
+ * on both the success and the failure path.
+ */
+ final class TaskRunnerCallback implements FutureCallback<ContainerExecutionResult> {
+
+ private final SubmitWorkRequestProto request;
+ private final TaskRunnerCallable taskRunnerCallable;
+
+ TaskRunnerCallback(SubmitWorkRequestProto request,
+ TaskRunnerCallable containerRunnerCallable) {
+ this.request = request;
+ this.taskRunnerCallable = containerRunnerCallable;
+ }
+
+ // TODO Proper error handling
+ @Override
+ public void onSuccess(ContainerExecutionResult result) {
+ switch (result.getExitStatus()) {
+ case SUCCESS:
+ LOG.info("Successfully finished: " + request.getApplicationIdString() + ", containerId=" +
+ request.getContainerIdString());
+ break;
+ case EXECUTION_FAILURE:
+ LOG.info("Failed to run: " + request.getApplicationIdString() + ", containerId=" +
+ request.getContainerIdString(), result.getThrowable());
+ break;
+ case INTERRUPTED:
+ LOG.info(
+ "Interrupted while running: " + request.getApplicationIdString() + ", containerId=" +
+ request.getContainerIdString(), result.getThrowable());
+ break;
+ case ASKED_TO_DIE:
+ LOG.info(
+ "Asked to die while running: " + request.getApplicationIdString() + ", containerId=" +
+ request.getContainerIdString());
+ break;
+ }
+ // Release the executor, reporter and proxy whatever the exit status was.
+ taskRunnerCallable.shutdown();
+ }
+
+ @Override
+ public void onFailure(Throwable t) {
+ LOG.error(
+ "TezTaskRunner execution failed for : " + request.getApplicationIdString() + ", containerId=" +
+ request.getContainerIdString(), t);
+ taskRunnerCallable.shutdown();
+ }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/tez/blob/aadd0492/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/TezTestService.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/TezTestService.java b/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/TezTestService.java
new file mode 100644
index 0000000..012e352
--- /dev/null
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/TezTestService.java
@@ -0,0 +1,126 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.service.impl;
+
+import java.io.IOException;
+import java.net.InetSocketAddress;
+import java.util.Arrays;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.concurrent.atomic.AtomicReference;
+
+import com.google.common.base.Preconditions;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.service.AbstractService;
+import org.apache.hadoop.util.StringUtils;
+import org.apache.log4j.Logger;
+import org.apache.tez.service.ContainerRunner;
+import org.apache.tez.shufflehandler.ShuffleHandler;
+import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos;
+import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos.RunContainerRequestProto;
+
+public class TezTestService extends AbstractService implements ContainerRunner {
+
+ private static final Logger LOG = Logger.getLogger(TezTestService.class);
+
+ private final Configuration shuffleHandlerConf;
+ private final int numExecutors;
+
+ private final TezTestServiceProtocolServerImpl server;
+ private final ContainerRunnerImpl containerRunner;
+ private final String[] localDirs;
+
+ private final AtomicInteger numSubmissions = new AtomicInteger(0);
+
+
+ private final AtomicReference<InetSocketAddress> address = new AtomicReference<InetSocketAddress>();
+
+ /**
+ * Creates the test service together with its protocol server and container runner.
+ *
+ * @param conf base configuration; copied to build the shuffle handler configuration
+ * @param numExecutors number of executors; must be greater than 0
+ * @param memoryAvailable memory to make available to the executors; must not exceed the JVM
+ * maximum memory
+ * @param localDirs work directories; must be non-null and non-empty
+ * @throws IllegalArgumentException if numExecutors or localDirs is invalid
+ * @throws IllegalStateException if memoryAvailable exceeds the JVM maximum memory
+ */
+ public TezTestService(Configuration conf, int numExecutors, long memoryAvailable, String[] localDirs) {
+ super(TezTestService.class.getSimpleName());
+ this.numExecutors = numExecutors;
+ this.localDirs = localDirs;
+
+ long memoryAvailableBytes = memoryAvailable;
+ long jvmMax = Runtime.getRuntime().maxMemory();
+
+ LOG.info(TezTestService.class.getSimpleName() + " created with the following configuration: " +
+ "numExecutors=" + numExecutors +
+ ", workDirs=" + Arrays.toString(localDirs) +
+ ", memoryAvailable=" + memoryAvailable +
+ ", jvmMaxMemory=" + jvmMax);
+
+ Preconditions.checkArgument(this.numExecutors > 0);
+ Preconditions.checkArgument(this.localDirs != null && this.localDirs.length > 0,
+ "Work dirs must be specified");
+ Preconditions.checkState(jvmMax >= memoryAvailableBytes,
+ "Invalid configuration. Xmx value too small. maxAvailable=" + jvmMax + ", configured=" +
+ memoryAvailableBytes);
+
+ this.shuffleHandlerConf = new Configuration(conf);
+ // Start Shuffle on a random port
+ this.shuffleHandlerConf.setInt(ShuffleHandler.SHUFFLE_PORT_CONFIG_KEY, 0);
+ this.shuffleHandlerConf.set(ShuffleHandler.SHUFFLE_HANDLER_LOCAL_DIRS, StringUtils.arrayToString(localDirs));
+
+ // The server and the container runner share the same address holder.
+ this.server = new TezTestServiceProtocolServerImpl(this, address);
+ this.containerRunner = new ContainerRunnerImpl(numExecutors, localDirs, address,
+ memoryAvailableBytes);
+ }
+
+ @Override
+ public void serviceInit(Configuration conf) {
+ server.init(conf);
+ containerRunner.init(conf);
+ }
+
+ @Override
+ public void serviceStart() throws Exception {
+ ShuffleHandler.initializeAndStart(shuffleHandlerConf);
+ containerRunner.setShufflePort(ShuffleHandler.get().getPort());
+ server.start();
+ containerRunner.start();
+ }
+
+ /**
+ * Stops the container runner, the protocol server and the shuffle handler, in that order.
+ */
+ @Override
+ public void serviceStop() throws Exception {
+ containerRunner.stop();
+ server.stop();
+ ShuffleHandler.get().stop();
+ }
+
+ public InetSocketAddress getListenerAddress() {
+ return server.getBindAddress();
+ }
+
+ public int getShufflePort() {
+ return ShuffleHandler.get().getPort();
+ }
+
+
+
+ @Override
+ public void queueContainer(RunContainerRequestProto request) throws IOException {
+ numSubmissions.incrementAndGet();
+ containerRunner.queueContainer(request);
+ }
+
+ @Override
+ public void submitWork(TezTestServiceProtocolProtos.SubmitWorkRequestProto request) throws
+ IOException {
+ numSubmissions.incrementAndGet();
+ containerRunner.submitWork(request);
+ }
+
+ public int getNumSubmissions() {
+ return numSubmissions.get();
+ }
+}
http://git-wip-us.apache.org/repos/asf/tez/blob/aadd0492/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/TezTestServiceProtocolClientImpl.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/TezTestServiceProtocolClientImpl.java b/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/TezTestServiceProtocolClientImpl.java
new file mode 100644
index 0000000..10d2952
--- /dev/null
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/TezTestServiceProtocolClientImpl.java
@@ -0,0 +1,82 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.service.impl;
+
+import java.io.IOException;
+import java.net.InetSocketAddress;
+
+import com.google.protobuf.RpcController;
+import com.google.protobuf.ServiceException;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.ipc.ProtobufRpcEngine;
+import org.apache.hadoop.ipc.RPC;
+import org.apache.hadoop.net.NetUtils;
+import org.apache.tez.service.TezTestServiceProtocolBlockingPB;
+import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos;
+import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos.RunContainerRequestProto;
+import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos.RunContainerResponseProto;
+
+
+public class TezTestServiceProtocolClientImpl implements TezTestServiceProtocolBlockingPB {
+
+ private final Configuration conf;
+ private final InetSocketAddress serverAddr;
+ TezTestServiceProtocolBlockingPB proxy;
+
+
+ public TezTestServiceProtocolClientImpl(Configuration conf, String hostname, int port) {
+ this.conf = conf;
+ this.serverAddr = NetUtils.createSocketAddr(hostname, port);
+ }
+
+ /**
+ * Forwards the request to the remote service through the lazily created proxy.
+ * The supplied controller is not used; null is passed to the proxy instead.
+ *
+ * @throws ServiceException wrapping any IOException raised while obtaining the proxy
+ */
+ @Override
+ public RunContainerResponseProto runContainer(RpcController controller,
+ RunContainerRequestProto request) throws
+ ServiceException {
+ try {
+ return getProxy().runContainer(null, request);
+ } catch (IOException e) {
+ throw new ServiceException(e);
+ }
+ }
+
+ /**
+ * Forwards the work submission to the remote service through the lazily created proxy.
+ * The supplied controller is not used; null is passed to the proxy instead.
+ *
+ * @throws ServiceException wrapping any IOException raised while obtaining the proxy
+ */
+ @Override
+ public TezTestServiceProtocolProtos.SubmitWorkResponseProto submitWork(RpcController controller,
+ TezTestServiceProtocolProtos.SubmitWorkRequestProto request) throws
+ ServiceException {
+ try {
+ return getProxy().submitWork(null, request);
+ } catch (IOException e) {
+ throw new ServiceException(e);
+ }
+ }
+
+
+ /** Returns the cached proxy, creating it on first use. No synchronization is performed. */
+ public TezTestServiceProtocolBlockingPB getProxy() throws IOException {
+ if (proxy != null) {
+ return proxy;
+ }
+ proxy = createProxy();
+ return proxy;
+ }
+
+ /** Registers the protobuf RPC engine for the protocol and opens a new proxy to the server. */
+ public TezTestServiceProtocolBlockingPB createProxy() throws IOException {
+ // TODO Fix security
+ RPC.setProtocolEngine(conf, TezTestServiceProtocolBlockingPB.class, ProtobufRpcEngine.class);
+ return (TezTestServiceProtocolBlockingPB) RPC
+ .getProxy(TezTestServiceProtocolBlockingPB.class, 0, serverAddr, conf);
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/tez/blob/aadd0492/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/TezTestServiceProtocolServerImpl.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/TezTestServiceProtocolServerImpl.java b/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/TezTestServiceProtocolServerImpl.java
new file mode 100644
index 0000000..d7f8444
--- /dev/null
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/TezTestServiceProtocolServerImpl.java
@@ -0,0 +1,133 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.service.impl;
+
+import java.io.IOException;
+import java.net.InetSocketAddress;
+import java.util.concurrent.atomic.AtomicReference;
+
+import com.google.common.annotations.VisibleForTesting;
+import com.google.protobuf.BlockingService;
+import com.google.protobuf.RpcController;
+import com.google.protobuf.ServiceException;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.ipc.ProtobufRpcEngine;
+import org.apache.hadoop.ipc.RPC;
+import org.apache.hadoop.net.NetUtils;
+import org.apache.hadoop.service.AbstractService;
+import org.apache.tez.service.ContainerRunner;
+import org.apache.tez.service.TezTestServiceProtocolBlockingPB;
+import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos;
+import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos.RunContainerRequestProto;
+import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos.RunContainerResponseProto;
+import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos.SubmitWorkResponseProto;
+
+/**
+ * RPC server side of the Tez test service protocol. Incoming requests are handed to a
+ * {@link ContainerRunner}; the address the server actually bound to is published through the
+ * {@link AtomicReference} supplied at construction time.
+ */
+public class TezTestServiceProtocolServerImpl extends AbstractService
+    implements TezTestServiceProtocolBlockingPB {
+
+  private static final Log LOG = LogFactory.getLog(TezTestServiceProtocolServerImpl.class);
+
+  private final ContainerRunner containerRunner;
+  // Created in serviceStart(); null until then.
+  private RPC.Server server;
+  // Shared holder that receives the bound address once the server is running.
+  private final AtomicReference<InetSocketAddress> bindAddress;
+
+  /**
+   * @param containerRunner receives every runContainer / submitWork request
+   * @param address holder that serviceStart() fills with the server's connect address
+   */
+  public TezTestServiceProtocolServerImpl(ContainerRunner containerRunner,
+      AtomicReference<InetSocketAddress> address) {
+    super(TezTestServiceProtocolServerImpl.class.getSimpleName());
+    this.containerRunner = containerRunner;
+    this.bindAddress = address;
+  }
+
+  /**
+   * Queues the container described by the request on the container runner.
+   *
+   * @return the default (empty) response instance
+   * @throws ServiceException wrapping any IOException raised while queueing
+   */
+  @Override
+  public RunContainerResponseProto runContainer(RpcController controller,
+      RunContainerRequestProto request) throws ServiceException {
+    LOG.info("Received request: " + request);
+    try {
+      containerRunner.queueContainer(request);
+    } catch (IOException e) {
+      throw new ServiceException(e);
+    }
+    return RunContainerResponseProto.getDefaultInstance();
+  }
+
+  /**
+   * Submits the work described by the request to the container runner.
+   *
+   * @return the default (empty) response instance
+   * @throws ServiceException wrapping any IOException raised while submitting, so that the
+   *     caller sees the failure instead of a success response (same contract as runContainer)
+   */
+  @Override
+  public SubmitWorkResponseProto submitWork(RpcController controller,
+      TezTestServiceProtocolProtos.SubmitWorkRequestProto request) throws ServiceException {
+    LOG.info("Received submitWork request: " + request);
+    try {
+      containerRunner.submitWork(request);
+    } catch (IOException e) {
+      // Propagate rather than print-and-ignore: a failed submission must not look successful.
+      throw new ServiceException(e);
+    }
+    return SubmitWorkResponseProto.getDefaultInstance();
+  }
+
+  /**
+   * Creates and starts the RPC server on an ephemeral port with three handlers, then publishes
+   * the resulting connect address through the bind-address holder.
+   *
+   * @throws RuntimeException wrapping the IOException if the server cannot be created
+   */
+  @Override
+  public void serviceStart() {
+    Configuration conf = getConfig();
+
+    int numHandlers = 3;
+    InetSocketAddress addr = new InetSocketAddress(0);
+
+    try {
+      server = createServer(TezTestServiceProtocolBlockingPB.class, addr, conf, numHandlers,
+          TezTestServiceProtocolProtos.TezTestServiceProtocol.newReflectiveBlockingService(this));
+      server.start();
+    } catch (IOException e) {
+      LOG.error("Failed to run RPC Server", e);
+      throw new RuntimeException(e);
+    }
+
+    InetSocketAddress serverBindAddress = NetUtils.getConnectAddress(server);
+    this.bindAddress.set(NetUtils.createSocketAddrForHost(
+        serverBindAddress.getAddress().getCanonicalHostName(),
+        serverBindAddress.getPort()));
+    LOG.info("Instantiated TestTestServiceListener at " + bindAddress);
+  }
+
+  /** Stops the RPC server if it was started. */
+  @Override
+  public void serviceStop() {
+    if (server != null) {
+      server.stop();
+    }
+  }
+
+  /** Returns the address currently stored in the bind-address holder. */
+  @InterfaceAudience.Private
+  @VisibleForTesting
+  InetSocketAddress getBindAddress() {
+    return this.bindAddress.get();
+  }
+
+  /**
+   * Builds (but does not start) a protobuf RPC server for the given protocol.
+   * Only the host name of {@code addr} is used; the port is always requested as 0.
+   */
+  private RPC.Server createServer(Class<?> pbProtocol, InetSocketAddress addr, Configuration conf,
+      int numHandlers, BlockingService blockingService) throws IOException {
+    RPC.setProtocolEngine(conf, pbProtocol, ProtobufRpcEngine.class);
+    RPC.Server server = new RPC.Builder(conf)
+        .setProtocol(pbProtocol)
+        .setInstance(blockingService)
+        .setBindAddress(addr.getHostName())
+        .setPort(0)
+        .setNumHandlers(numHandlers)
+        .build();
+    // TODO Add security.
+    return server;
+  }
+}
http://git-wip-us.apache.org/repos/asf/tez/blob/aadd0492/tez-ext-service-tests/src/test/java/org/apache/tez/shufflehandler/FadvisedChunkedFile.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/shufflehandler/FadvisedChunkedFile.java b/tez-ext-service-tests/src/test/java/org/apache/tez/shufflehandler/FadvisedChunkedFile.java
new file mode 100644
index 0000000..65588fe
--- /dev/null
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/shufflehandler/FadvisedChunkedFile.java
@@ -0,0 +1,78 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.shufflehandler;
+
+import java.io.FileDescriptor;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.io.ReadaheadPool;
+import org.apache.hadoop.io.ReadaheadPool.ReadaheadRequest;
+import org.apache.hadoop.io.nativeio.NativeIO;
+import org.jboss.netty.handler.stream.ChunkedFile;
+
+/**
+ * A {@link ChunkedFile} that can request read-ahead before each chunk is produced and, when
+ * closed, advises the OS that the served byte range no longer needs to stay in the page cache.
+ * Both behaviours are enabled by the {@code manageOsCache} flag.
+ */
+public class FadvisedChunkedFile extends ChunkedFile {
+
+  private static final Log LOG = LogFactory.getLog(FadvisedChunkedFile.class);
+
+  private final boolean manageOsCache;
+  private final int readaheadLength;
+  private final ReadaheadPool readaheadPool;
+  private final FileDescriptor fd;
+  private final String identifier;
+
+  // Most recent read-ahead request; handed back to the pool on the next request.
+  private ReadaheadRequest readaheadRequest;
+
+  public FadvisedChunkedFile(RandomAccessFile file, long position, long count,
+      int chunkSize, boolean manageOsCache, int readaheadLength,
+      ReadaheadPool readaheadPool, String identifier) throws IOException {
+    super(file, position, count, chunkSize);
+    this.fd = file.getFD();
+    this.identifier = identifier;
+    this.manageOsCache = manageOsCache;
+    this.readaheadPool = readaheadPool;
+    this.readaheadLength = readaheadLength;
+  }
+
+  /**
+   * Issues a read-ahead request (when cache management is on and a pool is available) and then
+   * returns the next chunk from the superclass.
+   */
+  @Override
+  public Object nextChunk() throws Exception {
+    final boolean readaheadEnabled = manageOsCache && readaheadPool != null;
+    if (readaheadEnabled) {
+      readaheadRequest = readaheadPool.readaheadStream(
+          identifier, fd, getCurrentOffset(), readaheadLength, getEndOffset(), readaheadRequest);
+    }
+    return super.nextChunk();
+  }
+
+  /**
+   * Cancels any pending read-ahead, advises the OS to drop the served range from its cache
+   * (failures are only logged), and closes the underlying file.
+   */
+  @Override
+  public void close() throws Exception {
+    final ReadaheadRequest pending = readaheadRequest;
+    if (pending != null) {
+      pending.cancel();
+    }
+    if (manageOsCache) {
+      final long servedLength = getEndOffset() - getStartOffset();
+      if (servedLength > 0) {
+        try {
+          NativeIO.POSIX.getCacheManipulator().posixFadviseIfPossible(
+              identifier, fd, getStartOffset(), servedLength,
+              NativeIO.POSIX.POSIX_FADV_DONTNEED);
+        } catch (Throwable t) {
+          LOG.warn("Failed to manage OS cache for " + identifier, t);
+        }
+      }
+    }
+    super.close();
+  }
+}
http://git-wip-us.apache.org/repos/asf/tez/blob/aadd0492/tez-ext-service-tests/src/test/java/org/apache/tez/shufflehandler/FadvisedFileRegion.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/shufflehandler/FadvisedFileRegion.java b/tez-ext-service-tests/src/test/java/org/apache/tez/shufflehandler/FadvisedFileRegion.java
new file mode 100644
index 0000000..bdffe52
--- /dev/null
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/shufflehandler/FadvisedFileRegion.java
@@ -0,0 +1,160 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.shufflehandler;
+
+import java.io.FileDescriptor;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.nio.ByteBuffer;
+import java.nio.channels.FileChannel;
+import java.nio.channels.WritableByteChannel;
+
+import com.google.common.annotations.VisibleForTesting;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.io.ReadaheadPool;
+import org.apache.hadoop.io.ReadaheadPool.ReadaheadRequest;
+import org.apache.hadoop.io.nativeio.NativeIO;
+import org.jboss.netty.channel.DefaultFileRegion;
+
+/**
+ * A DefaultFileRegion that can request read-ahead before each transfer, can copy the data
+ * through an in-memory buffer instead of delegating to the superclass transferTo, and can
+ * advise the OS to drop the region from its cache once the transfer has succeeded.
+ */
+public class FadvisedFileRegion extends DefaultFileRegion {
+
+ private static final Log LOG = LogFactory.getLog(FadvisedFileRegion.class);
+
+ private final boolean manageOsCache;
+ private final int readaheadLength;
+ private final ReadaheadPool readaheadPool;
+ private final FileDescriptor fd;
+ private final String identifier;
+ // Copies of the constructor's count and position, used by customShuffleTransfer.
+ private final long count;
+ private final long position;
+ // Capacity of the intermediate buffer allocated by customShuffleTransfer.
+ private final int shuffleBufferSize;
+ // When true, transferTo delegates to the superclass; otherwise the buffered copy is used.
+ private final boolean shuffleTransferToAllowed;
+ private final FileChannel fileChannel;
+
+ // Most recent read-ahead request; passed back to the pool on the next request.
+ private ReadaheadRequest readaheadRequest;
+
+ public FadvisedFileRegion(RandomAccessFile file, long position, long count,
+ boolean manageOsCache, int readaheadLength, ReadaheadPool readaheadPool,
+ String identifier, int shuffleBufferSize,
+ boolean shuffleTransferToAllowed) throws IOException {
+ super(file.getChannel(), position, count);
+ this.manageOsCache = manageOsCache;
+ this.readaheadLength = readaheadLength;
+ this.readaheadPool = readaheadPool;
+ this.fd = file.getFD();
+ this.identifier = identifier;
+ this.fileChannel = file.getChannel();
+ this.count = count;
+ this.position = position;
+ this.shuffleBufferSize = shuffleBufferSize;
+ this.shuffleTransferToAllowed = shuffleTransferToAllowed;
+ }
+
+ /**
+ * Requests read-ahead (when cache management is on and a pool is available) and then
+ * transfers data either through the superclass or through customShuffleTransfer,
+ * depending on shuffleTransferToAllowed.
+ *
+ * @param target channel to write to
+ * @param position offset relative to the start of this region
+ */
+ @Override
+ public long transferTo(WritableByteChannel target, long position)
+ throws IOException {
+ if (manageOsCache && readaheadPool != null) {
+ readaheadRequest = readaheadPool.readaheadStream(identifier, fd,
+ getPosition() + position, readaheadLength,
+ getPosition() + getCount(), readaheadRequest);
+ }
+
+ if(this.shuffleTransferToAllowed) {
+ return super.transferTo(target, position);
+ } else {
+ return customShuffleTransfer(target, position);
+ }
+ }
+
+ /**
+ * This method transfers data using local buffer. It transfers data from
+ * a disk to a local buffer in memory, and then it transfers data from the
+ * buffer to the target. This is used only if transferTo is disallowed in
+ * the configuration file. super.TransferTo does not perform well on Windows
+ * due to a small IO request generated. customShuffleTransfer can control
+ * the size of the IO requests by changing the size of the intermediate
+ * buffer.
+ *
+ * @param target channel to write to
+ * @param position offset relative to the start of this region; must be between 0 and count
+ * @return the number of bytes written to the target
+ * @throws IllegalArgumentException if position is negative or greater than count
+ */
+ @VisibleForTesting
+ long customShuffleTransfer(WritableByteChannel target, long position)
+ throws IOException {
+ long actualCount = this.count - position;
+ // NOTE(review): the message advertises an upper bound of count - 1, but
+ // position == count is accepted and simply transfers nothing.
+ if (actualCount < 0 || position < 0) {
+ throw new IllegalArgumentException(
+ "position out of range: " + position +
+ " (expected: 0 - " + (this.count - 1) + ')');
+ }
+ if (actualCount == 0) {
+ return 0L;
+ }
+
+ // trans counts the bytes still to be transferred.
+ long trans = actualCount;
+ int readSize;
+ ByteBuffer byteBuffer = ByteBuffer.allocate(this.shuffleBufferSize);
+
+ // Stop when everything was transferred or the channel returns no more data.
+ while(trans > 0L &&
+ (readSize = fileChannel.read(byteBuffer, this.position+position)) > 0) {
+ //adjust counters and buffer limit
+ if(readSize < trans) {
+ trans -= readSize;
+ position += readSize;
+ byteBuffer.flip();
+ } else {
+ //We can read more than we need if the actualCount is not multiple
+ //of the byteBuffer size and file is big enough. In that case we cannot
+ //use flip method but we need to set buffer limit manually to trans.
+ byteBuffer.limit((int)trans);
+ byteBuffer.position(0);
+ position += trans;
+ trans = 0;
+ }
+
+ //write data to the target
+ while(byteBuffer.hasRemaining()) {
+ target.write(byteBuffer);
+ }
+
+ // Reset the buffer so the next read starts at index 0 with full capacity.
+ byteBuffer.clear();
+ }
+
+ return actualCount - trans;
+ }
+
+
+ /**
+ * Cancels any outstanding read-ahead request before the superclass releases its resources.
+ */
+ @Override
+ public void releaseExternalResources() {
+ if (readaheadRequest != null) {
+ readaheadRequest.cancel();
+ }
+ super.releaseExternalResources();
+ }
+
+ /**
+ * Call when the transfer completes successfully so we can advise the OS that
+ * we don't need the region to be cached anymore.
+ * Failures of the advise call are logged and otherwise ignored.
+ */
+ public void transferSuccessful() {
+ if (manageOsCache && getCount() > 0) {
+ try {
+ NativeIO.POSIX.getCacheManipulator().posixFadviseIfPossible(identifier,
+ fd, getPosition(), getCount(),
+ NativeIO.POSIX.POSIX_FADV_DONTNEED);
+ } catch (Throwable t) {
+ LOG.warn("Failed to manage OS cache for " + identifier, t);
+ }
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/tez/blob/aadd0492/tez-ext-service-tests/src/test/java/org/apache/tez/shufflehandler/IndexCache.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/shufflehandler/IndexCache.java b/tez-ext-service-tests/src/test/java/org/apache/tez/shufflehandler/IndexCache.java
new file mode 100644
index 0000000..9a51ca0
--- /dev/null
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/shufflehandler/IndexCache.java
@@ -0,0 +1,199 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tez.shufflehandler;
+
+import java.io.IOException;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.LinkedBlockingQueue;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.tez.runtime.library.common.Constants;
+import org.apache.tez.runtime.library.common.sort.impl.TezIndexRecord;
+import org.apache.tez.runtime.library.common.sort.impl.TezSpillRecord;
+
+/**
+ * In-memory cache of spill index records keyed by map id. Entries are loaded on demand from
+ * the index file; once the accounted size exceeds the fixed limit, entries are evicted in
+ * the order in which they were loaded.
+ */
+class IndexCache {
+
+  private static final Log LOG = LogFactory.getLog(IndexCache.class);
+
+  private final Configuration conf;
+  private final int totalMemoryAllowed;
+  private final AtomicInteger totalMemoryUsed = new AtomicInteger();
+
+  private final ConcurrentHashMap<String,IndexInformation> cache =
+      new ConcurrentHashMap<String,IndexInformation>();
+
+  // Map ids in load order; the head is the next eviction candidate.
+  private final LinkedBlockingQueue<String> queue =
+      new LinkedBlockingQueue<String>();
+
+  public IndexCache(Configuration conf) {
+    this.conf = conf;
+    totalMemoryAllowed = 10 * 1024 * 1024;
+    LOG.info("IndexCache created with max memory = " + totalMemoryAllowed);
+  }
+
+  /**
+   * This method gets the index information for the given mapId and reduce.
+   * It reads the index file into cache if it is not already present.
+   * @param mapId id of the map whose index is requested
+   * @param reduce index of the record to return from the map's spill record
+   * @param fileName The file to read the index information from if it is not
+   *                 already present in the cache
+   * @param expectedIndexOwner The expected owner of the index file
+   * @return The Index Information
+   * @throws IOException if the index file cannot be read, the wait for a concurrent load is
+   *                     interrupted, or the spill record is empty or has no entry for
+   *                     {@code reduce}
+   */
+  public TezIndexRecord getIndexInformation(String mapId, int reduce,
+      Path fileName, String expectedIndexOwner)
+      throws IOException {
+
+    IndexInformation info = cache.get(mapId);
+
+    if (info == null) {
+      info = readIndexFileToCache(fileName, mapId, expectedIndexOwner);
+    } else {
+      waitForConstruction(info);
+      LOG.debug("IndexCache HIT: MapId " + mapId + " found");
+    }
+
+    final int recordCount = info.mapSpillRecord.size();
+    if (recordCount == 0 || recordCount <= reduce) {
+      throw new IOException("Invalid request " +
+          " Map Id = " + mapId + " Reducer = " + reduce +
+          " Index Info Length = " + recordCount);
+    }
+    return info.mapSpillRecord.getIndex(reduce);
+  }
+
+  private boolean isUnderConstruction(IndexInformation info) {
+    synchronized(info) {
+      return (null == info.mapSpillRecord);
+    }
+  }
+
+  /**
+   * Blocks until another thread has finished loading {@code info}.
+   * If the wait is interrupted, the thread's interrupt status is restored before the
+   * IOException is thrown, so callers further up can still observe the interruption.
+   */
+  private void waitForConstruction(IndexInformation info) throws IOException {
+    synchronized(info) {
+      while (isUnderConstruction(info)) {
+        try {
+          info.wait();
+        } catch (InterruptedException e) {
+          Thread.currentThread().interrupt();
+          throw new IOException("Interrupted waiting for construction", e);
+        }
+      }
+    }
+  }
+
+  /**
+   * Loads the index file for {@code mapId} into the cache unless another thread got there
+   * first, in which case this waits for that load and returns its entry.
+   */
+  private IndexInformation readIndexFileToCache(Path indexFileName,
+      String mapId,
+      String expectedIndexOwner)
+      throws IOException {
+    final IndexInformation newInd = new IndexInformation();
+    final IndexInformation existing = cache.putIfAbsent(mapId, newInd);
+    if (existing != null) {
+      waitForConstruction(existing);
+      LOG.debug("IndexCache HIT: MapId " + mapId + " found");
+      return existing;
+    }
+    LOG.debug("IndexCache MISS: MapId " + mapId + " not found") ;
+    TezSpillRecord tmp = null;
+    try {
+      tmp = new TezSpillRecord(indexFileName, conf, expectedIndexOwner);
+    } catch (Throwable e) {
+      // Publish an empty record so that waiters wake up, and drop the failed entry.
+      tmp = new TezSpillRecord(0);
+      cache.remove(mapId);
+      throw new IOException("Error Reading IndexFile", e);
+    } finally {
+      synchronized (newInd) {
+        newInd.mapSpillRecord = tmp;
+        newInd.notifyAll();
+      }
+    }
+    queue.add(mapId);
+
+    if (totalMemoryUsed.addAndGet(newInd.getSize()) > totalMemoryAllowed) {
+      freeIndexInformation();
+    }
+    return newInd;
+  }
+
+  /**
+   * This method removes the map from the cache if index information for this
+   * map is loaded(size>0), index information entry in cache will not be
+   * removed if it is in the loading phase(size=0), this prevents corruption
+   * of totalMemoryUsed. It should be called when a map output on this tracker
+   * is discarded.
+   * @param mapId The taskID of this map.
+   */
+  public void removeMap(String mapId) {
+    IndexInformation info = cache.get(mapId);
+    if (info == null || isUnderConstruction(info)) {
+      return;
+    }
+    info = cache.remove(mapId);
+    if (info != null) {
+      totalMemoryUsed.addAndGet(-info.getSize());
+      if (!queue.remove(mapId)) {
+        LOG.warn("Map ID" + mapId + " not found in queue!!");
+      }
+    } else {
+      LOG.info("Map ID " + mapId + " not found in cache");
+    }
+  }
+
+  /**
+   * This method checks if cache and totalMemoryUsed is consistent.
+   * It is only used for unit test.
+   * @return True if cache and totalMemoryUsed is consistent
+   */
+  boolean checkTotalMemoryUsed() {
+    int totalSize = 0;
+    for (IndexInformation info : cache.values()) {
+      totalSize += info.getSize();
+    }
+    return totalSize == totalMemoryUsed.get();
+  }
+
+  /**
+   * Bring memory usage below totalMemoryAllowed.
+   * Stops early (with a warning) if there is nothing left to evict, instead of failing with
+   * NoSuchElementException on an empty queue.
+   */
+  private synchronized void freeIndexInformation() {
+    while (totalMemoryUsed.get() > totalMemoryAllowed) {
+      final String oldest = queue.poll();
+      if (oldest == null) {
+        LOG.warn("IndexCache memory usage " + totalMemoryUsed.get()
+            + " exceeds limit " + totalMemoryAllowed
+            + " but no entries are queued for eviction");
+        break;
+      }
+      IndexInformation info = cache.remove(oldest);
+      if (info != null) {
+        totalMemoryUsed.addAndGet(-info.getSize());
+      }
+    }
+  }
+
+  /** Cache entry; {@code mapSpillRecord} stays null while the entry is being loaded. */
+  private static class IndexInformation {
+    TezSpillRecord mapSpillRecord;
+
+    /** Accounted size of this entry; 0 while the record has not been loaded. */
+    int getSize() {
+      return mapSpillRecord == null
+          ? 0
+          : mapSpillRecord.size() * Constants.MAP_OUTPUT_INDEX_RECORD_LENGTH;
+    }
+  }
+}
http://git-wip-us.apache.org/repos/asf/tez/blob/aadd0492/tez-ext-service-tests/src/test/java/org/apache/tez/shufflehandler/ShuffleHandler.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/shufflehandler/ShuffleHandler.java b/tez-ext-service-tests/src/test/java/org/apache/tez/shufflehandler/ShuffleHandler.java
new file mode 100644
index 0000000..cc82d74
--- /dev/null
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/shufflehandler/ShuffleHandler.java
@@ -0,0 +1,840 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.shufflehandler;
+
+import static org.jboss.netty.buffer.ChannelBuffers.wrappedBuffer;
+import static org.jboss.netty.handler.codec.http.HttpHeaders.Names.CONTENT_TYPE;
+import static org.jboss.netty.handler.codec.http.HttpMethod.GET;
+import static org.jboss.netty.handler.codec.http.HttpResponseStatus.BAD_REQUEST;
+import static org.jboss.netty.handler.codec.http.HttpResponseStatus.FORBIDDEN;
+import static org.jboss.netty.handler.codec.http.HttpResponseStatus.INTERNAL_SERVER_ERROR;
+import static org.jboss.netty.handler.codec.http.HttpResponseStatus.METHOD_NOT_ALLOWED;
+import static org.jboss.netty.handler.codec.http.HttpResponseStatus.NOT_FOUND;
+import static org.jboss.netty.handler.codec.http.HttpResponseStatus.OK;
+import static org.jboss.netty.handler.codec.http.HttpResponseStatus.UNAUTHORIZED;
+import static org.jboss.netty.handler.codec.http.HttpVersion.HTTP_1_1;
+
+import javax.crypto.SecretKey;
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.net.InetSocketAddress;
+import java.net.URL;
+import java.nio.ByteBuffer;
+import java.nio.channels.ClosedChannelException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ConcurrentMap;
+import java.util.concurrent.Executors;
+import java.util.concurrent.ThreadFactory;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.regex.Pattern;
+
+import com.google.common.base.Charsets;
+import com.google.common.base.Preconditions;
+import com.google.common.util.concurrent.ThreadFactoryBuilder;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.LocalDirAllocator;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.DataInputByteBuffer;
+import org.apache.hadoop.io.DataOutputBuffer;
+import org.apache.hadoop.io.ReadaheadPool;
+import org.apache.hadoop.io.SecureIOUtils;
+import org.apache.hadoop.metrics2.annotation.Metric;
+import org.apache.hadoop.metrics2.annotation.Metrics;
+import org.apache.hadoop.metrics2.lib.MutableCounterInt;
+import org.apache.hadoop.metrics2.lib.MutableCounterLong;
+import org.apache.hadoop.metrics2.lib.MutableGaugeInt;
+import org.apache.hadoop.security.ssl.SSLFactory;
+import org.apache.hadoop.security.token.Token;
+import org.apache.hadoop.util.Shell;
+import org.apache.hadoop.yarn.api.records.ApplicationId;
+import org.apache.hadoop.yarn.util.ConverterUtils;
+import org.apache.tez.common.security.JobTokenIdentifier;
+import org.apache.tez.common.security.JobTokenSecretManager;
+import org.apache.tez.runtime.library.common.security.SecureShuffleUtils;
+import org.apache.tez.runtime.library.common.shuffle.orderedgrouped.ShuffleHeader;
+import org.apache.tez.runtime.library.common.sort.impl.TezIndexRecord;
+import org.jboss.netty.bootstrap.ServerBootstrap;
+import org.jboss.netty.buffer.ChannelBuffers;
+import org.jboss.netty.channel.Channel;
+import org.jboss.netty.channel.ChannelFactory;
+import org.jboss.netty.channel.ChannelFuture;
+import org.jboss.netty.channel.ChannelFutureListener;
+import org.jboss.netty.channel.ChannelHandlerContext;
+import org.jboss.netty.channel.ChannelPipeline;
+import org.jboss.netty.channel.ChannelPipelineFactory;
+import org.jboss.netty.channel.ChannelStateEvent;
+import org.jboss.netty.channel.Channels;
+import org.jboss.netty.channel.ExceptionEvent;
+import org.jboss.netty.channel.MessageEvent;
+import org.jboss.netty.channel.SimpleChannelUpstreamHandler;
+import org.jboss.netty.channel.group.ChannelGroup;
+import org.jboss.netty.channel.group.DefaultChannelGroup;
+import org.jboss.netty.channel.socket.nio.NioServerSocketChannelFactory;
+import org.jboss.netty.handler.codec.frame.TooLongFrameException;
+import org.jboss.netty.handler.codec.http.DefaultHttpResponse;
+import org.jboss.netty.handler.codec.http.HttpChunkAggregator;
+import org.jboss.netty.handler.codec.http.HttpHeaders;
+import org.jboss.netty.handler.codec.http.HttpRequest;
+import org.jboss.netty.handler.codec.http.HttpRequestDecoder;
+import org.jboss.netty.handler.codec.http.HttpResponse;
+import org.jboss.netty.handler.codec.http.HttpResponseEncoder;
+import org.jboss.netty.handler.codec.http.HttpResponseStatus;
+import org.jboss.netty.handler.codec.http.QueryStringDecoder;
+import org.jboss.netty.handler.ssl.SslHandler;
+import org.jboss.netty.handler.stream.ChunkedWriteHandler;
+import org.jboss.netty.util.CharsetUtil;
+
+public class ShuffleHandler {
+
+ private static final Log LOG = LogFactory.getLog(ShuffleHandler.class);
+
+ public static final String SHUFFLE_HANDLER_LOCAL_DIRS = "tez.shuffle.handler.local-dirs";
+
+ public static final String SHUFFLE_MANAGE_OS_CACHE = "mapreduce.shuffle.manage.os.cache";
+ public static final boolean DEFAULT_SHUFFLE_MANAGE_OS_CACHE = true;
+
+ public static final String SHUFFLE_READAHEAD_BYTES = "mapreduce.shuffle.readahead.bytes";
+ public static final int DEFAULT_SHUFFLE_READAHEAD_BYTES = 4 * 1024 * 1024;
+
+ // pattern to identify errors related to the client closing the socket early
+ // idea borrowed from Netty SslHandler
+ private static final Pattern IGNORABLE_ERROR_MESSAGE = Pattern.compile(
+ "^.*(?:connection.*reset|connection.*closed|broken.*pipe).*$",
+ Pattern.CASE_INSENSITIVE);
+
+ // Listening port: start() reads it from SHUFFLE_PORT_CONFIG_KEY and then replaces it
+ // with the port the server channel actually bound to.
+ private int port;
+ // Netty server channel factory, built in the constructor.
+ private final ChannelFactory selector;
+ // Channel group; start() adds the bound server channel to it.
+ private final ChannelGroup accepted = new DefaultChannelGroup();
+ // Created in start().
+ protected HttpPipelineFactory pipelineFact;
+ private final int sslFileBufferSize;
+ private final Configuration conf;
+
+ private final ConcurrentMap<String, Boolean> registeredApps = new ConcurrentHashMap<String, Boolean>();
+
+ /**
+ * Should the shuffle use posix_fadvise calls to manage the OS cache during
+ * sendfile
+ */
+ private final boolean manageOsCache;
+ private final int readaheadLength;
+ private final int maxShuffleConnections;
+ private final int shuffleBufferSize;
+ private final boolean shuffleTransferToAllowed;
+ private final ReadaheadPool readaheadPool = ReadaheadPool.getInstance();
+
+ private Map<String,String> userRsrc;
+ private JobTokenSecretManager secretManager;
+
+ // TODO Fix this for tez.
+ public static final String MAPREDUCE_SHUFFLE_SERVICEID =
+ "mapreduce_shuffle";
+
+ public static final String SHUFFLE_PORT_CONFIG_KEY = "tez.shuffle.port";
+ public static final int DEFAULT_SHUFFLE_PORT = 15551;
+
+ // TODO Change configs to remove mapreduce references.
+ public static final String SHUFFLE_CONNECTION_KEEP_ALIVE_ENABLED =
+ "mapreduce.shuffle.connection-keep-alive.enable";
+ public static final boolean DEFAULT_SHUFFLE_CONNECTION_KEEP_ALIVE_ENABLED = false;
+
+ public static final String SHUFFLE_CONNECTION_KEEP_ALIVE_TIME_OUT =
+ "mapreduce.shuffle.connection-keep-alive.timeout";
+ public static final int DEFAULT_SHUFFLE_CONNECTION_KEEP_ALIVE_TIME_OUT = 5; //seconds
+
+ public static final String SHUFFLE_MAPOUTPUT_META_INFO_CACHE_SIZE =
+ "mapreduce.shuffle.mapoutput-info.meta.cache.size";
+ public static final int DEFAULT_SHUFFLE_MAPOUTPUT_META_INFO_CACHE_SIZE =
+ 1000;
+
+ public static final String CONNECTION_CLOSE = "close";
+
+ // NOTE(review): "SUFFLE" in the next two names is a misspelling of "SHUFFLE"; both
+ // constants are public, so renaming them would affect callers.
+ public static final String SUFFLE_SSL_FILE_BUFFER_SIZE_KEY =
+ "mapreduce.shuffle.ssl.file.buffer.size";
+
+ public static final int DEFAULT_SUFFLE_SSL_FILE_BUFFER_SIZE = 60 * 1024;
+
+ public static final String MAX_SHUFFLE_CONNECTIONS = "mapreduce.shuffle.max.connections";
+ public static final int DEFAULT_MAX_SHUFFLE_CONNECTIONS = 0; // 0 implies no limit
+
+ public static final String MAX_SHUFFLE_THREADS = "mapreduce.shuffle.max.threads";
+ // 0 implies Netty default of 2 * number of available processors
+ public static final int DEFAULT_MAX_SHUFFLE_THREADS = 0;
+
+ public static final String SHUFFLE_BUFFER_SIZE =
+ "mapreduce.shuffle.transfer.buffer.size";
+ public static final int DEFAULT_SHUFFLE_BUFFER_SIZE = 128 * 1024;
+
+ public static final String SHUFFLE_TRANSFERTO_ALLOWED =
+ "mapreduce.shuffle.transferTo.allowed";
+ public static final boolean DEFAULT_SHUFFLE_TRANSFERTO_ALLOWED = true;
+ public static final boolean WINDOWS_DEFAULT_SHUFFLE_TRANSFERTO_ALLOWED =
+ false;
+
+ final boolean connectionKeepAliveEnabled;
+ final int connectionKeepAliveTimeOut;
+ final int mapOutputMetaInfoCacheSize;
+ // Process-wide singleton state: initing is claimed by the first initializeAndStart()
+ // call, started is set once that call has finished, and get() returns INSTANCE only
+ // when started is set.
+ private static final AtomicBoolean started = new AtomicBoolean(false);
+ private static final AtomicBoolean initing = new AtomicBoolean(false);
+ private static ShuffleHandler INSTANCE;
+
+ /**
+  * Metrics holder that doubles as a channel-future listener: when a future completes it
+  * bumps the success or failure counter and decrements the open-connection gauge.
+  */
+ @Metrics(about="Shuffle output metrics", context="mapred")
+ static class ShuffleMetrics implements ChannelFutureListener {
+   @Metric("Shuffle output in bytes")
+   MutableCounterLong shuffleOutputBytes;
+   @Metric("# of failed shuffle outputs")
+   MutableCounterInt shuffleOutputsFailed;
+   @Metric("# of succeeeded shuffle outputs")
+   MutableCounterInt shuffleOutputsOK;
+   @Metric("# of current shuffle connections")
+   MutableGaugeInt shuffleConnections;
+
+   @Override
+   public void operationComplete(ChannelFuture future) throws Exception {
+     // Pick the counter matching the outcome, then account for the finished connection.
+     final MutableCounterInt outcomeCounter =
+         future.isSuccess() ? shuffleOutputsOK : shuffleOutputsFailed;
+     outcomeCounter.incr();
+     shuffleConnections.decr();
+   }
+ }
+
+ /**
+  * Builds a shuffle handler from the given configuration: reads the shuffle
+  * tuning options and creates the Netty server channel factory. No port is
+  * bound here; binding happens in {@link #start()}.
+  *
+  * @param conf configuration the shuffle settings are read from
+  */
+ public ShuffleHandler(Configuration conf) {
+ this.conf = conf;
+ manageOsCache = conf.getBoolean(SHUFFLE_MANAGE_OS_CACHE,
+ DEFAULT_SHUFFLE_MANAGE_OS_CACHE);
+
+ readaheadLength = conf.getInt(SHUFFLE_READAHEAD_BYTES,
+ DEFAULT_SHUFFLE_READAHEAD_BYTES);
+
+ maxShuffleConnections = conf.getInt(MAX_SHUFFLE_CONNECTIONS,
+ DEFAULT_MAX_SHUFFLE_CONNECTIONS);
+ int maxShuffleThreads = conf.getInt(MAX_SHUFFLE_THREADS,
+ DEFAULT_MAX_SHUFFLE_THREADS);
+ // A configured value of 0 means "pick a default": twice the available processors.
+ if (maxShuffleThreads == 0) {
+ maxShuffleThreads = 2 * Runtime.getRuntime().availableProcessors();
+ }
+
+ shuffleBufferSize = conf.getInt(SHUFFLE_BUFFER_SIZE,
+ DEFAULT_SHUFFLE_BUFFER_SIZE);
+
+ // The default for transferTo differs on Windows.
+ shuffleTransferToAllowed = conf.getBoolean(SHUFFLE_TRANSFERTO_ALLOWED,
+ (Shell.WINDOWS)?WINDOWS_DEFAULT_SHUFFLE_TRANSFERTO_ALLOWED:
+ DEFAULT_SHUFFLE_TRANSFERTO_ALLOWED);
+
+ // Named thread factories so boss/worker threads are identifiable in thread dumps.
+ ThreadFactory bossFactory = new ThreadFactoryBuilder()
+ .setNameFormat("ShuffleHandler Netty Boss #%d")
+ .build();
+ ThreadFactory workerFactory = new ThreadFactoryBuilder()
+ .setNameFormat("ShuffleHandler Netty Worker #%d")
+ .build();
+
+ selector = new NioServerSocketChannelFactory(
+ Executors.newCachedThreadPool(bossFactory),
+ Executors.newCachedThreadPool(workerFactory),
+ maxShuffleThreads);
+
+ sslFileBufferSize = conf.getInt(SUFFLE_SSL_FILE_BUFFER_SIZE_KEY,
+ DEFAULT_SUFFLE_SSL_FILE_BUFFER_SIZE);
+ connectionKeepAliveEnabled =
+ conf.getBoolean(SHUFFLE_CONNECTION_KEEP_ALIVE_ENABLED,
+ DEFAULT_SHUFFLE_CONNECTION_KEEP_ALIVE_ENABLED);
+ // Both values below are clamped to a minimum of 1.
+ connectionKeepAliveTimeOut =
+ Math.max(1, conf.getInt(SHUFFLE_CONNECTION_KEEP_ALIVE_TIME_OUT,
+ DEFAULT_SHUFFLE_CONNECTION_KEEP_ALIVE_TIME_OUT));
+ mapOutputMetaInfoCacheSize =
+ Math.max(1, conf.getInt(SHUFFLE_MAPOUTPUT_META_INFO_CACHE_SIZE,
+ DEFAULT_SHUFFLE_MAPOUTPUT_META_INFO_CACHE_SIZE));
+
+ userRsrc = new ConcurrentHashMap<String,String>();
+ secretManager = new JobTokenSecretManager();
+ }
+
+
+ /**
+  * Creates the HTTP pipeline factory, binds the server socket to the
+  * configured shuffle port and records the port that was actually bound
+  * (in the field, in the configuration and on the Shuffle handler).
+  *
+  * @throws Exception if the pipeline factory cannot be created (wrapped in a
+  *         RuntimeException) or binding fails
+  */
+ public void start() throws Exception {
+   final ServerBootstrap serverBootstrap = new ServerBootstrap(selector);
+   try {
+     pipelineFact = new HttpPipelineFactory(conf);
+   } catch (Exception ex) {
+     throw new RuntimeException(ex);
+   }
+   serverBootstrap.setPipelineFactory(pipelineFact);
+
+   // Bind to the configured port first ...
+   port = conf.getInt(SHUFFLE_PORT_CONFIG_KEY, DEFAULT_SHUFFLE_PORT);
+   final Channel serverChannel = serverBootstrap.bind(new InetSocketAddress(port));
+   accepted.add(serverChannel);
+
+   // ... then publish the port reported by the bound channel.
+   final InetSocketAddress boundAddress = (InetSocketAddress) serverChannel.getLocalAddress();
+   port = boundAddress.getPort();
+   conf.set(SHUFFLE_PORT_CONFIG_KEY, Integer.toString(port));
+   pipelineFact.SHUFFLE.setPort(port);
+   LOG.info("TezShuffleHandler" + " listening on port " + port);
+ }
+
+ public static void initializeAndStart(Configuration conf) throws Exception {
+ if (!initing.getAndSet(true)) {
+ INSTANCE = new ShuffleHandler(conf);
+ INSTANCE.start();
+ started.set(true);
+ }
+ }
+
+ /**
+  * Returns the shared ShuffleHandler instance.
+  *
+  * @return the instance created by {@link #initializeAndStart(Configuration)}
+  * @throws IllegalStateException if the handler has not been started yet
+  */
+ public static ShuffleHandler get() {
+   // The message previously read "before invoking started", which named the wrong operation.
+   Preconditions.checkState(started.get(), "ShuffleHandler must be started before invoking get");
+   return INSTANCE;
+ }
+
+ /**
+  * Writes the shuffle port into a ByteBuffer so it can be handed out later.
+  * @param port the port to be sent to the ApplicationMaster
+  * @return a ByteBuffer wrapping the serialized form of the port.
+  */
+ public static ByteBuffer serializeMetaData(int port) throws IOException {
+   //TODO these bytes should be versioned
+   final DataOutputBuffer portBytes = new DataOutputBuffer();
+   portBytes.writeInt(port);
+   final byte[] backingArray = portBytes.getData();
+   final int validLength = portBytes.getLength();
+   return ByteBuffer.wrap(backingArray, 0, validLength);
+ }
+
+ /**
+  * Reads back the metadata produced by the ShuffleHandler.
+  * @param meta the metadata returned by the ShuffleHandler
+  * @return the port the Shuffle Handler is listening on to serve shuffle data.
+  */
+ public static int deserializeMetaData(ByteBuffer meta) throws IOException {
+   //TODO this should be returning a class not just an int
+   final DataInputByteBuffer metaReader = new DataInputByteBuffer();
+   metaReader.reset(meta);
+   return metaReader.readInt();
+ }
+
+ /**
+  * Serializes a job token so it can be sent to the ShuffleHandler as
+  * ServiceData.
+  * @param jobToken the job token to be used for authentication of
+  * shuffle data requests.
+  * @return the serialized version of the jobToken.
+  */
+ public static ByteBuffer serializeServiceData(Token<JobTokenIdentifier> jobToken) throws IOException {
+   //TODO these bytes should be versioned
+   final DataOutputBuffer tokenBytes = new DataOutputBuffer();
+   jobToken.write(tokenBytes);
+   final byte[] backingArray = tokenBytes.getData();
+   final int validLength = tokenBytes.getLength();
+   return ByteBuffer.wrap(backingArray, 0, validLength);
+ }
+
+ static Token<JobTokenIdentifier> deserializeServiceData(ByteBuffer secret) throws IOException {
+ DataInputByteBuffer in = new DataInputByteBuffer();
+ in.reset(secret);
+ Token<JobTokenIdentifier> jt = new Token<JobTokenIdentifier>();
+ jt.readFields(in);
+ return jt;
+ }
+
+ /**
+  * @return the value of the port field; start() sets it to the port of the
+  *         bound server channel.
+  */
+ public int getPort() {
+ return port;
+ }
+
+ /**
+  * Registers an application with the shuffle handler. The job token and user
+  * are recorded only the first time a given application id is seen.
+  *
+  * @param applicationIdString application id in string form
+  * @param appToken job token used to authenticate shuffle requests
+  * @param user user the application runs as
+  */
+ public void registerApplication(String applicationIdString, Token<JobTokenIdentifier> appToken,
+     String user) {
+   final Boolean alreadyRegistered =
+       registeredApps.putIfAbsent(applicationIdString, Boolean.TRUE);
+   if (alreadyRegistered != null) {
+     return;
+   }
+   recordJobShuffleInfo(applicationIdString, user, appToken);
+ }
+
+ /**
+  * Drops the shuffle info recorded for the given application.
+  *
+  * NOTE(review): the entry that registerApplication puts into registeredApps
+  * is not removed here, so registering the same id again after this call
+  * would be skipped - confirm whether that is intended.
+  *
+  * @param applicationIdString application id in string form
+  */
+ public void unregisterApplication(String applicationIdString) {
+ removeJobShuffleInfo(applicationIdString);
+ }
+
+
+ /**
+  * Shuts the handler down: closes all accepted channels, releases the
+  * channel factory's resources and destroys the pipeline factory.
+  */
+ public void stop() throws Exception {
+ // Wait at most 10 seconds for the channels to close.
+ accepted.close().awaitUninterruptibly(10, TimeUnit.SECONDS);
+ if (selector != null) {
+ // A fresh bootstrap over the same factory is used only to release its resources.
+ ServerBootstrap bootstrap = new ServerBootstrap(selector);
+ bootstrap.releaseExternalResources();
+ }
+ if (pipelineFact != null) {
+ pipelineFact.destroy();
+ }
+ }
+
+ /**
+  * Factory method for the Shuffle channel handler; protected so subclasses
+  * can supply a different one.
+  */
+ protected Shuffle getShuffle(Configuration conf) {
+ return new Shuffle(conf);
+ }
+
+
+ private void addJobToken(String appIdString, String user,
+ Token<JobTokenIdentifier> jobToken) {
+ String jobIdString = appIdString.replace("application", "job");
+ userRsrc.put(jobIdString, user);
+ secretManager.addTokenForJob(jobIdString, jobToken);
+ LOG.info("Added token for " + jobIdString);
+ }
+
+ /** Records shuffle info for an application; currently only delegates to addJobToken. */
+ private void recordJobShuffleInfo(String appIdString, String user,
+ Token<JobTokenIdentifier> jobToken) {
+ addJobToken(appIdString, user, jobToken);
+ }
+
+ private void removeJobShuffleInfo(String appIdString) {
+ secretManager.removeTokenForJob(appIdString);
+ userRsrc.remove(appIdString);
+ }
+
+ /**
+  * Builds the Netty channel pipeline for shuffle connections. Every pipeline
+  * shares the single Shuffle handler created in the constructor.
+  */
+ class HttpPipelineFactory implements ChannelPipelineFactory {
+
+ final Shuffle SHUFFLE;
+ // Never assigned while the SSL setup below stays commented out, so it remains null.
+ private SSLFactory sslFactory;
+
+ public HttpPipelineFactory(Configuration conf) throws Exception {
+ SHUFFLE = getShuffle(conf);
+ // TODO Setup SSL Shuffle
+// if (conf.getBoolean(MRConfig.SHUFFLE_SSL_ENABLED_KEY,
+// MRConfig.SHUFFLE_SSL_ENABLED_DEFAULT)) {
+// LOG.info("Encrypted shuffle is enabled.");
+// sslFactory = new SSLFactory(SSLFactory.Mode.SERVER, conf);
+// sslFactory.init();
+// }
+ }
+
+ /** Destroys the SSL factory if one was created. */
+ public void destroy() {
+ if (sslFactory != null) {
+ sslFactory.destroy();
+ }
+ }
+
+ /**
+  * Assembles a pipeline: optional SSL handler, HTTP request decoder, chunk
+  * aggregator (limit 1 &lt;&lt; 16 bytes), HTTP response encoder, chunked
+  * write handler and finally the shared Shuffle handler.
+  */
+ @Override
+ public ChannelPipeline getPipeline() throws Exception {
+ ChannelPipeline pipeline = Channels.pipeline();
+ if (sslFactory != null) {
+ pipeline.addLast("ssl", new SslHandler(sslFactory.createSSLEngine()));
+ }
+ pipeline.addLast("decoder", new HttpRequestDecoder());
+ pipeline.addLast("aggregator", new HttpChunkAggregator(1 << 16));
+ pipeline.addLast("encoder", new HttpResponseEncoder());
+ pipeline.addLast("chunking", new ChunkedWriteHandler());
+ pipeline.addLast("shuffle", SHUFFLE);
+ return pipeline;
+ // TODO factor security manager into pipeline
+ // TODO factor out encode/decode to permit binary shuffle
+ // TODO factor out decode of index to permit alt. models
+ }
+
+ }
+
+ class Shuffle extends SimpleChannelUpstreamHandler {
+
+ private final Configuration conf;
+ private final IndexCache indexCache;
+ private final LocalDirAllocator lDirAlloc =
+ new LocalDirAllocator(SHUFFLE_HANDLER_LOCAL_DIRS);
+ private int port;
+
+ /**
+  * Creates the handler with an index cache built from the configuration.
+  * The port starts as the configured shuffle port and can be changed later
+  * through setPort.
+  */
+ public Shuffle(Configuration conf) {
+ this.conf = conf;
+ indexCache = new IndexCache(conf);
+ this.port = conf.getInt(SHUFFLE_PORT_CONFIG_KEY, DEFAULT_SHUFFLE_PORT);
+ }
+
+ /** Sets the port used when building the URL that incoming requests are verified against. */
+ public void setPort(int port) {
+ this.port = port;
+ }
+
+ /**
+  * Flattens the "map" query values into individual map ids by splitting
+  * every value on commas.
+  *
+  * @param mapq raw query values, may be null
+  * @return the individual ids in encounter order, or null if mapq is null
+  */
+ private List<String> splitMaps(List<String> mapq) {
+   if (mapq == null) {
+     return null;
+   }
+   final List<String> mapIds = new ArrayList<String>();
+   for (String commaSeparated : mapq) {
+     for (String id : commaSeparated.split(",")) {
+       mapIds.add(id);
+     }
+   }
+   return mapIds;
+ }
+
+ /**
+  * Tracks newly opened channels. When a positive connection limit is
+  * configured and already reached, the new channel is closed instead.
+  */
+ @Override
+ public void channelOpen(ChannelHandlerContext ctx, ChannelStateEvent evt)
+     throws Exception {
+   final boolean limitConfigured = maxShuffleConnections > 0;
+   if (limitConfigured && accepted.size() >= maxShuffleConnections) {
+     LOG.info(String.format("Current number of shuffle connections (%d) is " +
+         "greater than or equal to the max allowed shuffle connections (%d)",
+         accepted.size(), maxShuffleConnections));
+     evt.getChannel().close();
+     return;
+   }
+
+   accepted.add(evt.getChannel());
+   super.channelOpen(ctx, evt);
+ }
+
+ /**
+  * Handles one shuffle fetch request. It validates the HTTP method, shuffle
+  * version headers and the job/map/reduce query parameters, authenticates
+  * the request against the job token, writes the response headers and then
+  * streams the requested map outputs. The channel is closed once the last
+  * map output has been written.
+  *
+  * Every validation failure sends an error response and stops processing
+  * the request.
+  */
+ @Override
+ public void messageReceived(ChannelHandlerContext ctx, MessageEvent evt)
+     throws Exception {
+   HttpRequest request = (HttpRequest) evt.getMessage();
+   if (request.getMethod() != GET) {
+     sendError(ctx, METHOD_NOT_ALLOWED);
+     return;
+   }
+   // Check whether the shuffle version is compatible
+   if (!ShuffleHeader.DEFAULT_HTTP_HEADER_NAME.equals(
+       request.getHeader(ShuffleHeader.HTTP_HEADER_NAME))
+       || !ShuffleHeader.DEFAULT_HTTP_HEADER_VERSION.equals(
+           request.getHeader(ShuffleHeader.HTTP_HEADER_VERSION))) {
+     sendError(ctx, "Incompatible shuffle request version", BAD_REQUEST);
+     // sendError schedules the channel to close; do not keep serving this request.
+     return;
+   }
+   final Map<String,List<String>> q =
+       new QueryStringDecoder(request.getUri()).getParameters();
+   final List<String> keepAliveList = q.get("keepAlive");
+   boolean keepAliveParam = false;
+   if (keepAliveList != null && keepAliveList.size() == 1) {
+     keepAliveParam = Boolean.valueOf(keepAliveList.get(0));
+     if (LOG.isDebugEnabled()) {
+       LOG.debug("KeepAliveParam : " + keepAliveList
+           + " : " + keepAliveParam);
+     }
+   }
+   final List<String> mapIds = splitMaps(q.get("map"));
+   final List<String> reduceQ = q.get("reduce");
+   final List<String> jobQ = q.get("job");
+   if (LOG.isDebugEnabled()) {
+     LOG.debug("RECV: " + request.getUri() +
+         "\n mapId: " + mapIds +
+         "\n reduceId: " + reduceQ +
+         "\n jobId: " + jobQ +
+         "\n keepAlive: " + keepAliveParam);
+   }
+
+   // An empty id list (e.g. "map=,") is treated like a missing parameter;
+   // otherwise no output is sent and the final listener registration hits null.
+   if (mapIds == null || mapIds.isEmpty() || reduceQ == null || jobQ == null) {
+     sendError(ctx, "Required param job, map and reduce", BAD_REQUEST);
+     return;
+   }
+   if (reduceQ.size() != 1 || jobQ.size() != 1) {
+     sendError(ctx, "Too many job/reduce parameters", BAD_REQUEST);
+     return;
+   }
+   int reduceId;
+   String jobId;
+   try {
+     reduceId = Integer.parseInt(reduceQ.get(0));
+     jobId = jobQ.get(0);
+   } catch (NumberFormatException e) {
+     sendError(ctx, "Bad reduce parameter", BAD_REQUEST);
+     return;
+   } catch (IllegalArgumentException e) {
+     sendError(ctx, "Bad job parameter", BAD_REQUEST);
+     return;
+   }
+   final String reqUri = request.getUri();
+   if (null == reqUri) {
+     // TODO? add upstream?
+     sendError(ctx, FORBIDDEN);
+     return;
+   }
+   HttpResponse response = new DefaultHttpResponse(HTTP_1_1, OK);
+   try {
+     verifyRequest(jobId, ctx, request, response,
+         new URL("http", "", this.port, reqUri));
+   } catch (IOException e) {
+     LOG.warn("Shuffle failure ", e);
+     sendError(ctx, e.getMessage(), UNAUTHORIZED);
+     return;
+   }
+
+   Map<String, MapOutputInfo> mapOutputInfoMap =
+       new HashMap<String, MapOutputInfo>();
+   Channel ch = evt.getChannel();
+   String user = userRsrc.get(jobId);
+
+   // $x/$user/appcache/$appId/output/$mapId
+   // TODO: Once Shuffle is out of NM, this can use MR APIs to convert
+   // between App and Job
+   String outputBasePathStr = getBaseLocation(jobId, user);
+
+   try {
+     populateHeaders(mapIds, outputBasePathStr, user, reduceId, request,
+         response, keepAliveParam, mapOutputInfoMap);
+   } catch(IOException e) {
+     ch.write(response);
+     LOG.error("Shuffle error in populating headers :", e);
+     String errorMessage = getErrorMessage(e);
+     sendError(ctx,errorMessage , INTERNAL_SERVER_ERROR);
+     return;
+   }
+   ch.write(response);
+   // TODO refactor the following into the pipeline
+   ChannelFuture lastMap = null;
+   for (String mapId : mapIds) {
+     try {
+       MapOutputInfo info = mapOutputInfoMap.get(mapId);
+       if (info == null) {
+         // Cache miss: look the output up under the same per-map directory
+         // that populateHeaders uses (base path + map id).
+         info = getMapOutputInfo(outputBasePathStr + mapId, mapId, reduceId, user);
+       }
+       lastMap =
+           sendMapOutput(ctx, ch, user, mapId,
+               reduceId, info);
+       if (null == lastMap) {
+         sendError(ctx, NOT_FOUND);
+         return;
+       }
+     } catch (IOException e) {
+       LOG.error("Shuffle error :", e);
+       String errorMessage = getErrorMessage(e);
+       sendError(ctx,errorMessage , INTERNAL_SERVER_ERROR);
+       return;
+     }
+   }
+   lastMap.addListener(ChannelFutureListener.CLOSE);
+ }
+
+ private String getErrorMessage(Throwable t) {
+ StringBuffer sb = new StringBuffer(t.getMessage());
+ while (t.getCause() != null) {
+ sb.append(t.getCause().getMessage());
+ t = t.getCause();
+ }
+ return sb.toString();
+ }
+
+ private final String USERCACHE_CONSTANT = "usercache";
+ private final String APPCACHE_CONSTANT = "appcache";
+
+ /**
+  * Builds the relative output directory prefix for a job:
+  * usercache/&lt;user&gt;/appcache/&lt;application id&gt;/output/.
+  *
+  * @param jobIdString id made of exactly three "_"-separated parts; the
+  *        second and third are parsed as the application id's numbers
+  * @param user owner of the application
+  * @throws IllegalArgumentException if the id does not have three parts
+  */
+ private String getBaseLocation(String jobIdString, String user) {
+   final String[] idParts = jobIdString.split("_");
+   Preconditions.checkArgument(idParts.length == 3, "Invalid jobId. Expecting 3 parts");
+   final long clusterTimestamp = Long.parseLong(idParts[1]);
+   final int sequenceNumber = Integer.parseInt(idParts[2]);
+   final ApplicationId applicationId =
+       ApplicationId.newInstance(clusterTimestamp, sequenceNumber);
+   return USERCACHE_CONSTANT + "/" + user + "/"
+       + APPCACHE_CONSTANT + "/"
+       + ConverterUtils.toString(applicationId) + "/output" + "/";
+ }
+
+ protected MapOutputInfo getMapOutputInfo(String base, String mapId,
+ int reduce, String user) throws IOException {
+ // Index file
+ Path indexFileName =
+ lDirAlloc.getLocalPathToRead(base + "/file.out.index", conf);
+ TezIndexRecord info =
+ indexCache.getIndexInformation(mapId, reduce, indexFileName, user);
+
+ Path mapOutputFileName =
+ lDirAlloc.getLocalPathToRead(base + "/file.out", conf);
+ if (LOG.isDebugEnabled()) {
+ LOG.debug(base + " : " + mapOutputFileName + " : " + indexFileName);
+ }
+ MapOutputInfo outputInfo = new MapOutputInfo(mapOutputFileName, info);
+ return outputInfo;
+ }
+
+ /**
+  * Computes the total content length of the response (shuffle header plus
+  * partition bytes for every requested map) and sets the response headers.
+  * Resolved map output info is remembered in mapOutputInfoMap, up to
+  * mapOutputMetaInfoCacheSize entries, for reuse when the data is sent.
+  *
+  * The index record comes from the MapOutputInfo that was just resolved.
+  * The earlier code repeated the local-dir and index-cache lookup for every
+  * map id.
+  *
+  * @throws IOException if a map output or its index cannot be resolved
+  */
+ protected void populateHeaders(List<String> mapIds, String outputBaseStr,
+     String user, int reduce, HttpRequest request, HttpResponse response,
+     boolean keepAliveParam, Map<String, MapOutputInfo> mapOutputInfoMap)
+     throws IOException {
+
+   long contentLength = 0;
+   for (String mapId : mapIds) {
+     String base = outputBaseStr + mapId;
+     MapOutputInfo outputInfo = getMapOutputInfo(base, mapId, reduce, user);
+     if (mapOutputInfoMap.size() < mapOutputMetaInfoCacheSize) {
+       mapOutputInfoMap.put(mapId, outputInfo);
+     }
+     TezIndexRecord info = outputInfo.indexRecord;
+     ShuffleHeader header =
+         new ShuffleHeader(mapId, info.getPartLength(), info.getRawLength(), reduce);
+     // Serialize the header only to learn how many bytes it occupies.
+     DataOutputBuffer dob = new DataOutputBuffer();
+     header.write(dob);
+
+     contentLength += info.getPartLength();
+     contentLength += dob.getLength();
+   }
+
+   // Now set the response headers.
+   setResponseHeaders(response, keepAliveParam, contentLength);
+ }
+
+ protected void setResponseHeaders(HttpResponse response,
+ boolean keepAliveParam, long contentLength) {
+ if (!connectionKeepAliveEnabled && !keepAliveParam) {
+ LOG.info("Setting connection close header...");
+ response.setHeader(HttpHeaders.Names.CONNECTION, CONNECTION_CLOSE);
+ } else {
+ response.setHeader(HttpHeaders.Names.CONTENT_LENGTH,
+ String.valueOf(contentLength));
+ response.setHeader(HttpHeaders.Names.CONNECTION, HttpHeaders.Values.KEEP_ALIVE);
+ response.setHeader(HttpHeaders.Values.KEEP_ALIVE, "timeout="
+ + connectionKeepAliveTimeOut);
+ LOG.info("Content Length in shuffle : " + contentLength);
+ }
+ }
+
+ /** Pairs the path of a map output file with the index record of the requested partition. */
+ class MapOutputInfo {
+ // Path of the map output data file.
+ final Path mapOutputFileName;
+ // Index record whose start offset and lengths are used when sending the output.
+ final TezIndexRecord indexRecord;
+
+ MapOutputInfo(Path mapOutputFileName, TezIndexRecord indexRecord) {
+ this.mapOutputFileName = mapOutputFileName;
+ this.indexRecord = indexRecord;
+ }
+ }
+
+ /**
+  * Authenticates a fetch request against the token secret registered for
+  * the given id and, on success, adds the reply hash and the shuffle
+  * name/version headers to the response.
+  *
+  * @param appid id used to look up the token secret
+  * @param ctx channel context (not used in this method)
+  * @param request incoming request carrying the URL hash header
+  * @param response response that receives the reply hash and version headers
+  * @param requestUri URL the hash is verified against
+  * @throws IOException if no secret is known for the id, the hash header is
+  *         missing, or verification fails
+  */
+ protected void verifyRequest(String appid, ChannelHandlerContext ctx,
+ HttpRequest request, HttpResponse response, URL requestUri)
+ throws IOException {
+ SecretKey tokenSecret = secretManager.retrieveTokenSecret(appid);
+ if (null == tokenSecret) {
+ LOG.info("Request for unknown token " + appid);
+ throw new IOException("could not find jobid");
+ }
+ // string to encrypt
+ String enc_str = SecureShuffleUtils.buildMsgFrom(requestUri);
+ // hash from the fetcher
+ String urlHashStr =
+ request.getHeader(SecureShuffleUtils.HTTP_HEADER_URL_HASH);
+ if (urlHashStr == null) {
+ LOG.info("Missing header hash for " + appid);
+ throw new IOException("fetcher cannot be authenticated");
+ }
+ if (LOG.isDebugEnabled()) {
+ // Only part of the hash is logged (second half, minus the last character).
+ // NOTE(review): for an empty header value this substring call would throw
+ // when debug logging is enabled - confirm whether that can occur.
+ int len = urlHashStr.length();
+ LOG.debug("verifying request. enc_str=" + enc_str + "; hash=..." +
+ urlHashStr.substring(len-len/2, len-1));
+ }
+ // verify - throws exception
+ SecureShuffleUtils.verifyReply(urlHashStr, enc_str, tokenSecret);
+ // verification passed - encode the reply
+ String reply =
+ SecureShuffleUtils.generateHash(urlHashStr.getBytes(Charsets.UTF_8),
+ tokenSecret);
+ response.setHeader(SecureShuffleUtils.HTTP_HEADER_REPLY_URL_HASH, reply);
+ // Put shuffle version into http header
+ response.setHeader(ShuffleHeader.HTTP_HEADER_NAME,
+ ShuffleHeader.DEFAULT_HTTP_HEADER_NAME);
+ response.setHeader(ShuffleHeader.HTTP_HEADER_VERSION,
+ ShuffleHeader.DEFAULT_HTTP_HEADER_VERSION);
+ if (LOG.isDebugEnabled()) {
+ int len = reply.length();
+ LOG.debug("Fetcher request verfied. enc_str=" + enc_str + ";reply=" +
+ reply.substring(len-len/2, len-1));
+ }
+ }
+
+ /**
+  * Writes one map output partition to the channel: first the serialized
+  * ShuffleHeader, then the partition bytes from the spill file.
+  *
+  * @param ctx channel context (not used in this method)
+  * @param ch channel to write to
+  * @param user expected owner, passed to the secure file open
+  * @param mapId id of the map output being sent
+  * @param reduce partition number placed in the header
+  * @param mapOutputInfo file path and index record of the output
+  * @return the future of the data write, or null if the spill file does not exist
+  * @throws IOException if the header cannot be serialized or the file cannot be opened
+  */
+ protected ChannelFuture sendMapOutput(ChannelHandlerContext ctx, Channel ch,
+ String user, String mapId, int reduce, MapOutputInfo mapOutputInfo)
+ throws IOException {
+ final TezIndexRecord info = mapOutputInfo.indexRecord;
+ final ShuffleHeader header =
+ new ShuffleHeader(mapId, info.getPartLength(), info.getRawLength(), reduce);
+ final DataOutputBuffer dob = new DataOutputBuffer();
+ header.write(dob);
+ // The header goes out before the spill file is opened, so it has already
+ // been written when the file-not-found branch below returns null.
+ ch.write(wrappedBuffer(dob.getData(), 0, dob.getLength()));
+ final File spillfile =
+ new File(mapOutputInfo.mapOutputFileName.toString());
+ RandomAccessFile spill;
+ try {
+ spill = SecureIOUtils.openForRandomRead(spillfile, "r", user, null);
+ } catch (FileNotFoundException e) {
+ LOG.info(spillfile + " not found");
+ return null;
+ }
+ ChannelFuture writeFuture;
+ // No SslHandler in the pipeline: send the partition as a file region.
+ if (ch.getPipeline().get(SslHandler.class) == null) {
+ final FadvisedFileRegion partition = new FadvisedFileRegion(spill,
+ info.getStartOffset(), info.getPartLength(), manageOsCache, readaheadLength,
+ readaheadPool, spillfile.getAbsolutePath(),
+ shuffleBufferSize, shuffleTransferToAllowed);
+ writeFuture = ch.write(partition);
+ writeFuture.addListener(new ChannelFutureListener() {
+ // TODO error handling; distinguish IO/connection failures,
+ // attribute to appropriate spill output
+ @Override
+ public void operationComplete(ChannelFuture future) {
+ if (future.isSuccess()) {
+ partition.transferSuccessful();
+ }
+ // Released whether or not the transfer succeeded.
+ partition.releaseExternalResources();
+ }
+ });
+ } else {
+ // HTTPS cannot be done with zero copy.
+ final FadvisedChunkedFile chunk = new FadvisedChunkedFile(spill,
+ info.getStartOffset(), info.getPartLength(), sslFileBufferSize,
+ manageOsCache, readaheadLength, readaheadPool,
+ spillfile.getAbsolutePath());
+ writeFuture = ch.write(chunk);
+ }
+ return writeFuture;
+ }
+
+ /** Sends an error response with the given status and an empty message body. */
+ protected void sendError(ChannelHandlerContext ctx,
+ HttpResponseStatus status) {
+ sendError(ctx, "", status);
+ }
+
+ protected void sendError(ChannelHandlerContext ctx, String message,
+ HttpResponseStatus status) {
+ HttpResponse response = new DefaultHttpResponse(HTTP_1_1, status);
+ response.setHeader(CONTENT_TYPE, "text/plain; charset=UTF-8");
+ // Put shuffle version into http header
+ response.setHeader(ShuffleHeader.HTTP_HEADER_NAME,
+ ShuffleHeader.DEFAULT_HTTP_HEADER_NAME);
+ response.setHeader(ShuffleHeader.HTTP_HEADER_VERSION,
+ ShuffleHeader.DEFAULT_HTTP_HEADER_VERSION);
+ response.setContent(
+ ChannelBuffers.copiedBuffer(message, CharsetUtil.UTF_8));
+
+ // Close the connection as soon as the error message is sent.
+ ctx.getChannel().write(response).addListener(ChannelFutureListener.CLOSE);
+ }
+
+ /**
+  * Handles exceptions raised in the pipeline. Over-long frames get a 400,
+  * closed-channel errors and IOExceptions whose message matches the
+  * ignorable pattern are logged at debug level only. Anything else is
+  * logged as an error and answered with a 500 if the channel is still
+  * connected.
+  */
+ @Override
+ public void exceptionCaught(ChannelHandlerContext ctx, ExceptionEvent e)
+     throws Exception {
+   final Channel channel = e.getChannel();
+   final Throwable cause = e.getCause();
+
+   if (cause instanceof TooLongFrameException) {
+     sendError(ctx, BAD_REQUEST);
+     return;
+   }
+   // ClosedChannelException is itself an IOException, so testing it first is equivalent.
+   if (cause instanceof ClosedChannelException) {
+     LOG.debug("Ignoring closed channel error", cause);
+     return;
+   }
+   if (cause instanceof IOException) {
+     final String causeMessage = String.valueOf(cause.getMessage());
+     if (IGNORABLE_ERROR_MESSAGE.matcher(causeMessage).matches()) {
+       LOG.debug("Ignoring client socket close", cause);
+       return;
+     }
+   }
+
+   LOG.error("Shuffle error: ", cause);
+   if (channel.isConnected()) {
+     LOG.error("Shuffle error " + e);
+     sendError(ctx, INTERNAL_SERVER_ERROR);
+   }
+ }
+ }
+}
[22/43] tez git commit: TEZ-2090. Add tests for jobs running in
external services. (sseth)
Posted by ss...@apache.org.
http://git-wip-us.apache.org/repos/asf/tez/blob/aadd0492/tez-ext-service-tests/src/test/java/org/apache/tez/tests/TestExternalTezServices.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/tests/TestExternalTezServices.java b/tez-ext-service-tests/src/test/java/org/apache/tez/tests/TestExternalTezServices.java
new file mode 100644
index 0000000..a93c1a4
--- /dev/null
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/tests/TestExternalTezServices.java
@@ -0,0 +1,183 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.tests;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+import java.util.Map;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.tez.client.TezClient;
+import org.apache.tez.dag.api.TezConfiguration;
+import org.apache.tez.dag.api.TezException;
+import org.apache.tez.dag.app.launcher.TezTestServiceNoOpContainerLauncher;
+import org.apache.tez.dag.app.rm.TezTestServiceTaskSchedulerService;
+import org.apache.tez.dag.app.taskcomm.TezTestServiceTaskCommunicatorImpl;
+import org.apache.tez.examples.HashJoinExample;
+import org.apache.tez.examples.JoinDataGen;
+import org.apache.tez.examples.JoinValidate;
+import org.apache.tez.service.MiniTezTestServiceCluster;
+import org.apache.tez.test.MiniTezCluster;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+/**
+ * Integration test that runs Tez example jobs through the external test
+ * service cluster. A mini DFS cluster, a mini Tez cluster, the test service
+ * cluster and a shared Tez session are started once for the whole class.
+ */
+public class TestExternalTezServices {
+
+ private static final Log LOG = LogFactory.getLog(TestExternalTezServices.class);
+
+ private static MiniTezCluster tezCluster;
+ private static MiniDFSCluster dfsCluster;
+ private static MiniTezTestServiceCluster tezTestServiceCluster;
+
+ // Configuration shared by the clusters; confForJobs is derived from it for job submission.
+ private static Configuration clusterConf = new Configuration();
+ private static Configuration confForJobs;
+
+ private static FileSystem remoteFs;
+ private static FileSystem localFs;
+
+ private static TezClient sharedTezClient;
+
+ private static String TEST_ROOT_DIR = "target" + Path.SEPARATOR + TestExternalTezServices.class.getName()
+ + "-tmpDir";
+
+ /**
+  * Starts, in order: the mini DFS cluster, the mini Tez cluster, the test
+  * service cluster and finally a shared Tez session configured to use the
+  * test-service scheduler, container launcher and task communicator.
+  */
+ @BeforeClass
+ public static void setup() throws IOException, TezException, InterruptedException {
+
+ localFs = FileSystem.getLocal(clusterConf);
+
+ try {
+ clusterConf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, TEST_ROOT_DIR);
+ dfsCluster =
+ new MiniDFSCluster.Builder(clusterConf).numDataNodes(1).format(true).racks(null).build();
+ remoteFs = dfsCluster.getFileSystem();
+ LOG.info("MiniDFSCluster started");
+ } catch (IOException io) {
+ throw new RuntimeException("problem starting mini dfs cluster", io);
+ }
+
+ tezCluster = new MiniTezCluster(TestExternalTezServices.class.getName(), 1, 1, 1);
+ Configuration conf = new Configuration();
+ conf.set("fs.defaultFS", remoteFs.getUri().toString()); // use HDFS
+ tezCluster.init(conf);
+ tezCluster.start();
+ LOG.info("MiniTezCluster started");
+
+ // Copy everything the mini Tez cluster configured into the shared cluster configuration.
+ clusterConf.set("fs.defaultFS", remoteFs.getUri().toString()); // use HDFS
+ for (Map.Entry<String, String> entry : tezCluster.getConfig()) {
+ clusterConf.set(entry.getKey(), entry.getValue());
+ }
+ long jvmMax = Runtime.getRuntime().maxMemory();
+
+ // The third argument passed to the test service cluster is half of the JVM's max memory.
+ tezTestServiceCluster = MiniTezTestServiceCluster
+ .create(TestExternalTezServices.class.getSimpleName(), 3, ((long) (jvmMax * 0.5d)), 1);
+ tezTestServiceCluster.init(clusterConf);
+ tezTestServiceCluster.start();
+ LOG.info("MiniTezTestServer started");
+
+ // Jobs additionally need the settings specific to the test service cluster.
+ confForJobs = new Configuration(clusterConf);
+ for (Map.Entry<String, String> entry : tezTestServiceCluster
+ .getClusterSpecificConfiguration()) {
+ confForJobs.set(entry.getKey(), entry.getValue());
+ }
+
+ // TODO TEZ-2003 Once per vertex configuration is possible, run separate tests for push vs pull (regular threaded execution)
+
+ Path stagingDirPath = new Path("/tmp/tez-staging-dir");
+ remoteFs.mkdirs(stagingDirPath);
+ // This is currently configured to push tasks into the Service, and then use the standard RPC
+ confForJobs.set(TezConfiguration.TEZ_AM_STAGING_DIR, stagingDirPath.toString());
+ confForJobs.set(TezConfiguration.TEZ_AM_TASK_SCHEDULER_CLASS,
+ TezTestServiceTaskSchedulerService.class.getName());
+ confForJobs.set(TezConfiguration.TEZ_AM_CONTAINER_LAUNCHER_CLASS,
+ TezTestServiceNoOpContainerLauncher.class.getName());
+ confForJobs.set(TezConfiguration.TEZ_AM_TASK_COMMUNICATOR_CLASS,
+ TezTestServiceTaskCommunicatorImpl.class.getName());
+
+ TezConfiguration tezConf = new TezConfiguration(confForJobs);
+
+ sharedTezClient = TezClient.create(TestExternalTezServices.class.getSimpleName() + "_session",
+ tezConf, true);
+ sharedTezClient.start();
+ LOG.info("Shared TezSession started");
+ sharedTezClient.waitTillReady();
+ LOG.info("Shared TezSession ready for submission");
+
+ }
+
+ /** Stops the session and the clusters in the reverse order of their start-up. */
+ @AfterClass
+ public static void tearDown() throws IOException, TezException {
+ if (sharedTezClient != null) {
+ sharedTezClient.stop();
+ sharedTezClient = null;
+ }
+
+ if (tezTestServiceCluster != null) {
+ tezTestServiceCluster.stop();
+ tezTestServiceCluster = null;
+ }
+
+ if (tezCluster != null) {
+ tezCluster.stop();
+ tezCluster = null;
+ }
+ if (dfsCluster != null) {
+ dfsCluster.shutdown();
+ dfsCluster = null;
+ }
+ // TODO Add cleanup code.
+ }
+
+
+ /**
+  * Generates join data, runs the hash join example and validates its output,
+  * all through the shared session, then checks that the external test
+  * service received at least one submission.
+  */
+ @Test(timeout = 60000)
+ public void test1() throws Exception {
+ Path testDir = new Path("/tmp/testHashJoinExample");
+
+ remoteFs.mkdirs(testDir);
+
+ Path dataPath1 = new Path(testDir, "inPath1");
+ Path dataPath2 = new Path(testDir, "inPath2");
+ Path expectedOutputPath = new Path(testDir, "expectedOutputPath");
+ Path outPath = new Path(testDir, "outPath");
+
+ TezConfiguration tezConf = new TezConfiguration(confForJobs);
+
+ JoinDataGen dataGen = new JoinDataGen();
+ String[] dataGenArgs = new String[]{
+ dataPath1.toString(), "1048576", dataPath2.toString(), "524288",
+ expectedOutputPath.toString(), "2"};
+ assertEquals(0, dataGen.run(tezConf, dataGenArgs, sharedTezClient));
+
+ HashJoinExample joinExample = new HashJoinExample();
+ String[] args = new String[]{
+ dataPath1.toString(), dataPath2.toString(), "2", outPath.toString()};
+ assertEquals(0, joinExample.run(tezConf, args, sharedTezClient));
+
+ JoinValidate joinValidate = new JoinValidate();
+ String[] validateArgs = new String[]{
+ expectedOutputPath.toString(), outPath.toString(), "3"};
+ assertEquals(0, joinValidate.run(tezConf, validateArgs, sharedTezClient));
+
+ // Ensure this was actually submitted to the external cluster
+ assertTrue(tezTestServiceCluster.getNumSubmissions() > 0);
+ }
+}
http://git-wip-us.apache.org/repos/asf/tez/blob/aadd0492/tez-ext-service-tests/src/test/java/org/apache/tez/util/ProtoConverters.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/util/ProtoConverters.java b/tez-ext-service-tests/src/test/java/org/apache/tez/util/ProtoConverters.java
new file mode 100644
index 0000000..60ebc53
--- /dev/null
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/util/ProtoConverters.java
@@ -0,0 +1,172 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.util;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.tez.dag.api.DagTypeConverters;
+import org.apache.tez.dag.api.InputDescriptor;
+import org.apache.tez.dag.api.OutputDescriptor;
+import org.apache.tez.dag.api.ProcessorDescriptor;
+import org.apache.tez.dag.records.TezTaskAttemptID;
+import org.apache.tez.runtime.api.impl.GroupInputSpec;
+import org.apache.tez.runtime.api.impl.InputSpec;
+import org.apache.tez.runtime.api.impl.OutputSpec;
+import org.apache.tez.runtime.api.impl.TaskSpec;
+import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos.GroupInputSpecProto;
+import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos.IOSpecProto;
+import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos.TaskSpecProto;
+
+public class ProtoConverters {
+
+ public static TaskSpec getTaskSpecfromProto(TaskSpecProto taskSpecProto) {
+ TezTaskAttemptID taskAttemptID =
+ TezTaskAttemptID.fromString(taskSpecProto.getTaskAttemptIdString());
+
+ ProcessorDescriptor processorDescriptor = null;
+ if (taskSpecProto.hasProcessorDescriptor()) {
+ processorDescriptor = DagTypeConverters
+ .convertProcessorDescriptorFromDAGPlan(taskSpecProto.getProcessorDescriptor());
+ }
+
+ List<InputSpec> inputSpecList = new ArrayList<InputSpec>(taskSpecProto.getInputSpecsCount());
+ if (taskSpecProto.getInputSpecsCount() > 0) {
+ for (IOSpecProto inputSpecProto : taskSpecProto.getInputSpecsList()) {
+ inputSpecList.add(getInputSpecFromProto(inputSpecProto));
+ }
+ }
+
+ List<OutputSpec> outputSpecList =
+ new ArrayList<OutputSpec>(taskSpecProto.getOutputSpecsCount());
+ if (taskSpecProto.getOutputSpecsCount() > 0) {
+ for (IOSpecProto outputSpecProto : taskSpecProto.getOutputSpecsList()) {
+ outputSpecList.add(getOutputSpecFromProto(outputSpecProto));
+ }
+ }
+
+ List<GroupInputSpec> groupInputSpecs =
+ new ArrayList<GroupInputSpec>(taskSpecProto.getGroupedInputSpecsCount());
+ if (taskSpecProto.getGroupedInputSpecsCount() > 0) {
+ for (GroupInputSpecProto groupInputSpecProto : taskSpecProto.getGroupedInputSpecsList()) {
+ groupInputSpecs.add(getGroupInputSpecFromProto(groupInputSpecProto));
+ }
+ }
+
+ TaskSpec taskSpec =
+ new TaskSpec(taskAttemptID, taskSpecProto.getDagName(), taskSpecProto.getVertexName(),
+ taskSpecProto.getVertexParallelism(), processorDescriptor, inputSpecList,
+ outputSpecList, groupInputSpecs);
+ return taskSpec;
+ }
+
+ public static TaskSpecProto convertTaskSpecToProto(TaskSpec taskSpec) {
+ TaskSpecProto.Builder builder = TaskSpecProto.newBuilder();
+ builder.setTaskAttemptIdString(taskSpec.getTaskAttemptID().toString());
+ builder.setDagName(taskSpec.getDAGName());
+ builder.setVertexName(taskSpec.getVertexName());
+ builder.setVertexParallelism(taskSpec.getVertexParallelism());
+
+ if (taskSpec.getProcessorDescriptor() != null) {
+ builder.setProcessorDescriptor(
+ DagTypeConverters.convertToDAGPlan(taskSpec.getProcessorDescriptor()));
+ }
+
+ if (taskSpec.getInputs() != null && !taskSpec.getInputs().isEmpty()) {
+ for (InputSpec inputSpec : taskSpec.getInputs()) {
+ builder.addInputSpecs(convertInputSpecToProto(inputSpec));
+ }
+ }
+
+ if (taskSpec.getOutputs() != null && !taskSpec.getOutputs().isEmpty()) {
+ for (OutputSpec outputSpec : taskSpec.getOutputs()) {
+ builder.addOutputSpecs(convertOutputSpecToProto(outputSpec));
+ }
+ }
+
+ if (taskSpec.getGroupInputs() != null && !taskSpec.getGroupInputs().isEmpty()) {
+ for (GroupInputSpec groupInputSpec : taskSpec.getGroupInputs()) {
+ builder.addGroupedInputSpecs(convertGroupInputSpecToProto(groupInputSpec));
+
+ }
+ }
+ return builder.build();
+ }
+
+
+ public static InputSpec getInputSpecFromProto(IOSpecProto inputSpecProto) {
+ InputDescriptor inputDescriptor = null;
+ if (inputSpecProto.hasIoDescriptor()) {
+ inputDescriptor =
+ DagTypeConverters.convertInputDescriptorFromDAGPlan(inputSpecProto.getIoDescriptor());
+ }
+ InputSpec inputSpec = new InputSpec(inputSpecProto.getConnectedVertexName(), inputDescriptor,
+ inputSpecProto.getPhysicalEdgeCount());
+ return inputSpec;
+ }
+
+ public static IOSpecProto convertInputSpecToProto(InputSpec inputSpec) {
+ IOSpecProto.Builder builder = IOSpecProto.newBuilder();
+ if (inputSpec.getSourceVertexName() != null) {
+ builder.setConnectedVertexName(inputSpec.getSourceVertexName());
+ }
+ if (inputSpec.getInputDescriptor() != null) {
+ builder.setIoDescriptor(DagTypeConverters.convertToDAGPlan(inputSpec.getInputDescriptor()));
+ }
+ builder.setPhysicalEdgeCount(inputSpec.getPhysicalEdgeCount());
+ return builder.build();
+ }
+
+ public static OutputSpec getOutputSpecFromProto(IOSpecProto outputSpecProto) {
+ OutputDescriptor outputDescriptor = null;
+ if (outputSpecProto.hasIoDescriptor()) {
+ outputDescriptor =
+ DagTypeConverters.convertOutputDescriptorFromDAGPlan(outputSpecProto.getIoDescriptor());
+ }
+ OutputSpec outputSpec =
+ new OutputSpec(outputSpecProto.getConnectedVertexName(), outputDescriptor,
+ outputSpecProto.getPhysicalEdgeCount());
+ return outputSpec;
+ }
+
+ public static IOSpecProto convertOutputSpecToProto(OutputSpec outputSpec) {
+ IOSpecProto.Builder builder = IOSpecProto.newBuilder();
+ if (outputSpec.getDestinationVertexName() != null) {
+ builder.setConnectedVertexName(outputSpec.getDestinationVertexName());
+ }
+ if (outputSpec.getOutputDescriptor() != null) {
+ builder.setIoDescriptor(DagTypeConverters.convertToDAGPlan(outputSpec.getOutputDescriptor()));
+ }
+ builder.setPhysicalEdgeCount(outputSpec.getPhysicalEdgeCount());
+ return builder.build();
+ }
+
+ public static GroupInputSpec getGroupInputSpecFromProto(GroupInputSpecProto groupInputSpecProto) {
+ GroupInputSpec groupSpec = new GroupInputSpec(groupInputSpecProto.getGroupName(),
+ groupInputSpecProto.getGroupVerticesList(), DagTypeConverters
+ .convertInputDescriptorFromDAGPlan(groupInputSpecProto.getMergedInputDescriptor()));
+ return groupSpec;
+ }
+
+ public static GroupInputSpecProto convertGroupInputSpecToProto(GroupInputSpec groupInputSpec) {
+ GroupInputSpecProto.Builder builder = GroupInputSpecProto.newBuilder();
+ builder.setGroupName(groupInputSpec.getGroupName());
+ builder.addAllGroupVertices(groupInputSpec.getGroupVertices());
+ builder.setMergedInputDescriptor(
+ DagTypeConverters.convertToDAGPlan(groupInputSpec.getMergedInputDescriptor()));
+ return builder.build();
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/tez/blob/aadd0492/tez-ext-service-tests/src/test/proto/TezDaemonProtocol.proto
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/proto/TezDaemonProtocol.proto b/tez-ext-service-tests/src/test/proto/TezDaemonProtocol.proto
new file mode 100644
index 0000000..2f8b2e6
--- /dev/null
+++ b/tez-ext-service-tests/src/test/proto/TezDaemonProtocol.proto
@@ -0,0 +1,84 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+option java_package = "org.apache.tez.test.service.rpc";
+option java_outer_classname = "TezTestServiceProtocolProtos";
+option java_generic_services = true;
+option java_generate_equals_and_hash = true;
+
+import "DAGApiRecords.proto";
+
+message IOSpecProto { // One task input or output spec; TaskSpecProto uses it for both input_specs and output_specs.
+ optional string connected_vertex_name = 1; // Name of the vertex connected through this input/output.
+ optional TezEntityDescriptorProto io_descriptor = 2; // Descriptor of the input/output; type presumably defined in DAGApiRecords.proto.
+ optional int32 physical_edge_count = 3; // Physical edge count recorded on the spec.
+}
+
+message GroupInputSpecProto { // A named group of vertices plus the descriptor of the merged input built over them.
+ optional string group_name = 1; // Name of the group.
+ repeated string group_vertices = 2; // Names of the vertices that belong to the group.
+ optional TezEntityDescriptorProto merged_input_descriptor = 3; // Descriptor of the merged input for the group.
+}
+
+message TaskSpecProto { // Description of a single task attempt, embedded in SubmitWorkRequestProto.
+ optional string task_attempt_id_string = 1; // Task attempt id, carried as a string.
+ optional string dag_name = 2; // Name of the DAG the task belongs to.
+ optional string vertex_name = 3; // Name of the vertex the task belongs to.
+ optional TezEntityDescriptorProto processor_descriptor = 4; // Descriptor of the processor to run.
+ repeated IOSpecProto input_specs = 5; // One entry per task input.
+ repeated IOSpecProto output_specs = 6; // One entry per task output.
+ repeated GroupInputSpecProto grouped_input_specs = 7; // Grouped (merged) inputs, if any.
+ optional int32 vertex_parallelism = 8; // Parallelism of the vertex - presumably its task count; confirm against the sender.
+}
+
+
+message SubmitWorkRequestProto { // Request for the submitWork RPC: the RunContainerRequestProto fields plus a task spec.
+ optional string container_id_string = 1; // Container id, carried as a string.
+ optional string am_host = 2; // AM host - presumably where the task reports back; confirm against the caller.
+ optional int32 am_port = 3; // AM port paired with am_host.
+ optional string token_identifier = 4; // Identifier for the security token - presumably the application id string; confirm.
+ optional bytes credentials_binary = 5; // Credentials in serialized binary form.
+ optional string user = 6; // User name associated with the request.
+ optional string application_id_string = 7; // Application id, carried as a string.
+ optional int32 app_attempt_number = 8; // Application attempt number.
+ optional TaskSpecProto task_spec = 9; // The task to execute.
+}
+
+message SubmitWorkResponseProto { // Intentionally empty: the response carries no fields.
+}
+
+
+
+message RunContainerRequestProto { // Request for the runContainer RPC; same fields as SubmitWorkRequestProto minus task_spec.
+ optional string container_id_string = 1; // Container id, carried as a string.
+ optional string am_host = 2; // AM host - presumably where the container reports back; confirm against the caller.
+ optional int32 am_port = 3; // AM port paired with am_host.
+ optional string token_identifier = 4; // Identifier for the security token - presumably the application id string; confirm.
+ optional bytes credentials_binary = 5; // Credentials in serialized binary form.
+ optional string user = 6; // User name associated with the request.
+ optional string application_id_string = 7; // Application id, carried as a string.
+ optional int32 app_attempt_number = 8; // Application attempt number.
+}
+
+message RunContainerResponseProto { // Intentionally empty: the response carries no fields.
+}
+
+service TezTestServiceProtocol { // RPC surface of the test service; Java stubs are generated because java_generic_services is enabled above.
+ rpc runContainer(RunContainerRequestProto) returns (RunContainerResponseProto); // Request carries container details only, no task spec.
+ rpc submitWork(SubmitWorkRequestProto) returns (SubmitWorkResponseProto); // Request additionally carries the TaskSpecProto to run.
+}
http://git-wip-us.apache.org/repos/asf/tez/blob/aadd0492/tez-ext-service-tests/src/test/resources/log4j.properties
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/resources/log4j.properties b/tez-ext-service-tests/src/test/resources/log4j.properties
new file mode 100644
index 0000000..531b68b
--- /dev/null
+++ b/tez-ext-service-tests/src/test/resources/log4j.properties
@@ -0,0 +1,19 @@
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# log4j configuration used during build and unit tests
+
+log4j.rootLogger=info,stdout
+log4j.threshold=ALL
+log4j.appender.stdout=org.apache.log4j.ConsoleAppender
+log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
+log4j.appender.stdout.layout.ConversionPattern=%d{ISO8601} %-5p [%t] %c{2} (%F:%M(%L)) - %m%n
http://git-wip-us.apache.org/repos/asf/tez/blob/aadd0492/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TezChild.java
----------------------------------------------------------------------
diff --git a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TezChild.java b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TezChild.java
index fd55992..3cba3ce 100644
--- a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TezChild.java
+++ b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TezChild.java
@@ -389,7 +389,7 @@ public class TezChild {
private final Throwable throwable;
private final String errorMessage;
- ContainerExecutionResult(ExitStatus exitStatus, @Nullable Throwable throwable,
+ public ContainerExecutionResult(ExitStatus exitStatus, @Nullable Throwable throwable,
@Nullable String errorMessage) {
this.exitStatus = exitStatus;
this.throwable = throwable;
http://git-wip-us.apache.org/repos/asf/tez/blob/aadd0492/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TezTaskRunner.java
----------------------------------------------------------------------
diff --git a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TezTaskRunner.java b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TezTaskRunner.java
index de83889..f54814b 100644
--- a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TezTaskRunner.java
+++ b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TezTaskRunner.java
@@ -67,7 +67,7 @@ public class TezTaskRunner implements TezUmbilical, ErrorReporter {
private final AtomicBoolean taskRunning;
private final AtomicBoolean shutdownRequested = new AtomicBoolean(false);
- TezTaskRunner(Configuration tezConf, UserGroupInformation ugi, String[] localDirs,
+ public TezTaskRunner(Configuration tezConf, UserGroupInformation ugi, String[] localDirs,
TaskSpec taskSpec, int appAttemptNumber,
Map<String, ByteBuffer> serviceConsumerMetadata, Map<String, String> serviceProviderEnvMap,
Multimap<String, String> startedInputsMap, TaskReporter taskReporter,
[24/43] tez git commit: TEZ-2090. Add tests for jobs running in
external services. (sseth)
Posted by ss...@apache.org.
TEZ-2090. Add tests for jobs running in external services. (sseth)
Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/aadd0492
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/aadd0492
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/aadd0492
Branch: refs/heads/TEZ-2003
Commit: aadd04927b0cc646db2579f17f903d8e24916bdc
Parents: 7b71d3b
Author: Siddharth Seth <ss...@apache.org>
Authored: Fri Feb 13 17:24:05 2015 -0800
Committer: Siddharth Seth <ss...@apache.org>
Committed: Fri May 8 14:41:01 2015 -0700
----------------------------------------------------------------------
TEZ-2003-CHANGES.txt | 1 +
pom.xml | 6 +
.../apache/tez/dag/api/TezConfiguration.java | 2 +
.../apache/tez/dag/api/TaskCommunicator.java | 1 +
.../tez/dag/api/TaskCommunicatorContext.java | 3 +
.../tez/dag/app/TezTaskCommunicatorImpl.java | 42 +-
.../dag/app/rm/TaskSchedulerEventHandler.java | 2 +-
tez-ext-service-tests/pom.xml | 161 ++++
.../tez/dag/app/TezTestServiceCommunicator.java | 152 ++++
.../TezTestServiceContainerLauncher.java | 144 ++++
.../TezTestServiceNoOpContainerLauncher.java | 66 ++
.../rm/TezTestServiceTaskSchedulerService.java | 347 ++++++++
.../TezTestServiceTaskCommunicatorImpl.java | 182 ++++
.../org/apache/tez/service/ContainerRunner.java | 27 +
.../tez/service/MiniTezTestServiceCluster.java | 163 ++++
.../service/TezTestServiceConfConstants.java | 41 +
.../TezTestServiceProtocolBlockingPB.java | 22 +
.../tez/service/impl/ContainerRunnerImpl.java | 512 +++++++++++
.../apache/tez/service/impl/TezTestService.java | 126 +++
.../impl/TezTestServiceProtocolClientImpl.java | 82 ++
.../impl/TezTestServiceProtocolServerImpl.java | 133 +++
.../tez/shufflehandler/FadvisedChunkedFile.java | 78 ++
.../tez/shufflehandler/FadvisedFileRegion.java | 160 ++++
.../apache/tez/shufflehandler/IndexCache.java | 199 +++++
.../tez/shufflehandler/ShuffleHandler.java | 840 +++++++++++++++++++
.../tez/tests/TestExternalTezServices.java | 183 ++++
.../org/apache/tez/util/ProtoConverters.java | 172 ++++
.../src/test/proto/TezDaemonProtocol.proto | 84 ++
.../src/test/resources/log4j.properties | 19 +
.../org/apache/tez/runtime/task/TezChild.java | 2 +-
.../apache/tez/runtime/task/TezTaskRunner.java | 2 +-
31 files changed, 3943 insertions(+), 11 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/tez/blob/aadd0492/TEZ-2003-CHANGES.txt
----------------------------------------------------------------------
diff --git a/TEZ-2003-CHANGES.txt b/TEZ-2003-CHANGES.txt
index d7e4be5..975ce65 100644
--- a/TEZ-2003-CHANGES.txt
+++ b/TEZ-2003-CHANGES.txt
@@ -1,5 +1,6 @@
ALL CHANGES:
TEZ-2019. Temporarily allow the scheduler and launcher to be specified via configuration.
TEZ-2006. Task communication plane needs to be pluggable.
+ TEZ-2090. Add tests for jobs running in external services.
INCOMPATIBLE CHANGES:
http://git-wip-us.apache.org/repos/asf/tez/blob/aadd0492/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index ce4fa13..ca9db11 100644
--- a/pom.xml
+++ b/pom.xml
@@ -170,6 +170,11 @@
<type>test-jar</type>
</dependency>
<dependency>
+ <groupId>org.apache.tez</groupId>
+ <artifactId>tez-ext-service-tests</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+ <dependency>
<groupId>org.apache.pig</groupId>
<artifactId>pig</artifactId>
<version>${pig.version}</version>
@@ -638,6 +643,7 @@
<module>tez-ui</module>
<module>tez-plugins</module>
<module>tez-tools</module>
+ <module>tez-ext-service-tests</module>
<module>tez-dist</module>
<module>docs</module>
</modules>
http://git-wip-us.apache.org/repos/asf/tez/blob/aadd0492/tez-api/src/main/java/org/apache/tez/dag/api/TezConfiguration.java
----------------------------------------------------------------------
diff --git a/tez-api/src/main/java/org/apache/tez/dag/api/TezConfiguration.java b/tez-api/src/main/java/org/apache/tez/dag/api/TezConfiguration.java
index 01e724e..1cd478e 100644
--- a/tez-api/src/main/java/org/apache/tez/dag/api/TezConfiguration.java
+++ b/tez-api/src/main/java/org/apache/tez/dag/api/TezConfiguration.java
@@ -1172,6 +1172,8 @@ public class TezConfiguration extends Configuration {
public static final String TEZ_AM_CONTAINER_LAUNCHER_CLASS = TEZ_AM_PREFIX + "container-launcher.class";
@ConfigurationScope(Scope.VERTEX)
public static final String TEZ_AM_TASK_SCHEDULER_CLASS = TEZ_AM_PREFIX + "task-scheduler.class";
+ @ConfigurationScope(Scope.VERTEX)
+ public static final String TEZ_AM_TASK_COMMUNICATOR_CLASS = TEZ_AM_PREFIX + "task-communicator.class";
// TODO only validate property here, value can also be validated if necessary
http://git-wip-us.apache.org/repos/asf/tez/blob/aadd0492/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicator.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicator.java b/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicator.java
index 97f9c16..c9f85e0 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicator.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicator.java
@@ -14,6 +14,7 @@
package org.apache.tez.dag.api;
+import java.io.IOException;
import java.net.InetSocketAddress;
import java.util.Map;
http://git-wip-us.apache.org/repos/asf/tez/blob/aadd0492/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicatorContext.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicatorContext.java b/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicatorContext.java
index 9b2d889..41675fe 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicatorContext.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicatorContext.java
@@ -44,5 +44,8 @@ public interface TaskCommunicatorContext {
// TODO TEZ-2003 Move to vertex, taskIndex, version
void taskStartedRemotely(TezTaskAttemptID taskAttemptID, ContainerId containerId);
+ // TODO TEZ-2003 Add an API to register task failure - for example, a communication failure.
+ // This will have to take into consideration the TA_FAILED event
+
// TODO Eventually Add methods to report availability stats to the scheduler.
}
http://git-wip-us.apache.org/repos/asf/tez/blob/aadd0492/tez-dag/src/main/java/org/apache/tez/dag/app/TezTaskCommunicatorImpl.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/TezTaskCommunicatorImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/TezTaskCommunicatorImpl.java
index 5652937..258c927 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/TezTaskCommunicatorImpl.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/TezTaskCommunicatorImpl.java
@@ -74,16 +74,22 @@ public class TezTaskCommunicatorImpl extends TaskCommunicator {
new ConcurrentHashMap<TaskAttempt, ContainerId>();
private final TezTaskUmbilicalProtocol taskUmbilical;
+ private final String tokenIdentifier;
+ private final Token<JobTokenIdentifier> sessionToken;
private InetSocketAddress address;
private Server server;
- private static final class ContainerInfo {
+ public static final class ContainerInfo {
- ContainerInfo(ContainerId containerId) {
+ ContainerInfo(ContainerId containerId, String host, int port) {
this.containerId = containerId;
+ this.host = host;
+ this.port = port;
}
- ContainerId containerId;
+ final ContainerId containerId;
+ public final String host;
+ public final int port;
TezHeartbeatResponse lastResponse = null;
TaskSpec taskSpec = null;
long lastRequestId = 0;
@@ -110,6 +116,8 @@ public class TezTaskCommunicatorImpl extends TaskCommunicator {
super(TezTaskCommunicatorImpl.class.getName());
this.taskCommunicatorContext = taskCommunicatorContext;
this.taskUmbilical = new TezTaskUmbilicalProtocolImpl();
+ this.tokenIdentifier = this.taskCommunicatorContext.getApplicationAttemptId().getApplicationId().toString();
+ this.sessionToken = TokenCache.getSessionToken(taskCommunicatorContext.getCredentials());
}
@@ -130,9 +138,7 @@ public class TezTaskCommunicatorImpl extends TaskCommunicator {
try {
JobTokenSecretManager jobTokenSecretManager =
new JobTokenSecretManager();
- Token<JobTokenIdentifier> sessionToken = TokenCache.getSessionToken(taskCommunicatorContext.getCredentials());
- jobTokenSecretManager.addTokenForJob(
- taskCommunicatorContext.getApplicationAttemptId().getApplicationId().toString(), sessionToken);
+ jobTokenSecretManager.addTokenForJob(tokenIdentifier, sessionToken);
server = new RPC.Builder(conf)
.setProtocol(TezTaskUmbilicalProtocol.class)
@@ -182,7 +188,7 @@ public class TezTaskCommunicatorImpl extends TaskCommunicator {
@Override
public void registerRunningContainer(ContainerId containerId, String host, int port) {
- ContainerInfo oldInfo = registeredContainers.putIfAbsent(containerId, new ContainerInfo(containerId));
+ ContainerInfo oldInfo = registeredContainers.putIfAbsent(containerId, new ContainerInfo(containerId, host, port));
if (oldInfo != null) {
throw new TezUncheckedException("Multiple registrations for containerId: " + containerId);
}
@@ -230,9 +236,9 @@ public class TezTaskCommunicatorImpl extends TaskCommunicator {
". Already registered to containerId: " + oldId);
}
}
-
}
+
@Override
public void unregisterRunningTaskAttempt(TezTaskAttemptID taskAttemptID) {
TaskAttempt taskAttempt = new TaskAttempt(taskAttemptID);
@@ -258,6 +264,18 @@ public class TezTaskCommunicatorImpl extends TaskCommunicator {
return address;
}
+ protected String getTokenIdentifier() {
+ return tokenIdentifier;
+ }
+
+ protected Token<JobTokenIdentifier> getSessionToken() {
+ return sessionToken;
+ }
+
+ protected TaskCommunicatorContext getTaskCommunicatorContext() {
+ return taskCommunicatorContext;
+ }
+
public TezTaskUmbilicalProtocol getUmbilical() {
return this.taskUmbilical;
}
@@ -471,4 +489,12 @@ public class TezTaskCommunicatorImpl extends TaskCommunicator {
return "TaskAttempt{" + "taskAttemptId=" + taskAttemptId + '}';
}
}
+
+ protected ContainerInfo getContainerInfo(ContainerId containerId) {
+ return registeredContainers.get(containerId);
+ }
+
+ protected ContainerId getContainerForAttempt(TezTaskAttemptID taskAttemptId) {
+ return attemptToContainerMap.get(new TaskAttempt(taskAttemptId));
+ }
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/tez/blob/aadd0492/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerEventHandler.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerEventHandler.java b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerEventHandler.java
index 62f82db..8c3ed87 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerEventHandler.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerEventHandler.java
@@ -350,7 +350,7 @@ public class TaskSchedulerEventHandler extends AbstractService
try {
Constructor<? extends TaskSchedulerService> ctor = taskSchedulerClazz
.getConstructor(TaskSchedulerAppCallback.class, AppContext.class, String.class,
- Integer.class, String.class, Configuration.class);
+ int.class, String.class, Configuration.class);
ctor.setAccessible(true);
TaskSchedulerService taskSchedulerService =
ctor.newInstance(this, appContext, host, port, trackingUrl, getConfig());
http://git-wip-us.apache.org/repos/asf/tez/blob/aadd0492/tez-ext-service-tests/pom.xml
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/pom.xml b/tez-ext-service-tests/pom.xml
new file mode 100644
index 0000000..37f68b1
--- /dev/null
+++ b/tez-ext-service-tests/pom.xml
@@ -0,0 +1,161 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ ~ Licensed under the Apache License, Version 2.0 (the "License");
+ ~ you may not use this file except in compliance with the License.
+ ~ You may obtain a copy of the License at
+ ~
+ ~ http://www.apache.org/licenses/LICENSE-2.0
+ ~
+ ~ Unless required by applicable law or agreed to in writing, software
+ ~ distributed under the License is distributed on an "AS IS" BASIS,
+ ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ~ See the License for the specific language governing permissions and
+ ~ limitations under the License.
+ -->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+ <parent>
+ <artifactId>tez</artifactId>
+ <groupId>org.apache.tez</groupId>
+ <version>0.7.0-SNAPSHOT</version>
+ </parent>
+
+ <!-- TODO TEZ-2003 Merge this into the tez-tests module -->
+ <artifactId>tez-ext-service-tests</artifactId>
+
+ <dependencies>
+ <dependency>
+ <groupId>log4j</groupId>
+ <artifactId>log4j</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>com.google.guava</groupId>
+ <artifactId>guava</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-common</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>junit</groupId>
+ <artifactId>junit</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.mockito</groupId>
+ <artifactId>mockito-all</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.tez</groupId>
+ <artifactId>tez-runtime-internals</artifactId>
+ </dependency>
+ <dependency>
+ <!-- Required for the ShuffleHandler -->
+ <groupId>org.apache.tez</groupId>
+ <artifactId>tez-runtime-library</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.tez</groupId>
+ <artifactId>tez-dag</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.tez</groupId>
+ <artifactId>tez-tests</artifactId>
+ <scope>test</scope>
+ <type>test-jar</type>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-hdfs</artifactId>
+ <scope>test</scope>
+ <type>test-jar</type>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-yarn-server-tests</artifactId>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-common</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-common</artifactId>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-mapreduce-client-jobclient</artifactId>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
+
+ </dependencies>
+
+ <build>
+ <!--
+ Include all files in src/main/resources. By default, do not apply property
+ substitution (filtering=false), but do apply property substitution to
+ tez-api-version-info.properties (filtering=true). This will substitute the
+ version information correctly, but prevent Maven from altering other files.
+ -->
+ <resources>
+ <resource>
+ <directory>${basedir}/src/main/resources</directory>
+ <excludes>
+ <exclude>tez-api-version-info.properties</exclude>
+ </excludes>
+ <filtering>false</filtering>
+ </resource>
+ <resource>
+ <directory>${basedir}/src/main/resources</directory>
+ <includes>
+ <include>tez-api-version-info.properties</include>
+ </includes>
+ <filtering>true</filtering>
+ </resource>
+ </resources>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.rat</groupId>
+ <artifactId>apache-rat-plugin</artifactId>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-maven-plugins</artifactId>
+ <executions>
+ <execution>
+ <id>compile-protoc</id>
+ <phase>generate-sources</phase>
+ <goals>
+ <goal>protoc</goal>
+ </goals>
+ <configuration>
+ <protocVersion>${protobuf.version}</protocVersion>
+ <protocCommand>${protoc.path}</protocCommand>
+ <imports>
+ <param>${basedir}/src/test/proto</param>
+ <param>${basedir}/../tez-api/src/main/proto</param>
+ </imports>
+ <source>
+ <directory>${basedir}/src/test/proto</directory>
+ <includes>
+ <include>TezDaemonProtocol.proto</include>
+ </includes>
+ </source>
+ <output>${project.build.directory}/generated-test-sources/java</output>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+ </plugins>
+ </build>
+
+</project>
http://git-wip-us.apache.org/repos/asf/tez/blob/aadd0492/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/TezTestServiceCommunicator.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/TezTestServiceCommunicator.java b/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/TezTestServiceCommunicator.java
new file mode 100644
index 0000000..ac50878
--- /dev/null
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/TezTestServiceCommunicator.java
@@ -0,0 +1,152 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.dag.app;
+
+
+import java.util.concurrent.Callable;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ConcurrentMap;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+
+import com.google.common.util.concurrent.FutureCallback;
+import com.google.common.util.concurrent.Futures;
+import com.google.common.util.concurrent.ListenableFuture;
+import com.google.common.util.concurrent.ListeningExecutorService;
+import com.google.common.util.concurrent.MoreExecutors;
+import com.google.common.util.concurrent.ThreadFactoryBuilder;
+import com.google.protobuf.Message;
+import org.apache.hadoop.service.AbstractService;
+import org.apache.tez.service.TezTestServiceProtocolBlockingPB;
+import org.apache.tez.service.impl.TezTestServiceProtocolClientImpl;
+import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos.RunContainerRequestProto;
+import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos.RunContainerResponseProto;
+import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos.SubmitWorkRequestProto;
+import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos.SubmitWorkResponseProto;
+
+public class TezTestServiceCommunicator extends AbstractService { // Sends runContainer/submitWork RPCs to test-service hosts on a thread pool and reports results via callbacks.
+
+ private final ConcurrentMap<String, TezTestServiceProtocolBlockingPB> hostProxies; // RPC proxies cached per "host:port" key.
+ private final ListeningExecutorService executor; // Pool on which the blocking RPC calls are made.
+
+ // TODO Convert this into a singleton
+ public TezTestServiceCommunicator(int numThreads) { // numThreads: size of the fixed pool used for outgoing requests.
+ super(TezTestServiceCommunicator.class.getSimpleName());
+ ExecutorService localExecutor = Executors.newFixedThreadPool(numThreads,
+ new ThreadFactoryBuilder().setNameFormat("TezTestServiceCommunicator #%2d").build());
+ this.hostProxies = new ConcurrentHashMap<String, TezTestServiceProtocolBlockingPB>();
+ executor = MoreExecutors.listeningDecorator(localExecutor); // Wrapped so submit() returns ListenableFutures that accept callbacks.
+ }
+
+ @Override
+ public void serviceStop() { // Stops the pool without waiting for it; the cached proxies are not closed here.
+ executor.shutdownNow();
+ }
+
+
+ public void runContainer(RunContainerRequestProto request, String host, int port, // Asynchronous: returns once the request is queued.
+ final ExecuteRequestCallback<RunContainerResponseProto> callback) {
+ ListenableFuture<RunContainerResponseProto> future = executor.submit(new RunContainerCallable(request, host, port));
+ Futures.addCallback(future, new FutureCallback<RunContainerResponseProto>() { // Bridges the future's outcome to the caller's callback.
+ @Override
+ public void onSuccess(RunContainerResponseProto result) {
+ callback.setResponse(result);
+ }
+
+ @Override
+ public void onFailure(Throwable t) {
+ callback.indicateError(t);
+ }
+ });
+
+ }
+
+ public void submitWork(SubmitWorkRequestProto request, String host, int port, // Asynchronous: returns once the request is queued.
+ final ExecuteRequestCallback<SubmitWorkResponseProto> callback) {
+ ListenableFuture<SubmitWorkResponseProto> future = executor.submit(new SubmitWorkCallable(request, host, port));
+ Futures.addCallback(future, new FutureCallback<SubmitWorkResponseProto>() { // Bridges the future's outcome to the caller's callback.
+ @Override
+ public void onSuccess(SubmitWorkResponseProto result) {
+ callback.setResponse(result);
+ }
+
+ @Override
+ public void onFailure(Throwable t) {
+ callback.indicateError(t);
+ }
+ });
+
+ }
+
+
+ private class RunContainerCallable implements Callable<RunContainerResponseProto> { // Inner (non-static) so call() can reach getProxy().
+
+ final String hostname;
+ final int port;
+ final RunContainerRequestProto request;
+
+ private RunContainerCallable(RunContainerRequestProto request, String hostname, int port) {
+ this.hostname = hostname;
+ this.port = port;
+ this.request = request;
+ }
+
+ @Override
+ public RunContainerResponseProto call() throws Exception {
+ return getProxy(hostname, port).runContainer(null, request); // First argument is passed as null - presumably the RpcController; confirm.
+ }
+ }
+
+ private class SubmitWorkCallable implements Callable<SubmitWorkResponseProto> { // Inner (non-static) so call() can reach getProxy().
+ final String hostname;
+ final int port;
+ final SubmitWorkRequestProto request;
+
+ private SubmitWorkCallable(SubmitWorkRequestProto request, String hostname, int port) {
+ this.hostname = hostname;
+ this.port = port;
+ this.request = request;
+ }
+
+ @Override
+ public SubmitWorkResponseProto call() throws Exception {
+ return getProxy(hostname, port).submitWork(null, request); // First argument is passed as null - presumably the RpcController; confirm.
+ }
+ }
+
+ public interface ExecuteRequestCallback<T extends Message> { // Outcome listener for runContainer/submitWork.
+ void setResponse(T response); // Called with the RPC response when the call succeeds.
+ void indicateError(Throwable t); // Called with the failure when the call throws.
+ }
+
+ private TezTestServiceProtocolBlockingPB getProxy(String hostname, int port) { // Returns the cached proxy for host:port, creating it on first use.
+ String hostId = getHostIdentifier(hostname, port);
+
+ TezTestServiceProtocolBlockingPB proxy = hostProxies.get(hostId);
+ if (proxy == null) {
+ proxy = new TezTestServiceProtocolClientImpl(getConfig(), hostname, port);
+ TezTestServiceProtocolBlockingPB proxyOld = hostProxies.putIfAbsent(hostId, proxy); // Non-null means another thread registered a proxy first.
+ if (proxyOld != null) {
+ // TODO Shutdown the new proxy.
+ proxy = proxyOld; // Use the registered proxy; the one created above is discarded without being closed.
+ }
+ }
+ return proxy;
+ }
+
+ private String getHostIdentifier(String hostname, int port) { // Builds the hostProxies cache key.
+ return hostname + ":" + port;
+ }
+}
http://git-wip-us.apache.org/repos/asf/tez/blob/aadd0492/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/launcher/TezTestServiceContainerLauncher.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/launcher/TezTestServiceContainerLauncher.java b/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/launcher/TezTestServiceContainerLauncher.java
new file mode 100644
index 0000000..e83165b
--- /dev/null
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/launcher/TezTestServiceContainerLauncher.java
@@ -0,0 +1,144 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.dag.app.launcher;
+
+import com.google.common.base.Preconditions;
+import com.google.protobuf.ByteString;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.service.AbstractService;
+import org.apache.hadoop.yarn.api.ApplicationConstants;
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.util.Clock;
+import org.apache.tez.dag.app.AppContext;
+import org.apache.tez.dag.app.TaskAttemptListener;
+import org.apache.tez.dag.app.TezTestServiceCommunicator;
+import org.apache.tez.dag.app.rm.NMCommunicatorEvent;
+import org.apache.tez.dag.app.rm.NMCommunicatorLaunchRequestEvent;
+import org.apache.tez.dag.app.rm.container.AMContainerEvent;
+import org.apache.tez.dag.app.rm.container.AMContainerEventLaunchFailed;
+import org.apache.tez.dag.app.rm.container.AMContainerEventLaunched;
+import org.apache.tez.dag.app.rm.container.AMContainerEventType;
+import org.apache.tez.dag.history.DAGHistoryEvent;
+import org.apache.tez.dag.history.events.ContainerLaunchedEvent;
+import org.apache.tez.service.TezTestServiceConfConstants;
+import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos;
+import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos.RunContainerRequestProto;
+
+public class TezTestServiceContainerLauncher extends AbstractService implements ContainerLauncher {
+
+ // TODO Support interruptability of tasks which haven't yet been launched.
+
+ // TODO May need multiple connections per target machine, depending upon how synchronization is handled in the RPC layer
+
+ static final Log LOG = LogFactory.getLog(TezTestServiceContainerLauncher.class);
+
+ private final AppContext context;
+ private final String tokenIdentifier;
+ private final TaskAttemptListener tal;
+ private final int servicePort;
+ private final TezTestServiceCommunicator communicator;
+ private final Clock clock;
+
+
+ /**
+ * Creates the launcher. The configuration is accepted here, rather than only in
+ * serviceInit, so that the fields derived from it can be final.
+ *
+ * @param appContext source of the clock, application id, event handler and history handler
+ * @param conf supplies the communicator thread count and the service RPC port
+ * @param tal listener whose address is sent to the service as the AM host and port
+ * @throws IllegalArgumentException if the service RPC port is not set to a positive value
+ */
+ public TezTestServiceContainerLauncher(AppContext appContext, Configuration conf,
+ TaskAttemptListener tal) {
+ super(TezTestServiceContainerLauncher.class.getName());
+ this.clock = appContext.getClock();
+ int numThreads = conf.getInt(TezTestServiceConfConstants.TEZ_TEST_SERVICE_AM_COMMUNICATOR_NUM_THREADS,
+ TezTestServiceConfConstants.TEZ_TEST_SERVICE_AM_COMMUNICATOR_NUM_THREADS_DEFAULT);
+
+ // -1 marks "not configured"; the check below rejects it.
+ this.servicePort = conf.getInt(TezTestServiceConfConstants.TEZ_TEST_SERVICE_RPC_PORT, -1);
+ Preconditions.checkArgument(servicePort > 0,
+ TezTestServiceConfConstants.TEZ_TEST_SERVICE_RPC_PORT + " must be set");
+ this.communicator = new TezTestServiceCommunicator(numThreads);
+ this.context = appContext;
+ this.tokenIdentifier = context.getApplicationID().toString();
+ this.tal = tal;
+ }
+
+ /** Initializes the communicator with the service configuration. */
+ @Override
+ public void serviceInit(Configuration conf) {
+ communicator.init(conf);
+ }
+
+ /** Starts the communicator. */
+ @Override
+ public void serviceStart() {
+ communicator.start();
+ }
+
+ /** Stops the communicator. */
+ @Override
+ public void serviceStop() {
+ communicator.stop();
+ }
+
+ /**
+ * Handles container launch and stop requests.
+ * <p>
+ * A launch request is converted into a RunContainerRequestProto and sent through the
+ * communicator to the node named in the event. On success an AMContainerEventLaunched and a
+ * ContainerLaunchedEvent history event are emitted; on failure a launch-failed event is sent.
+ * A stop request is not forwarded to the service; only C_NM_STOP_SENT is emitted.
+ * Other event types are ignored.
+ */
+ @Override
+ public void handle(NMCommunicatorEvent event) {
+ switch (event.getType()) {
+ case CONTAINER_LAUNCH_REQUEST:
+ final NMCommunicatorLaunchRequestEvent launchEvent = (NMCommunicatorLaunchRequestEvent) event;
+ RunContainerRequestProto runRequest = constructRunContainerRequest(launchEvent);
+ communicator.runContainer(runRequest, launchEvent.getNodeId().getHost(),
+ launchEvent.getNodeId().getPort(),
+ new TezTestServiceCommunicator.ExecuteRequestCallback<TezTestServiceProtocolProtos.RunContainerResponseProto>() {
+ @Override
+ public void setResponse(TezTestServiceProtocolProtos.RunContainerResponseProto response) {
+ LOG.info("Container: " + launchEvent.getContainerId() + " launch succeeded on host: " + launchEvent.getNodeId());
+ context.getEventHandler().handle(new AMContainerEventLaunched(launchEvent.getContainerId()));
+ ContainerLaunchedEvent lEvt = new ContainerLaunchedEvent(
+ launchEvent.getContainerId(), clock.getTime(), context.getApplicationAttemptId());
+ context.getHistoryHandler().handle(new DAGHistoryEvent(
+ null, lEvt));
+ }
+
+ @Override
+ public void indicateError(Throwable t) {
+ // Log the container id, matching the success message and the failure event below.
+ LOG.error("Failed to launch container: " + launchEvent.getContainerId() + " on host: " + launchEvent.getNodeId(), t);
+ sendContainerLaunchFailedMsg(launchEvent.getContainerId(), t);
+ }
+ });
+ break;
+ case CONTAINER_STOP_REQUEST:
+ LOG.info("DEBUG: Ignoring STOP_REQUEST for event: " + event);
+ // No stop request is sent to the service. C_NM_STOP_SENT is still emitted, presumably
+ // so that the regular container control flow can proceed - confirm.
+ // TODO Sending this out for an un-launched container is invalid
+ context.getEventHandler().handle(new AMContainerEvent(event.getContainerId(),
+ AMContainerEventType.C_NM_STOP_SENT));
+ break;
+ }
+ }
+
+ private RunContainerRequestProto constructRunContainerRequest(NMCommunicatorLaunchRequestEvent event) {
+ RunContainerRequestProto.Builder builder = RunContainerRequestProto.newBuilder();
+ builder.setAmHost(tal.getAddress().getHostName()).setAmPort(tal.getAddress().getPort());
+ builder.setAppAttemptNumber(event.getContainer().getId().getApplicationAttemptId().getAttemptId());
+ builder.setApplicationIdString(
+ event.getContainer().getId().getApplicationAttemptId().getApplicationId().toString());
+ builder.setTokenIdentifier(tokenIdentifier);
+ builder.setContainerIdString(event.getContainer().getId().toString());
+ builder.setCredentialsBinary(
+ ByteString.copyFrom(event.getContainerLaunchContext().getTokens()));
+ // TODO Avoid reading this from the environment
+ builder.setUser(System.getenv(ApplicationConstants.Environment.USER.name()));
+ return builder.build();
+ }
+
+ /**
+ * Sends an AMContainerEventLaunchFailed for the container to the event handler.
+ *
+ * @param containerId the container whose launch failed
+ * @param t the failure; its message is used as the diagnostic, or "" when t is null
+ */
+ @SuppressWarnings("unchecked")
+ void sendContainerLaunchFailedMsg(ContainerId containerId, Throwable t) {
+ context.getEventHandler().handle(new AMContainerEventLaunchFailed(containerId, t == null ? "" : t.getMessage()));
+ }
+}
http://git-wip-us.apache.org/repos/asf/tez/blob/aadd0492/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/launcher/TezTestServiceNoOpContainerLauncher.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/launcher/TezTestServiceNoOpContainerLauncher.java b/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/launcher/TezTestServiceNoOpContainerLauncher.java
new file mode 100644
index 0000000..8c8e486
--- /dev/null
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/launcher/TezTestServiceNoOpContainerLauncher.java
@@ -0,0 +1,66 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.dag.app.launcher;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.service.AbstractService;
+import org.apache.hadoop.yarn.util.Clock;
+import org.apache.tez.dag.app.AppContext;
+import org.apache.tez.dag.app.TaskAttemptListener;
+import org.apache.tez.dag.app.rm.NMCommunicatorEvent;
+import org.apache.tez.dag.app.rm.NMCommunicatorLaunchRequestEvent;
+import org.apache.tez.dag.app.rm.container.AMContainerEvent;
+import org.apache.tez.dag.app.rm.container.AMContainerEventLaunched;
+import org.apache.tez.dag.app.rm.container.AMContainerEventType;
+import org.apache.tez.dag.history.DAGHistoryEvent;
+import org.apache.tez.dag.history.events.ContainerLaunchedEvent;
+
+/**
+ * Container launcher that performs no launch at all: every launch request is immediately
+ * reported as launched, and every stop request is answered with C_NM_STOP_SENT.
+ */
+public class TezTestServiceNoOpContainerLauncher extends AbstractService implements ContainerLauncher {
+
+ static final Log LOG = LogFactory.getLog(TezTestServiceNoOpContainerLauncher.class);
+
+ private final AppContext context;
+ private final Clock clock;
+
+ public TezTestServiceNoOpContainerLauncher(AppContext appContext, Configuration conf,
+ TaskAttemptListener tal) {
+ super(TezTestServiceNoOpContainerLauncher.class.getName());
+ this.context = appContext;
+ this.clock = appContext.getClock();
+ }
+
+ @Override
+ public void handle(NMCommunicatorEvent event) {
+ switch(event.getType()) {
+ case CONTAINER_LAUNCH_REQUEST:
+ reportLaunched((NMCommunicatorLaunchRequestEvent) event);
+ break;
+ case CONTAINER_STOP_REQUEST:
+ reportStopSent(event);
+ break;
+ }
+ }
+
+ /** Emits the launched event and the matching history event without contacting any node. */
+ private void reportLaunched(NMCommunicatorLaunchRequestEvent request) {
+ LOG.info("No-op launch for container: " + request.getContainerId() + " succeeded on host: " + request.getNodeId());
+ context.getEventHandler().handle(new AMContainerEventLaunched(request.getContainerId()));
+ ContainerLaunchedEvent launched = new ContainerLaunchedEvent(
+ request.getContainerId(), clock.getTime(), context.getApplicationAttemptId());
+ context.getHistoryHandler().handle(new DAGHistoryEvent(null, launched));
+ }
+
+ /** Logs the ignored stop request and emits C_NM_STOP_SENT for its container. */
+ private void reportStopSent(NMCommunicatorEvent stopRequest) {
+ LOG.info("DEBUG: Ignoring STOP_REQUEST for event: " + stopRequest);
+ context.getEventHandler().handle(new AMContainerEvent(stopRequest.getContainerId(),
+ AMContainerEventType.C_NM_STOP_SENT));
+ }
+}
http://git-wip-us.apache.org/repos/asf/tez/blob/aadd0492/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/rm/TezTestServiceTaskSchedulerService.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/rm/TezTestServiceTaskSchedulerService.java b/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/rm/TezTestServiceTaskSchedulerService.java
new file mode 100644
index 0000000..e3c18bf
--- /dev/null
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/rm/TezTestServiceTaskSchedulerService.java
@@ -0,0 +1,347 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.dag.app.rm;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Random;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ConcurrentMap;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import com.google.common.base.Preconditions;
+import com.google.common.primitives.Ints;
+import com.google.common.util.concurrent.ThreadFactoryBuilder;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterResponse;
+import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
+import org.apache.hadoop.yarn.api.records.Container;
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.api.records.ContainerStatus;
+import org.apache.hadoop.yarn.api.records.NodeId;
+import org.apache.hadoop.yarn.api.records.NodeReport;
+import org.apache.hadoop.yarn.api.records.Priority;
+import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.client.api.AMRMClient;
+import org.apache.hadoop.yarn.client.api.async.AMRMClientAsync;
+import org.apache.hadoop.yarn.exceptions.YarnException;
+import org.apache.tez.dag.api.TezUncheckedException;
+import org.apache.tez.dag.app.AppContext;
+import org.apache.tez.service.TezTestServiceConfConstants;
+
+
+// TODO Registration with RM - so that the AM is considered dead and restarted in the expiry interval - 10 minutes.
+
+public class TezTestServiceTaskSchedulerService extends TaskSchedulerService {
+
+ private static final Log LOG = LogFactory.getLog(TezTestServiceTaskSchedulerService.class);
+
+ private final ExecutorService appCallbackExecutor;
+ private final TaskSchedulerAppCallback appClientDelegate;
+ private final AppContext appContext;
+ private final List<String> serviceHosts;
+ private final ContainerFactory containerFactory;
+ private final Random random = new Random();
+ // Currently all services must be running on the same port.
+ private final int containerPort;
+
+ private final String clientHostname;
+ private final int clientPort;
+ private final String trackingUrl;
+ private final AtomicBoolean isStopped = new AtomicBoolean(false);
+ private final ConcurrentMap<Object, ContainerId> runningTasks =
+ new ConcurrentHashMap<Object, ContainerId>();
+
+ private final AMRMClientAsync<AMRMClient.ContainerRequest> amRmClient;
+
+ // Per instance
+ private final int memoryPerInstance;
+ private final int coresPerInstance;
+ private final int executorsPerInstance;
+
+ // Per Executor Thread
+ private final Resource resourcePerContainer;
+
+
+ /**
+ * Creates the scheduler. The configuration is accepted here, rather than only in
+ * serviceInit, so that the fields derived from it can be final.
+ *
+ * @param appClient callback target; calls to it go through a wrapper backed by a
+ * single-threaded executor
+ * @param appContext application context, also used to create container ids
+ * @param clientHostname host name passed when registering the application master
+ * @param clientPort port passed when registering the application master
+ * @param trackingUrl tracking URL passed when registering the application master
+ * @param conf supplies memory, executor count, vcores, RPC port and host list
+ * @throws IllegalArgumentException if memory per instance, executors per instance or the
+ * service RPC port is not configured to a positive value
+ */
+ public TezTestServiceTaskSchedulerService(TaskSchedulerAppCallback appClient,
+ AppContext appContext,
+ String clientHostname, int clientPort,
+ String trackingUrl,
+ Configuration conf) {
+ // Accepting configuration here to allow setting up fields as final
+ super(TezTestServiceTaskSchedulerService.class.getName());
+ // The executor must exist before the delegate is created, since the delegate wraps it.
+ this.appCallbackExecutor = createAppCallbackExecutorService();
+ this.appClientDelegate = createAppCallbackDelegate(appClient);
+ this.appContext = appContext;
+ this.serviceHosts = new LinkedList<String>();
+ this.containerFactory = new ContainerFactory(appContext);
+
+ this.memoryPerInstance = conf
+ .getInt(TezTestServiceConfConstants.TEZ_TEST_SERVICE_MEMORY_PER_INSTANCE_MB, -1);
+ Preconditions.checkArgument(memoryPerInstance > 0,
+ TezTestServiceConfConstants.TEZ_TEST_SERVICE_MEMORY_PER_INSTANCE_MB +
+ " must be configured");
+
+ this.executorsPerInstance = conf.getInt(
+ TezTestServiceConfConstants.TEZ_TEST_SERVICE_NUM_EXECUTORS_PER_INSTANCE,
+ -1);
+ Preconditions.checkArgument(executorsPerInstance > 0,
+ TezTestServiceConfConstants.TEZ_TEST_SERVICE_NUM_EXECUTORS_PER_INSTANCE +
+ " must be configured");
+
+ // vcores default to one per executor when not configured.
+ this.coresPerInstance = conf
+ .getInt(TezTestServiceConfConstants.TEZ_TEST_SERVICE_VCPUS_PER_INSTANCE,
+ executorsPerInstance);
+
+ this.containerPort = conf.getInt(TezTestServiceConfConstants.TEZ_TEST_SERVICE_RPC_PORT, -1);
+ // Validate the port itself; the previous check re-tested executorsPerInstance and so
+ // could never reject an unset port.
+ Preconditions.checkArgument(containerPort > 0,
+ TezTestServiceConfConstants.TEZ_TEST_SERVICE_RPC_PORT + " must be configured");
+
+ this.clientHostname = clientHostname;
+ this.clientPort = clientPort;
+ this.trackingUrl = trackingUrl;
+
+ // Each executor gets an equal share of the instance, truncated to whole units.
+ int memoryPerContainer = (int) (memoryPerInstance / (float) executorsPerInstance);
+ int coresPerContainer = (int) (coresPerInstance / (float) executorsPerInstance);
+ this.resourcePerContainer = Resource.newInstance(memoryPerContainer, coresPerContainer);
+ this.amRmClient = TezAMRMClientAsync.createAMRMClientAsync(5000, new FakeAmRmCallbackHandler());
+
+ String[] hosts = conf.getTrimmedStrings(TezTestServiceConfConstants.TEZ_TEST_SERVICE_HOSTS);
+ if (hosts == null || hosts.length == 0) {
+ hosts = new String[]{"localhost"};
+ }
+ for (String host : hosts) {
+ serviceHosts.add(host);
+ }
+
+ LOG.info("Running with configuration: " +
+ "memoryPerInstance=" + memoryPerInstance +
+ ", vcoresPerInstance=" + coresPerInstance +
+ ", executorsPerInstance=" + executorsPerInstance +
+ ", resourcePerContainerInferred=" + resourcePerContainer +
+ ", hosts=" + serviceHosts.toString());
+
+ }
+
+ /** Initializes the AM-RM client with the service configuration. */
+ @Override
+ public void serviceInit(Configuration conf) {
+ amRmClient.init(conf);
+ }
+
+ /**
+ * Starts the AM-RM client and registers the application master using the client host name,
+ * port and tracking URL given at construction. The registration response is not used.
+ *
+ * @throws TezUncheckedException wrapping any YarnException or IOException from registration
+ */
+ @Override
+ public void serviceStart() {
+ amRmClient.start();
+ try {
+ amRmClient.registerApplicationMaster(clientHostname, clientPort, trackingUrl);
+ } catch (YarnException e) {
+ throw new TezUncheckedException(e);
+ } catch (IOException e) {
+ throw new TezUncheckedException(e);
+ }
+ }
+
+ /**
+ * Unregisters the application master with the final status obtained from the app callback
+ * and shuts down the callback executor. Only the first invocation does any work.
+ *
+ * @throws TezUncheckedException wrapping any YarnException or IOException from unregistration
+ */
+ @Override
+ public void serviceStop() {
+ if (!this.isStopped.getAndSet(true)) {
+
+ try {
+ TaskSchedulerAppCallback.AppFinalStatus status = appClientDelegate.getFinalAppStatus();
+ amRmClient.unregisterApplicationMaster(status.exitStatus, status.exitMessage,
+ status.postCompletionTrackingUrl);
+ } catch (YarnException e) {
+ throw new TezUncheckedException(e);
+ } catch (IOException e) {
+ throw new TezUncheckedException(e);
+ } finally {
+ // Release the callback thread even when unregistration fails; isStopped is already
+ // set, so there is no later attempt that could do it.
+ appCallbackExecutor.shutdownNow();
+ }
+ // NOTE(review): amRmClient is started in serviceStart() but never stopped here - confirm
+ // whether that is intentional.
+ }
+ }
+
+ @Override
+ public Resource getAvailableResources() {
+ // TODO This needs information about all running executors, and the amount of memory etc available across the cluster.
+ return Resource
+ .newInstance(Ints.checkedCast(serviceHosts.size() * memoryPerInstance),
+ serviceHosts.size() * coresPerInstance);
+ }
+
+ /** Returns the number of configured service hosts. */
+ @Override
+ public int getClusterNodeCount() {
+ return serviceHosts.size();
+ }
+
+ /** Does nothing in this scheduler. */
+ @Override
+ public void resetMatchLocalityForAllHeldContainers() {
+ }
+
+ @Override
+ public Resource getTotalResources() {
+ return Resource
+ .newInstance(Ints.checkedCast(serviceHosts.size() * memoryPerInstance),
+ serviceHosts.size() * coresPerInstance);
+ }
+
+ /** Not supported: only logs a message; the node is not blacklisted. */
+ @Override
+ public void blacklistNode(NodeId nodeId) {
+ LOG.info("DEBUG: BlacklistNode not supported");
+ }
+
+ /** Not supported: only logs a message. */
+ @Override
+ public void unblacklistNode(NodeId nodeId) {
+ LOG.info("DEBUG: unBlacklistNode not supported");
+ }
+
+ /**
+ * Allocates a container for the task right away. The host comes from selectHost(hosts);
+ * the container always has the fixed per-container resource and the configured port.
+ * The capability, racks and containerSignature arguments are not used.
+ */
+ @Override
+ public void allocateTask(Object task, Resource capability, String[] hosts, String[] racks,
+ Priority priority, Object containerSignature, Object clientCookie) {
+ final String targetHost = selectHost(hosts);
+ final Container allocated = containerFactory.createContainer(
+ resourcePerContainer, priority, targetHost, containerPort);
+ runningTasks.put(task, allocated.getId());
+ appClientDelegate.taskAllocated(task, clientCookie, allocated);
+ }
+
+
+ /**
+ * Allocates a container for the task right away on a host picked by selectHost(null).
+ * The capability, containerId and containerSignature arguments are not used; a new
+ * container is always created.
+ */
+ @Override
+ public void allocateTask(Object task, Resource capability, ContainerId containerId,
+ Priority priority, Object containerSignature, Object clientCookie) {
+ final String targetHost = selectHost(null);
+ final Container allocated = containerFactory.createContainer(
+ resourcePerContainer, priority, targetHost, containerPort);
+ runningTasks.put(task, allocated.getId());
+ appClientDelegate.taskAllocated(task, clientCookie, allocated);
+ }
+
+ /**
+ * Forgets the task and reports its container as being released.
+ *
+ * @return true if a container was recorded for the task, false otherwise
+ */
+ @Override
+ public boolean deallocateTask(Object task, boolean taskSucceeded) {
+ final ContainerId released = runningTasks.remove(task);
+ if (released != null) {
+ appClientDelegate.containerBeingReleased(released);
+ return true;
+ }
+ LOG.error("Could not determine ContainerId for task: " + task +
+ " . Could have hit a race condition. Ignoring." +
+ " The query may hang since this \"unknown\" container is now taking up a slot permanently");
+ return false;
+ }
+
+ /** Ignores the request apart from logging it. Always returns null. */
+ @Override
+ public Object deallocateContainer(ContainerId containerId) {
+ LOG.info("DEBUG: Ignoring deallocateContainer for containerId: " + containerId);
+ return null;
+ }
+
+ /** Does nothing in this scheduler. */
+ @Override
+ public void setShouldUnregister() {
+
+ }
+
+ /** Always reports true. */
+ @Override
+ public boolean hasUnregistered() {
+ // NOTE(review): this used to say "No registration involved", but serviceStart() registers
+ // the application master and serviceStop() unregisters it - confirm that returning true
+ // unconditionally is still correct.
+ return true;
+ }
+
+ /** Creates the single-threaded executor, with named daemon threads, used for app callbacks. */
+ private ExecutorService createAppCallbackExecutorService() {
+ ThreadFactoryBuilder threads = new ThreadFactoryBuilder();
+ threads.setNameFormat("TaskSchedulerAppCaller #%d");
+ threads.setDaemon(true);
+ return Executors.newSingleThreadExecutor(threads.build());
+ }
+
+ /**
+ * Wraps the real app callback in a TaskSchedulerAppCallbackWrapper that uses
+ * appCallbackExecutor. That field must already be assigned when this is called.
+ */
+ private TaskSchedulerAppCallback createAppCallbackDelegate(
+ TaskSchedulerAppCallback realAppClient) {
+ return new TaskSchedulerAppCallbackWrapper(realAppClient,
+ appCallbackExecutor);
+ }
+
+ /**
+ * Picks the host a container is placed on.
+ *
+ * @param requestedHosts hosts named by the request; may be null or empty
+ * @return the lexicographically smallest requested host, or a random configured service
+ * host when the request names none
+ */
+ private String selectHost(String[] requestedHosts) {
+ if (requestedHosts != null && requestedHosts.length > 0) {
+ // Sort a copy: the array belongs to the caller and must not be reordered here.
+ String[] sortedHosts = requestedHosts.clone();
+ Arrays.sort(sortedHosts);
+ String selected = sortedHosts[0];
+ LOG.info("Selected host: " + selected + " from requested hosts: " + Arrays.toString(sortedHosts));
+ return selected;
+ }
+ String randomHost = serviceHosts.get(random.nextInt(serviceHosts.size()));
+ LOG.info("Selected random host: " + randomHost + " since the request contained no host information");
+ return randomHost;
+ }
+
+ /**
+ * Creates Container records locally, numbering them with a counter that starts at 2
+ * within the current application attempt.
+ */
+ static class ContainerFactory {
+ final AppContext appContext;
+ AtomicInteger nextId;
+
+ public ContainerFactory(AppContext appContext) {
+ this.appContext = appContext;
+ this.nextId = new AtomicInteger(2);
+ }
+
+ /**
+ * Builds a container on the given node with the next id; the container token is null.
+ */
+ public Container createContainer(Resource capability, Priority priority, String hostname, int port) {
+ ContainerId id =
+ ContainerId.newInstance(appContext.getApplicationAttemptId(), nextId.getAndIncrement());
+ NodeId node = NodeId.newInstance(hostname, port);
+ // NOTE(review): the HTTP address is the literal text "hostname:0", not the hostname
+ // argument - confirm this placeholder is intended.
+ return Container.newInstance(id, node, "hostname:0", capability, priority, null);
+ }
+ }
+
+ /**
+ * Callback handler passed to the AM-RM client. Every callback is a no-op and the reported
+ * progress is always 0.
+ */
+ private static class FakeAmRmCallbackHandler implements AMRMClientAsync.CallbackHandler {
+
+ @Override
+ public void onContainersCompleted(List<ContainerStatus> statuses) {
+
+ }
+
+ @Override
+ public void onContainersAllocated(List<Container> containers) {
+
+ }
+
+ @Override
+ public void onShutdownRequest() {
+
+ }
+
+ @Override
+ public void onNodesUpdated(List<NodeReport> updatedNodes) {
+
+ }
+
+ @Override
+ public float getProgress() {
+ return 0;
+ }
+
+ @Override
+ public void onError(Throwable e) {
+
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/tez/blob/aadd0492/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/taskcomm/TezTestServiceTaskCommunicatorImpl.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/taskcomm/TezTestServiceTaskCommunicatorImpl.java b/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/taskcomm/TezTestServiceTaskCommunicatorImpl.java
new file mode 100644
index 0000000..78cdcde
--- /dev/null
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/taskcomm/TezTestServiceTaskCommunicatorImpl.java
@@ -0,0 +1,182 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.dag.app.taskcomm;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ConcurrentMap;
+
+import com.google.protobuf.ByteString;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.DataOutputBuffer;
+import org.apache.hadoop.security.Credentials;
+import org.apache.hadoop.yarn.api.ApplicationConstants;
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.api.records.LocalResource;
+import org.apache.tez.dag.api.TaskCommunicatorContext;
+import org.apache.tez.dag.app.TezTaskCommunicatorImpl;
+import org.apache.tez.dag.app.TezTestServiceCommunicator;
+import org.apache.tez.dag.records.TezTaskAttemptID;
+import org.apache.tez.runtime.api.impl.TaskSpec;
+import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos.SubmitWorkRequestProto;
+import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos.SubmitWorkResponseProto;
+import org.apache.tez.util.ProtoConverters;
+
+
+public class TezTestServiceTaskCommunicatorImpl extends TezTaskCommunicatorImpl {
+
+ private static final Log LOG = LogFactory.getLog(TezTestServiceTaskCommunicatorImpl.class);
+
+ private final TezTestServiceCommunicator communicator;
+ private final SubmitWorkRequestProto BASE_SUBMIT_WORK_REQUEST;
+ private final ConcurrentMap<String, ByteBuffer> credentialMap;
+
+ /**
+ * Creates the task communicator, its service communicator (three threads) and the base
+ * submit-work request. The base request carries the fields that are the same for every
+ * submission: user, application id, attempt number and token identifier.
+ */
+ public TezTestServiceTaskCommunicatorImpl(
+ TaskCommunicatorContext taskCommunicatorContext) {
+ super(taskCommunicatorContext);
+ // TODO Maybe make this configurable
+ this.communicator = new TezTestServiceCommunicator(3);
+
+ SubmitWorkRequestProto.Builder baseBuilder = SubmitWorkRequestProto.newBuilder();
+
+ // TODO Avoid reading this from the environment
+ baseBuilder.setUser(System.getenv(ApplicationConstants.Environment.USER.name()));
+ baseBuilder.setApplicationIdString(
+ taskCommunicatorContext.getApplicationAttemptId().getApplicationId().toString());
+ baseBuilder
+ .setAppAttemptNumber(taskCommunicatorContext.getApplicationAttemptId().getAttemptId());
+ baseBuilder.setTokenIdentifier(getTokenIdentifier());
+
+ BASE_SUBMIT_WORK_REQUEST = baseBuilder.build();
+
+ // Serialized credentials, keyed by DAG name (see constructSubmitWorkRequest).
+ credentialMap = new ConcurrentHashMap<String, ByteBuffer>();
+ }
+
+ /** Initializes the parent service, then the service communicator, with the same configuration. */
+ @Override
+ public void serviceInit(Configuration conf) throws Exception {
+ super.serviceInit(conf);
+ this.communicator.init(conf);
+ }
+
+ /** Starts the parent service, then the service communicator. */
+ @Override
+ public void serviceStart() {
+ super.serviceStart();
+ this.communicator.start();
+ }
+
+ /**
+ * Stops the parent service, then the service communicator that serviceInit and
+ * serviceStart brought up.
+ */
+ @Override
+ public void serviceStop() {
+ super.serviceStop();
+ // The communicator is owned by this class, so it has to be stopped here.
+ this.communicator.stop();
+ }
+
+
+ /** Delegates to the parent implementation without additional work. */
+ @Override
+ public void registerRunningContainer(ContainerId containerId, String hostname, int port) {
+ super.registerRunningContainer(containerId, hostname, port);
+ }
+
+ /** Delegates to the parent implementation without additional work. */
+ @Override
+ public void registerContainerEnd(ContainerId containerId) {
+ super.registerContainerEnd(containerId);
+ }
+
+ /**
+ * Registers the task attempt with the parent implementation and then pushes it to the
+ * service hosting the container, using the host and port recorded for that container.
+ * When the service accepts the work, the context is told the task has started remotely.
+ *
+ * @throws RuntimeException if the request cannot be constructed, or if no container info
+ * is known for the container
+ */
+ @Override
+ public void registerRunningTaskAttempt(final ContainerId containerId, final TaskSpec taskSpec,
+ Map<String, LocalResource> additionalResources,
+ Credentials credentials,
+ boolean credentialsChanged) {
+ super.registerRunningTaskAttempt(containerId, taskSpec, additionalResources, credentials,
+ credentialsChanged);
+ SubmitWorkRequestProto requestProto;
+ try {
+ requestProto = constructSubmitWorkRequest(containerId, taskSpec);
+ } catch (IOException e) {
+ throw new RuntimeException("Failed to construct request", e);
+ }
+ ContainerInfo containerInfo = getContainerInfo(containerId);
+ String host;
+ int port;
+ if (containerInfo != null) {
+ // Read host and port together under the container info's lock.
+ synchronized (containerInfo) {
+ host = containerInfo.host;
+ port = containerInfo.port;
+ }
+ } else {
+ // TODO Handle this properly
+ throw new RuntimeException("ContainerInfo not found for container: " + containerId +
+ ", while trying to launch task: " + taskSpec.getTaskAttemptID());
+ }
+ communicator.submitWork(requestProto, host, port,
+ new TezTestServiceCommunicator.ExecuteRequestCallback<SubmitWorkResponseProto>() {
+ @Override
+ public void setResponse(SubmitWorkResponseProto response) {
+ LOG.info("Successfully launched task: " + taskSpec.getTaskAttemptID());
+ getTaskCommunicatorContext()
+ .taskStartedRemotely(taskSpec.getTaskAttemptID(), containerId);
+ }
+
+ @Override
+ public void indicateError(Throwable t) {
+ // TODO Handle this error. This is where an API on the context to indicate failure / rejection comes in.
+ // Until then the failure is only logged, so log it at error level.
+ LOG.error("Failed to run task: " + taskSpec.getTaskAttemptID() + " on containerId: " +
+ containerId, t);
+ }
+ });
+ }
+
+ /** Delegates to the parent implementation; nothing is sent to the service. */
+ @Override
+ public void unregisterRunningTaskAttempt(TezTaskAttemptID taskAttemptID) {
+ super.unregisterRunningTaskAttempt(taskAttemptID);
+ // Nothing else to do for now. The push API in the test does not support termination of a running task
+ }
+
+ /**
+ * Builds the submit-work request for a task: the base request plus container id, AM
+ * address, serialized credentials and the task spec.
+ * <p>
+ * Serialized credentials are cached per DAG name, so they are serialized once per DAG
+ * unless two threads race on the first request.
+ *
+ * @throws IOException if the credentials cannot be serialized
+ */
+ private SubmitWorkRequestProto constructSubmitWorkRequest(ContainerId containerId,
+ TaskSpec taskSpec) throws
+ IOException {
+ SubmitWorkRequestProto.Builder builder =
+ SubmitWorkRequestProto.newBuilder(BASE_SUBMIT_WORK_REQUEST);
+ builder.setContainerIdString(containerId.toString());
+ builder.setAmHost(getAddress().getHostName());
+ builder.setAmPort(getAddress().getPort());
+
+ // Credentials can change across DAGs. Ideally construct only once per DAG.
+ ByteBuffer credentialsBinary = credentialMap.get(taskSpec.getDAGName());
+ if (credentialsBinary == null) {
+ credentialsBinary = serializeCredentials(getTaskCommunicatorContext().getCredentials());
+ // Cache a duplicate so that reading the local buffer below leaves the cached copy intact.
+ credentialMap.putIfAbsent(taskSpec.getDAGName(), credentialsBinary.duplicate());
+ } else {
+ // Work on a duplicate so that the cached buffer is never consumed.
+ credentialsBinary = credentialsBinary.duplicate();
+ }
+ builder.setCredentialsBinary(ByteString.copyFrom(credentialsBinary));
+ builder.setTaskSpec(ProtoConverters.convertTaskSpecToProto(taskSpec));
+ return builder.build();
+ }
+
+ /**
+ * Writes a copy of the given credentials in token-storage format and returns a buffer
+ * wrapping exactly the bytes that were written.
+ *
+ * @throws IOException if writing the token storage fails
+ */
+ private ByteBuffer serializeCredentials(Credentials credentials) throws IOException {
+ Credentials copy = new Credentials();
+ copy.addAll(credentials);
+
+ DataOutputBuffer out = new DataOutputBuffer();
+ copy.writeTokenStorageToStream(out);
+
+ // getData() exposes the whole backing array; only the first getLength() bytes are valid.
+ return ByteBuffer.wrap(out.getData(), 0, out.getLength());
+ }
+}
http://git-wip-us.apache.org/repos/asf/tez/blob/aadd0492/tez-ext-service-tests/src/test/java/org/apache/tez/service/ContainerRunner.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/service/ContainerRunner.java b/tez-ext-service-tests/src/test/java/org/apache/tez/service/ContainerRunner.java
new file mode 100644
index 0000000..2bca4ed
--- /dev/null
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/service/ContainerRunner.java
@@ -0,0 +1,27 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.service;
+
+import java.io.IOException;
+
+import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos;
+import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos.RunContainerRequestProto;
+import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos.SubmitWorkRequestProto;
+
+/**
+ * Operations for accepting the two request types defined by the test service protocol.
+ */
+public interface ContainerRunner {
+
+ /** Accepts a run-container request. */
+ void queueContainer(RunContainerRequestProto request) throws IOException;
+ /** Accepts a submit-work request. */
+ void submitWork(SubmitWorkRequestProto request) throws IOException;
+}
http://git-wip-us.apache.org/repos/asf/tez/blob/aadd0492/tez-ext-service-tests/src/test/java/org/apache/tez/service/MiniTezTestServiceCluster.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/service/MiniTezTestServiceCluster.java b/tez-ext-service-tests/src/test/java/org/apache/tez/service/MiniTezTestServiceCluster.java
new file mode 100644
index 0000000..f47bd67
--- /dev/null
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/service/MiniTezTestServiceCluster.java
@@ -0,0 +1,163 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.service;
+
+import java.io.File;
+import java.io.IOException;
+import java.net.InetSocketAddress;
+
+import com.google.common.base.Preconditions;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileContext;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.service.AbstractService;
+import org.apache.hadoop.util.Shell;
+import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
+import org.apache.tez.service.impl.TezTestService;
+
+public class MiniTezTestServiceCluster extends AbstractService {
+
+ private static final Log LOG = LogFactory.getLog(MiniTezTestServiceCluster.class);
+
+ private final File testWorkDir;
+ private final long availableMemory;
+ private final int numExecutorsPerService;
+ private final String[] localDirs;
+ private final Configuration clusterSpecificConfiguration = new Configuration(false);
+
+ private TezTestService tezTestService;
+
+ public static MiniTezTestServiceCluster create(String clusterName, int numExecutorsPerService, long availableMemory, int numLocalDirs) {
+ return new MiniTezTestServiceCluster(clusterName, numExecutorsPerService, availableMemory, numLocalDirs);
+ }
+
+ // TODO Add support for multiple instances
+ private MiniTezTestServiceCluster(String clusterName, int numExecutorsPerService, long availableMemory, int numLocalDirs) {
+ super(clusterName + "_TezTestServerCluster");
+ Preconditions.checkArgument(numExecutorsPerService > 0);
+ Preconditions.checkArgument(availableMemory > 0);
+ Preconditions.checkArgument(numLocalDirs > 0);
+ String clusterNameTrimmed = clusterName.replace("$", "") + "_TezTestServerCluster";
+ File targetWorkDir = new File("target", clusterNameTrimmed);
+ try {
+ FileContext.getLocalFSFileContext().delete(
+ new Path(targetWorkDir.getAbsolutePath()), true);
+ } catch (Exception e) {
+ LOG.warn("Could not cleanup test workDir: " + targetWorkDir, e);
+ throw new RuntimeException("Could not cleanup test workDir: " + targetWorkDir, e);
+ }
+
+ if (Shell.WINDOWS) {
+ // The test working directory can exceed the maximum path length supported
+ // by some Windows APIs and cmd.exe (260 characters). To work around this,
+ // create a symlink in temporary storage with a much shorter path,
+ // targeting the full path to the test working directory. Then, use the
+ // symlink as the test working directory.
+ String targetPath = targetWorkDir.getAbsolutePath();
+ File link = new File(System.getProperty("java.io.tmpdir"),
+ String.valueOf(System.currentTimeMillis()));
+ String linkPath = link.getAbsolutePath();
+
+ try {
+ FileContext.getLocalFSFileContext().delete(new Path(linkPath), true);
+ } catch (IOException e) {
+ throw new YarnRuntimeException("could not cleanup symlink: " + linkPath, e);
+ }
+
+ // Guarantee target exists before creating symlink.
+ targetWorkDir.mkdirs();
+
+ Shell.ShellCommandExecutor shexec = new Shell.ShellCommandExecutor(
+ Shell.getSymlinkCommand(targetPath, linkPath));
+ try {
+ shexec.execute();
+ } catch (IOException e) {
+ throw new YarnRuntimeException(String.format(
+ "failed to create symlink from %s to %s, shell output: %s", linkPath,
+ targetPath, shexec.getOutput()), e);
+ }
+
+ this.testWorkDir = link;
+ } else {
+ this.testWorkDir = targetWorkDir;
+ }
+ this.numExecutorsPerService = numExecutorsPerService;
+ this.availableMemory = availableMemory;
+
+ // Setup Local Dirs
+ localDirs = new String[numLocalDirs];
+ for (int i = 0 ; i < numLocalDirs ; i++) {
+ File f = new File(testWorkDir, "localDir");
+ f.mkdirs();
+ LOG.info("Created localDir: " + f.getAbsolutePath());
+ localDirs[i] = f.getAbsolutePath();
+ }
+ }
+
+ @Override
+ public void serviceInit(Configuration conf) {
+ tezTestService = new TezTestService(conf, numExecutorsPerService, availableMemory, localDirs);
+ tezTestService.init(conf);
+
+ }
+
+ @Override
+ public void serviceStart() {
+ tezTestService.start();
+
+ clusterSpecificConfiguration.set(TezTestServiceConfConstants.TEZ_TEST_SERVICE_HOSTS,
+ getServiceAddress().getHostName());
+ clusterSpecificConfiguration.setInt(TezTestServiceConfConstants.TEZ_TEST_SERVICE_RPC_PORT,
+ getServiceAddress().getPort());
+
+ clusterSpecificConfiguration.setInt(
+ TezTestServiceConfConstants.TEZ_TEST_SERVICE_NUM_EXECUTORS_PER_INSTANCE,
+ numExecutorsPerService);
+ clusterSpecificConfiguration.setLong(
+ TezTestServiceConfConstants.TEZ_TEST_SERVICE_MEMORY_PER_INSTANCE_MB, availableMemory);
+ }
+
+ @Override
+ public void serviceStop() {
+ tezTestService.stop();
+ }
+
+ /**
+ * return the address at which the service is listening
+ * @return host:port
+ */
+ public InetSocketAddress getServiceAddress() {
+ Preconditions.checkState(getServiceState() == STATE.STARTED);
+ return tezTestService.getListenerAddress();
+ }
+
+ public int getShufflePort() {
+ Preconditions.checkState(getServiceState() == STATE.STARTED);
+ return tezTestService.getShufflePort();
+ }
+
+ public Configuration getClusterSpecificConfiguration() {
+ Preconditions.checkState(getServiceState() == STATE.STARTED);
+ return clusterSpecificConfiguration;
+ }
+
+ // Mainly for verification
+ public int getNumSubmissions() {
+ return tezTestService.getNumSubmissions();
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/tez/blob/aadd0492/tez-ext-service-tests/src/test/java/org/apache/tez/service/TezTestServiceConfConstants.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/service/TezTestServiceConfConstants.java b/tez-ext-service-tests/src/test/java/org/apache/tez/service/TezTestServiceConfConstants.java
new file mode 100644
index 0000000..bf4a5bd
--- /dev/null
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/service/TezTestServiceConfConstants.java
@@ -0,0 +1,41 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.service;
+
+/** Configuration keys, and one default value, for the Tez test service. */
+public class TezTestServiceConfConstants {
+
+  /** Common prefix shared by every key declared in this class. */
+  private static final String PREFIX = "tez.test.service.";
+
+  /** Number of executors per instance - used by the scheduler */
+  public static final String TEZ_TEST_SERVICE_NUM_EXECUTORS_PER_INSTANCE =
+      PREFIX + "num.executors.per-instance";
+
+  /** Memory available per instance - used by the scheduler */
+  public static final String TEZ_TEST_SERVICE_MEMORY_PER_INSTANCE_MB =
+      PREFIX + "memory.per.instance.mb";
+
+  /** CPUs available per instance - used by the scheduler */
+  public static final String TEZ_TEST_SERVICE_VCPUS_PER_INSTANCE =
+      PREFIX + "vcpus.per.instance";
+
+  /** Hosts on which the service is running. Currently assuming a single port for all instances */
+  public static final String TEZ_TEST_SERVICE_HOSTS =
+      PREFIX + "hosts";
+
+  /** Port on which the Service(s) listen. Currently a single port for all instances */
+  public static final String TEZ_TEST_SERVICE_RPC_PORT =
+      PREFIX + "rpc.port";
+
+  /** Number of threads to use in the AM to communicate with the external service */
+  public static final String TEZ_TEST_SERVICE_AM_COMMUNICATOR_NUM_THREADS =
+      PREFIX + "communicator.num.threads";
+
+  /** Default for {@link #TEZ_TEST_SERVICE_AM_COMMUNICATOR_NUM_THREADS}. */
+  public static final int TEZ_TEST_SERVICE_AM_COMMUNICATOR_NUM_THREADS_DEFAULT = 2;
+
+}
http://git-wip-us.apache.org/repos/asf/tez/blob/aadd0492/tez-ext-service-tests/src/test/java/org/apache/tez/service/TezTestServiceProtocolBlockingPB.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/service/TezTestServiceProtocolBlockingPB.java b/tez-ext-service-tests/src/test/java/org/apache/tez/service/TezTestServiceProtocolBlockingPB.java
new file mode 100644
index 0000000..1108f72
--- /dev/null
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/service/TezTestServiceProtocolBlockingPB.java
@@ -0,0 +1,22 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.service;
+
+import org.apache.hadoop.ipc.ProtocolInfo;
+import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos;
+
+@ProtocolInfo(protocolName = "org.apache.tez.service.TezTestServiceProtocolBlockingPB", protocolVersion = 1)
+public interface TezTestServiceProtocolBlockingPB extends TezTestServiceProtocolProtos.TezTestServiceProtocol.BlockingInterface {
+}
\ No newline at end of file
[16/43] tez git commit: TEZ-2412. Should kill vertex in
DAGImpl#VertexRerunWhileCommitting (zjffdu)
Posted by ss...@apache.org.
TEZ-2412. Should kill vertex in DAGImpl#VertexRerunWhileCommitting (zjffdu)
Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/6e6ad706
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/6e6ad706
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/6e6ad706
Branch: refs/heads/TEZ-2003
Commit: 6e6ad706f5b6611058541c3bf072343bf002ced5
Parents: 4a6808c
Author: Jeff Zhang <zj...@apache.org>
Authored: Fri May 8 19:55:53 2015 +0800
Committer: Jeff Zhang <zj...@apache.org>
Committed: Fri May 8 19:55:53 2015 +0800
----------------------------------------------------------------------
CHANGES.txt | 1 +
.../apache/tez/dag/app/dag/impl/DAGImpl.java | 20 ++++++++++++++++----
.../apache/tez/dag/app/dag/impl/VertexImpl.java | 3 +--
.../apache/tez/dag/app/dag/impl/TestCommit.java | 14 ++++++++++++--
4 files changed, 30 insertions(+), 8 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/tez/blob/6e6ad706/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 3520768..185e1b0 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -20,6 +20,7 @@ INCOMPATIBLE CHANGES
Default max limit increased. Should not affect existing users.
ALL CHANGES:
+ TEZ-2412. Should kill vertex in DAGImpl#VertexRerunWhileCommitting
TEZ-2410. VertexGroupCommitFinishedEvent & VertexCommitStartedEvent is not logged correctly
TEZ-776. Reduce AM mem usage caused by storing TezEvents
TEZ-2423. Tez UI: Remove Attempt Index column from task->attempts page
http://git-wip-us.apache.org/repos/asf/tez/blob/6e6ad706/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/DAGImpl.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/DAGImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/DAGImpl.java
index 1726c18..0a87241 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/DAGImpl.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/DAGImpl.java
@@ -1206,6 +1206,7 @@ public class DAGImpl implements org.apache.tez.dag.app.dag.DAG,
return dag.finished(DAGState.SUCCEEDED);
}
} else {
+ // check commits before move to COMPLETED state.
if (dag.commitFutures.isEmpty()) {
return finishWithTerminationCause(dag);
} else {
@@ -1218,7 +1219,7 @@ public class DAGImpl implements org.apache.tez.dag.app.dag.DAG,
return dag.getInternalState();
}
- // triggered by commit_complete
+ // triggered by commit_complete, checkCommitsForCompletion should only be called in COMMITTING/TERMINATING
static DAGState checkCommitsForCompletion(DAGImpl dag) {
LOG.info("Checking commits for DAG completion"
+ ", numCompletedVertices=" + dag.numCompletedVertices
@@ -1240,8 +1241,11 @@ public class DAGImpl implements org.apache.tez.dag.app.dag.DAG,
return dag.finished(DAGState.SUCCEEDED);
}
} else {
- if (!dag.commitFutures.isEmpty()) {
- // pending commits are running
+ Preconditions.checkState(dag.getState() == DAGState.TERMINATING
+ || dag.getState() == DAGState.COMMITTING,
+ "DAG should be in COMMITTING/TERMINATING state, but in " + dag.getState());
+ if (!dag.commitFutures.isEmpty() || dag.numCompletedVertices != dag.numVertices) {
+ // pending commits are running or still some vertices are not completed
return DAGState.TERMINATING;
} else {
return finishWithTerminationCause(dag);
@@ -2155,8 +2159,16 @@ public class DAGImpl implements org.apache.tez.dag.app.dag.DAG,
@Override
public void transition(DAGImpl dag, DAGEvent event) {
LOG.info("Vertex rerun while dag it is COMMITTING");
+ DAGEventVertexReRunning rerunEvent = (DAGEventVertexReRunning)event;
+ Vertex vertex = dag.getVertex(rerunEvent.getVertexId());
+ dag.reRunningVertices.add(vertex.getVertexId());
+ dag.numSuccessfulVertices--;
+ dag.numCompletedVertices--;
+ dag.addDiagnostic("Vertex re-running"
+ + ", vertexName=" + vertex.getName()
+ + ", vertexId=" + vertex.getVertexId());
dag.cancelCommits();
- dag.trySetTerminationCause(DAGTerminationCause.VERTEX_RERUN_IN_COMMITTING);
+ dag.enactKill(DAGTerminationCause.VERTEX_RERUN_IN_COMMITTING, VertexTerminationCause.VERTEX_RERUN_IN_COMMITTING);
}
}
http://git-wip-us.apache.org/repos/asf/tez/blob/6e6ad706/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexImpl.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexImpl.java
index 3a9558d..6b208b0 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexImpl.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexImpl.java
@@ -3669,6 +3669,7 @@ public class VertexImpl implements org.apache.tez.dag.app.dag.Vertex, EventHandl
case INIT_FAILURE:
case INTERNAL_ERROR:
case AM_USERCODE_FAILURE:
+ case VERTEX_RERUN_IN_COMMITTING:
case VERTEX_RERUN_AFTER_COMMIT:
case OTHER_VERTEX_FAILURE: vertex.tryEnactKill(trigger, TaskTerminationCause.OTHER_VERTEX_FAILURE); break;
default://should not occur
@@ -3685,8 +3686,6 @@ public class VertexImpl implements org.apache.tez.dag.app.dag.Vertex, EventHandl
@Override
public void transition(VertexImpl vertex, VertexEvent event) {
-
-
VertexEventTermination vet = (VertexEventTermination) event;
VertexTerminationCause trigger = vet.getTerminationCause();
String msg = "Vertex received Kill while in COMMITTING state, terminationCause="
http://git-wip-us.apache.org/repos/asf/tez/blob/6e6ad706/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestCommit.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestCommit.java b/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestCommit.java
index 8fc29c2..3d3bca4 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestCommit.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestCommit.java
@@ -1703,15 +1703,25 @@ public class TestCommit {
v3.handle(new VertexEventTaskCompleted(v3.getTask(0).getTaskId(),
TaskState.SUCCEEDED));
waitUntil(dag, DAGState.COMMITTING);
- dag.handle(new DAGEventVertexReRunning(v1.getVertexId()));
+ TezTaskID newTaskId = TezTaskID.getInstance(v1.getVertexId(), 1);
+ v1.handle(new VertexEventTaskReschedule(newTaskId));
+ // dag is in TERMINATING, wait for its rescheduled tasks to complete
+ waitUntil(dag, DAGState.TERMINATING);
+ waitUntil(v1, VertexState.TERMINATING);
+ // rescheduled task is killed
+ v1.handle(new VertexEventTaskCompleted(newTaskId, TaskState.KILLED));
waitUntil(dag, DAGState.FAILED);
+ Assert.assertEquals(VertexState.FAILED, v1.getState());
+ Assert.assertEquals(DAGState.FAILED, dag.getState());
+ Assert.assertEquals(VertexTerminationCause.VERTEX_RERUN_IN_COMMITTING, v1.getTerminationCause());
Assert.assertEquals(DAGTerminationCause.VERTEX_RERUN_IN_COMMITTING, dag.getTerminationCause());
Assert.assertTrue(dag.commitFutures.isEmpty());
historyEventHandler.verifyVertexGroupCommitStartedEvent("uv12", 0);
historyEventHandler.verifyVertexGroupCommitFinishedEvent("uv12", 0);
historyEventHandler.verifyVertexCommitStartedEvent(v1.getVertexId(), 0);
- historyEventHandler.verifyVertexFinishedEvent(v1.getVertexId(), 1);
+ // VertexFinishedEvent is logged twice due to vertex-rerun
+ historyEventHandler.verifyVertexFinishedEvent(v1.getVertexId(), 2);
historyEventHandler.verifyVertexCommitStartedEvent(v2.getVertexId(), 0);
historyEventHandler.verifyVertexFinishedEvent(v2.getVertexId(), 1);
historyEventHandler.verifyVertexCommitStartedEvent(v3.getVertexId(), 0);
[36/43] tez git commit: TEZ-2285. Allow TaskCommunicators to indicate
task/container liveness. (sseth)
Posted by ss...@apache.org.
TEZ-2285. Allow TaskCommunicators to indicate task/container liveness. (sseth)
Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/5239a45b
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/5239a45b
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/5239a45b
Branch: refs/heads/TEZ-2003
Commit: 5239a45b78cdd5eec194a36545353d07ad94f34b
Parents: 2b05376
Author: Siddharth Seth <ss...@apache.org>
Authored: Tue Apr 7 13:22:09 2015 -0700
Committer: Siddharth Seth <ss...@apache.org>
Committed: Fri May 8 14:41:31 2015 -0700
----------------------------------------------------------------------
TEZ-2003-CHANGES.txt | 1 +
.../apache/tez/dag/api/TaskCommunicatorContext.java | 4 ++++
.../tez/dag/app/TaskAttemptListenerImpTezDag.java | 10 ++++++++++
.../apache/tez/dag/app/TezTaskCommunicatorImpl.java | 16 +++++++++-------
4 files changed, 24 insertions(+), 7 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/tez/blob/5239a45b/TEZ-2003-CHANGES.txt
----------------------------------------------------------------------
diff --git a/TEZ-2003-CHANGES.txt b/TEZ-2003-CHANGES.txt
index e2c428d..9d6b220 100644
--- a/TEZ-2003-CHANGES.txt
+++ b/TEZ-2003-CHANGES.txt
@@ -14,5 +14,6 @@ ALL CHANGES:
TEZ-2241. Miscellaneous fixes after last reabse.
TEZ-2283. Fixes after rebase 04/07.
TEZ-2284. Separate TaskReporter into an interface.
+ TEZ-2285. Allow TaskCommunicators to indicate task/container liveness.
INCOMPATIBLE CHANGES:
http://git-wip-us.apache.org/repos/asf/tez/blob/5239a45b/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicatorContext.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicatorContext.java b/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicatorContext.java
index a85fb7f..0c3bac3 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicatorContext.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicatorContext.java
@@ -43,6 +43,10 @@ public interface TaskCommunicatorContext {
boolean isKnownContainer(ContainerId containerId);
+ void taskAlive(TezTaskAttemptID taskAttemptId);
+
+ void containerAlive(ContainerId containerId);
+
// TODO TEZ-2003 Move to vertex, taskIndex, version. Rename to taskAttempt*
void taskStartedRemotely(TezTaskAttemptID taskAttemptID, ContainerId containerId);
http://git-wip-us.apache.org/repos/asf/tez/blob/5239a45b/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java b/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
index 0ee448b..cd39cc8 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
@@ -259,6 +259,16 @@ public class TaskAttemptListenerImpTezDag extends AbstractService implements
}
@Override
+ public void taskAlive(TezTaskAttemptID taskAttemptId) {
+ taskHeartbeatHandler.pinged(taskAttemptId);
+ }
+
+ @Override
+ public void containerAlive(ContainerId containerId) {
+ pingContainerHeartbeatHandler(containerId);
+ }
+
+ @Override
public void taskStartedRemotely(TezTaskAttemptID taskAttemptID, ContainerId containerId) {
context.getEventHandler()
.handle(new TaskAttemptEventStartedRemotely(taskAttemptID, containerId, null));
http://git-wip-us.apache.org/repos/asf/tez/blob/5239a45b/tez-dag/src/main/java/org/apache/tez/dag/app/TezTaskCommunicatorImpl.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/TezTaskCommunicatorImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/TezTaskCommunicatorImpl.java
index 9d0c031..ef4f764 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/TezTaskCommunicatorImpl.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/TezTaskCommunicatorImpl.java
@@ -65,17 +65,19 @@ public class TezTaskCommunicatorImpl extends TaskCommunicator {
null, true, null, null, false);
private final TaskCommunicatorContext taskCommunicatorContext;
+ private final TezTaskUmbilicalProtocol taskUmbilical;
- private final ConcurrentMap<ContainerId, ContainerInfo> registeredContainers =
+ protected final ConcurrentMap<ContainerId, ContainerInfo> registeredContainers =
new ConcurrentHashMap<ContainerId, ContainerInfo>();
- private final ConcurrentMap<TaskAttempt, ContainerId> attemptToContainerMap =
+ protected final ConcurrentMap<TaskAttempt, ContainerId> attemptToContainerMap =
new ConcurrentHashMap<TaskAttempt, ContainerId>();
- private final TezTaskUmbilicalProtocol taskUmbilical;
- private final String tokenIdentifier;
- private final Token<JobTokenIdentifier> sessionToken;
+
+ protected final String tokenIdentifier;
+ protected final Token<JobTokenIdentifier> sessionToken;
protected InetSocketAddress address;
- private Server server;
+
+ protected volatile Server server;
public static final class ContainerInfo {
@@ -438,7 +440,7 @@ public class TezTaskCommunicatorImpl extends TaskCommunicator {
// Holder for Task information, which eventually will likely be VertexImpl, taskIndex, attemptIndex
- private static class TaskAttempt {
+ protected static class TaskAttempt {
// TODO TEZ-2003 Change this to work with VertexName, int id, int version
// TODO TEZ-2003 Avoid constructing this unit all over the place
private TezTaskAttemptID taskAttemptId;
[40/43] tez git commit: TEZ-2381. Fixes after rebase 04/28. (sseth)
Posted by ss...@apache.org.
TEZ-2381. Fixes after rebase 04/28. (sseth)
Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/9e098f70
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/9e098f70
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/9e098f70
Branch: refs/heads/TEZ-2003
Commit: 9e098f7024f0be9d53f28230893890766e37fe95
Parents: 0137467
Author: Siddharth Seth <ss...@apache.org>
Authored: Tue Apr 28 13:41:12 2015 -0700
Committer: Siddharth Seth <ss...@apache.org>
Committed: Fri May 8 14:42:26 2015 -0700
----------------------------------------------------------------------
TEZ-2003-CHANGES.txt | 1 +
.../dag/app/TaskAttemptListenerImpTezDag.java | 17 ++++----
.../app/TestTaskAttemptListenerImplTezDag.java | 44 +++++++++++++++-----
3 files changed, 42 insertions(+), 20 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/tez/blob/9e098f70/TEZ-2003-CHANGES.txt
----------------------------------------------------------------------
diff --git a/TEZ-2003-CHANGES.txt b/TEZ-2003-CHANGES.txt
index f6bc8e7..d42aaf8 100644
--- a/TEZ-2003-CHANGES.txt
+++ b/TEZ-2003-CHANGES.txt
@@ -18,5 +18,6 @@ ALL CHANGES:
TEZ-2302. Allow TaskCommunicators to subscribe for Vertex updates.
TEZ-2347. Expose additional information in TaskCommunicatorContext.
TEZ-2361. Propagate dag completion to TaskCommunicator.
+ TEZ-2381. Fixes after rebase 04/28.
INCOMPATIBLE CHANGES:
http://git-wip-us.apache.org/repos/asf/tez/blob/9e098f70/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java b/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
index 03b5602..d30919b 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
@@ -17,28 +17,22 @@
package org.apache.tez.dag.app;
-import javax.annotation.Nullable;
import java.io.IOException;
import java.lang.reflect.Constructor;
import java.lang.reflect.InvocationTargetException;
-import java.net.InetSocketAddress;
-import java.net.URISyntaxException;
-import java.net.UnknownHostException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
-import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
+import com.google.common.annotations.VisibleForTesting;
import org.apache.commons.collections4.ListUtils;
import org.apache.tez.dag.app.dag.event.TaskAttemptEvent;
import org.apache.tez.dag.app.dag.event.TaskAttemptEventStatusUpdate;
import org.apache.tez.dag.app.dag.event.TaskAttemptEventType;
import org.apache.tez.runtime.api.events.TaskStatusUpdateEvent;
import org.apache.tez.runtime.api.impl.EventType;
-import com.google.common.base.Preconditions;
-import org.apache.tez.dag.api.event.VertexState;
import org.apache.tez.dag.api.event.VertexStateUpdate;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -62,7 +56,6 @@ import org.apache.tez.dag.app.dag.Task;
import org.apache.tez.dag.app.dag.event.TaskAttemptEventAttemptFailed;
import org.apache.tez.dag.app.dag.event.TaskAttemptEventAttemptKilled;
import org.apache.tez.dag.app.dag.event.TaskAttemptEventStartedRemotely;
-import org.apache.tez.dag.app.dag.event.TaskAttemptEventType;
import org.apache.tez.dag.app.dag.event.VertexEventRouteEvent;
import org.apache.tez.dag.app.rm.container.AMContainerTask;
import org.apache.tez.dag.records.TezTaskAttemptID;
@@ -151,7 +144,7 @@ public class TaskAttemptListenerImpTezDag extends AbstractService implements
private TaskCommunicator createTaskCommunicator(String taskCommClassIdentifier, int taskCommIndex) {
if (taskCommClassIdentifier.equals(TezConstants.TEZ_AM_SERVICE_PLUGINS_NAME_DEFAULT)) {
LOG.info("Using Default Task Communicator");
- return new TezTaskCommunicatorImpl(taskCommunicatorContexts[taskCommIndex]);
+ return createTezTaskCommunicator(taskCommunicatorContexts[taskCommIndex]);
} else if (taskCommClassIdentifier.equals(TezConstants.TEZ_AM_SERVICE_PLUGINS_LOCAL_MODE_NAME_DEFAULT)) {
LOG.info("Using Default Local Task Communicator");
return new TezLocalTaskCommunicatorImpl(taskCommunicatorContexts[taskCommIndex]);
@@ -174,6 +167,12 @@ public class TaskAttemptListenerImpTezDag extends AbstractService implements
}
}
}
+
+ @VisibleForTesting
+ protected TezTaskCommunicatorImpl createTezTaskCommunicator(TaskCommunicatorContext context) {
+ return new TezTaskCommunicatorImpl(context);
+ }
+
public TaskHeartbeatResponse heartbeat(TaskHeartbeatRequest request)
throws IOException, TezException {
ContainerId containerId = ConverterUtils.toContainerId(request
http://git-wip-us.apache.org/repos/asf/tez/blob/9e098f70/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskAttemptListenerImplTezDag.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskAttemptListenerImplTezDag.java b/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskAttemptListenerImplTezDag.java
index 076f9e0..ae9ebc0 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskAttemptListenerImplTezDag.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskAttemptListenerImplTezDag.java
@@ -43,11 +43,9 @@ import org.apache.hadoop.yarn.event.EventHandler;
import org.apache.tez.common.ContainerContext;
import org.apache.tez.common.ContainerTask;
import org.apache.tez.common.security.JobTokenSecretManager;
+import org.apache.tez.dag.api.TaskHeartbeatRequest;
import org.apache.tez.dag.api.TezException;
import org.apache.hadoop.yarn.api.records.NodeId;
-import org.apache.hadoop.yarn.event.EventHandler;
-import org.apache.tez.common.ContainerContext;
-import org.apache.tez.common.ContainerTask;
import org.apache.tez.common.TezTaskUmbilicalProtocol;
import org.apache.tez.dag.api.TaskCommunicatorContext;
import org.apache.tez.dag.app.dag.DAG;
@@ -79,7 +77,9 @@ import org.mockito.ArgumentCaptor;
// TODO TEZ-2003 Rename to TestTezTaskCommunicator
public class TestTaskAttemptListenerImplTezDag {
private ApplicationId appId;
+ private ApplicationAttemptId appAttemptId;
private AppContext appContext;
+ Credentials credentials;
AMContainerMap amContainerMap;
EventHandler eventHandler;
DAG dag;
@@ -95,11 +95,13 @@ public class TestTaskAttemptListenerImplTezDag {
@Before
public void setUp() {
appId = ApplicationId.newInstance(1000, 1);
+ appAttemptId = ApplicationAttemptId.newInstance(appId, 1);
dag = mock(DAG.class);
TezDAGID dagID = TezDAGID.getInstance(appId, 1);
vertexID = TezVertexID.getInstance(dagID, 1);
taskID = TezTaskID.getInstance(vertexID, 1);
taskAttemptID = TezTaskAttemptID.getInstance(taskID, 1);
+ credentials = new Credentials();
amContainerMap = mock(AMContainerMap.class);
Map<ApplicationAccessType, String> appAcls = new HashMap<ApplicationAccessType, String>();
@@ -111,6 +113,8 @@ public class TestTaskAttemptListenerImplTezDag {
doReturn(dag).when(appContext).getCurrentDAG();
doReturn(appAcls).when(appContext).getApplicationACLs();
doReturn(amContainerMap).when(appContext).getAllContainers();
+ doReturn(appAttemptId).when(appContext).getApplicationAttemptId();
+ doReturn(credentials).when(appContext).getAppCredentials();
NodeId nodeId = NodeId.newInstance("localhost", 0);
AMContainer amContainer = mock(AMContainer.class);
@@ -152,7 +156,7 @@ public class TestTaskAttemptListenerImplTezDag {
assertEquals(taskSpec, containerTask.getTaskSpec());
// Task unregistered. Should respond to heartbeats
- taskAttemptListener.unregisterTaskAttempt(taskAttemptId, 0);
+ taskAttemptListener.unregisterTaskAttempt(taskAttemptID, 0);
containerTask = tezUmbilical.getTask(containerContext2);
assertNull(containerTask);
@@ -182,7 +186,7 @@ public class TestTaskAttemptListenerImplTezDag {
TezTaskUmbilicalProtocol tezUmbilical = taskCommunicator.getUmbilical();
ContainerId containerId1 = createContainerId(appId, 1);
- doReturn(mock(AMContainer.class)).when(amContainerMap).get(containerId1);
+
ContainerContext containerContext1 = new ContainerContext(containerId1.toString());
taskAttemptListener.registerRunningContainer(containerId1, 0);
containerTask = tezUmbilical.getTask(containerContext1);
@@ -259,7 +263,6 @@ public class TestTaskAttemptListenerImplTezDag {
int fromEventId, int maxEvents, int nextFromEventId,
List<TezEvent> sendEvents) throws IOException, TezException {
ContainerId containerId = createContainerId(appId, 1);
- long requestId = 0;
Vertex vertex = mock(Vertex.class);
doReturn(vertex).when(dag).getVertex(vertexID);
@@ -267,13 +270,13 @@ public class TestTaskAttemptListenerImplTezDag {
TaskAttemptEventInfo eventInfo = new TaskAttemptEventInfo(nextFromEventId, sendEvents);
doReturn(eventInfo).when(vertex).getTaskAttemptTezEvents(taskAttemptID, fromEventId, maxEvents);
- taskAttemptListener.registerRunningContainer(containerId);
- taskAttemptListener.registerTaskAttempt(amContainerTask, containerId);
+ taskAttemptListener.registerRunningContainer(containerId, 0);
+ taskAttemptListener.registerTaskAttempt(amContainerTask, containerId, 0);
+
+ TaskHeartbeatRequest request = mock(TaskHeartbeatRequest.class);
- TezHeartbeatRequest request = mock(TezHeartbeatRequest.class);
doReturn(containerId.toString()).when(request).getContainerIdentifier();
- doReturn(taskAttemptID).when(request).getCurrentTaskAttemptID();
- doReturn(++requestId).when(request).getRequestId();
+ doReturn(taskAttemptID).when(request).getTaskAttemptId();
doReturn(events).when(request).getEvents();
doReturn(maxEvents).when(request).getMaxEvents();
doReturn(fromEventId).when(request).getStartIndex();
@@ -287,6 +290,25 @@ public class TestTaskAttemptListenerImplTezDag {
return ContainerId.newInstance(appAttemptId, containerIdx);
}
+ private static class TaskAttemptListenerImplForTest extends TaskAttemptListenerImpTezDag {
+
+ public TaskAttemptListenerImplForTest(AppContext context,
+ TaskHeartbeatHandler thh,
+ ContainerHeartbeatHandler chh,
+ JobTokenSecretManager jobTokenSecretManager,
+ String[] taskCommunicatorClassIdentifiers,
+ boolean isPureLocalMode) {
+ super(context, thh, chh, jobTokenSecretManager, taskCommunicatorClassIdentifiers,
+ isPureLocalMode);
+ }
+
+ @Override
+ protected TezTaskCommunicatorImpl createTezTaskCommunicator(TaskCommunicatorContext context) {
+ return new TezTaskCommunicatorImplForTest(context);
+ }
+
+ }
+
private static class TezTaskCommunicatorImplForTest extends TezTaskCommunicatorImpl {
public TezTaskCommunicatorImplForTest(
[06/43] tez git commit: TEZ-2419. Inputs/Outputs should inform the
Processor about Interrupts when interrupted during a blocking Op. (sseth)
Posted by ss...@apache.org.
TEZ-2419. Inputs/Outputs should inform the Processor about Interrupts
when interrupted during a blocking Op. (sseth)
Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/55308630
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/55308630
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/55308630
Branch: refs/heads/TEZ-2003
Commit: 55308630b6354ce070550d1ea4efbedbbae8e13a
Parents: 7476fae
Author: Siddharth Seth <ss...@apache.org>
Authored: Wed May 6 14:39:08 2015 -0700
Committer: Siddharth Seth <ss...@apache.org>
Committed: Wed May 6 14:39:08 2015 -0700
----------------------------------------------------------------------
CHANGES.txt | 1 +
.../tez/mapreduce/lib/MRReaderMapReduce.java | 3 +-
.../tez/mapreduce/lib/MRReaderMapred.java | 3 +
.../apache/tez/mapreduce/output/MROutput.java | 3 +-
.../library/api/IOInterruptedException.java | 40 +++++++
.../tez/runtime/library/api/KeyValueReader.java | 2 +
.../tez/runtime/library/api/KeyValueWriter.java | 2 +
.../runtime/library/api/KeyValuesReader.java | 1 +
.../runtime/library/api/KeyValuesWriter.java | 2 +
.../common/readers/UnorderedKVReader.java | 4 +-
.../common/shuffle/orderedgrouped/Shuffle.java | 6 +-
.../common/sort/impl/ExternalSorter.java | 3 +-
.../common/sort/impl/PipelinedSorter.java | 20 ++--
.../common/sort/impl/dflt/DefaultSorter.java | 11 +-
.../writers/UnorderedPartitionedKVWriter.java | 3 +-
.../input/ConcatenatedMergedKeyValueInput.java | 8 +-
.../input/ConcatenatedMergedKeyValuesInput.java | 8 +-
.../library/input/OrderedGroupedKVInput.java | 12 +-
.../common/readers/TestUnorderedKVReader.java | 22 ++++
.../input/TestOrderedGroupedKVInput.java | 113 +++++++++++++++++++
20 files changed, 246 insertions(+), 21 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/tez/blob/55308630/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index fd45454..c865f12 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -7,6 +7,7 @@ Release 0.8.0: Unreleased
INCOMPATIBLE CHANGES
ALL CHANGES:
+ TEZ-2419. Inputs/Outputs should inform the Processor about Interrupts when interrupted during a blocking Op.
TEZ-1752. Inputs / Outputs in the Runtime library should be interruptable.
Release 0.7.0: Unreleased
http://git-wip-us.apache.org/repos/asf/tez/blob/55308630/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/lib/MRReaderMapReduce.java
----------------------------------------------------------------------
diff --git a/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/lib/MRReaderMapReduce.java b/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/lib/MRReaderMapReduce.java
index 0495751..5fc3e49 100644
--- a/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/lib/MRReaderMapReduce.java
+++ b/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/lib/MRReaderMapReduce.java
@@ -20,6 +20,7 @@ package org.apache.tez.mapreduce.lib;
import java.io.IOException;
+import org.apache.tez.runtime.library.api.IOInterruptedException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.mapred.JobConf;
@@ -116,7 +117,7 @@ public class MRReaderMapReduce extends MRReader {
hasNext = recordReader.nextKeyValue();
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
- throw new IOException("Interrupted while checking for next key-value", e);
+ throw new IOInterruptedException("Interrupted while checking for next key-value", e);
}
if (hasNext) {
inputRecordCounter.increment(1);
http://git-wip-us.apache.org/repos/asf/tez/blob/55308630/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/lib/MRReaderMapred.java
----------------------------------------------------------------------
diff --git a/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/lib/MRReaderMapred.java b/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/lib/MRReaderMapred.java
index 366e7a7..1bf71f6 100644
--- a/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/lib/MRReaderMapred.java
+++ b/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/lib/MRReaderMapred.java
@@ -117,6 +117,9 @@ public class MRReaderMapred extends MRReader {
hasCompletedProcessing();
completedProcessing = true;
}
+ // The underlying reader does not throw InterruptedExceptions. Cannot convert to an
+ // IOInterruptedException without checking the interrupt flag on each request, which is also
+ // not guaranteed. Relying on the user to ensure Interrupts are handled correctly.
return hasNext;
}
http://git-wip-us.apache.org/repos/asf/tez/blob/55308630/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/output/MROutput.java
----------------------------------------------------------------------
diff --git a/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/output/MROutput.java b/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/output/MROutput.java
index d19f707..a3b19ed 100644
--- a/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/output/MROutput.java
+++ b/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/output/MROutput.java
@@ -31,6 +31,7 @@ import java.util.concurrent.atomic.AtomicBoolean;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import org.apache.commons.lang.StringUtils;
+import org.apache.tez.runtime.library.api.IOInterruptedException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.classification.InterfaceAudience.Public;
@@ -499,7 +500,7 @@ public class MROutput extends AbstractLogicalOutput {
newRecordWriter.write(key, value);
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
- throw new IOException("Interrupted while writing next key-value",e);
+ throw new IOInterruptedException("Interrupted while writing next key-value",e);
}
} else {
oldRecordWriter.write(key, value);
http://git-wip-us.apache.org/repos/asf/tez/blob/55308630/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/api/IOInterruptedException.java
----------------------------------------------------------------------
diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/api/IOInterruptedException.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/api/IOInterruptedException.java
new file mode 100644
index 0000000..776b2a3
--- /dev/null
+++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/api/IOInterruptedException.java
@@ -0,0 +1,40 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.runtime.library.api;
+
+import java.io.IOException;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+
+/**
+ * Indicates that an I/O operation was interrupted.
+ */
+@InterfaceAudience.Public
+@InterfaceStability.Evolving
+public class IOInterruptedException extends IOException {
+
+ public IOInterruptedException(String message) {
+ super(message);
+ }
+
+ public IOInterruptedException(String message, Throwable cause) {
+ super(message, cause);
+ }
+
+ public IOInterruptedException(Throwable cause) {
+ super(cause);
+ }
+}
http://git-wip-us.apache.org/repos/asf/tez/blob/55308630/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/api/KeyValueReader.java
----------------------------------------------------------------------
diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/api/KeyValueReader.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/api/KeyValueReader.java
index d504d08..47f335b 100644
--- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/api/KeyValueReader.java
+++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/api/KeyValueReader.java
@@ -49,6 +49,7 @@ public abstract class KeyValueReader extends Reader {
* @return true if another key/value(s) pair exists, false if there are no more.
* @throws IOException
* if an error occurs
+ * @throws IOInterruptedException if IO was performing a blocking operation and was interrupted
*/
public abstract boolean next() throws IOException;
@@ -63,6 +64,7 @@ public abstract class KeyValueReader extends Reader {
/**
* Returns the current value
* @return the current value
+ *
* @throws IOException
*/
public abstract Object getCurrentValue() throws IOException;
http://git-wip-us.apache.org/repos/asf/tez/blob/55308630/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/api/KeyValueWriter.java
----------------------------------------------------------------------
diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/api/KeyValueWriter.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/api/KeyValueWriter.java
index 6acb24b..b5c4294 100644
--- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/api/KeyValueWriter.java
+++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/api/KeyValueWriter.java
@@ -39,6 +39,8 @@ public abstract class KeyValueWriter extends Writer {
* the value to write
* @throws IOException
* if an error occurs
+ * @throws IOInterruptedException if IO was performing a blocking operation
+ * and was interrupted
*/
public abstract void write(Object key, Object value) throws IOException;
}
http://git-wip-us.apache.org/repos/asf/tez/blob/55308630/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/api/KeyValuesReader.java
----------------------------------------------------------------------
diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/api/KeyValuesReader.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/api/KeyValuesReader.java
index 510f4b7..7760818 100644
--- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/api/KeyValuesReader.java
+++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/api/KeyValuesReader.java
@@ -49,6 +49,7 @@ public abstract class KeyValuesReader extends Reader {
* @return true if another key/value(s) pair exists, false if there are no more.
* @throws IOException
* if an error occurs
+ * @throws IOInterruptedException if IO was performing a blocking operation and was interrupted
*/
public abstract boolean next() throws IOException;
http://git-wip-us.apache.org/repos/asf/tez/blob/55308630/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/api/KeyValuesWriter.java
----------------------------------------------------------------------
diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/api/KeyValuesWriter.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/api/KeyValuesWriter.java
index 50fc2d6..9cdde43 100644
--- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/api/KeyValuesWriter.java
+++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/api/KeyValuesWriter.java
@@ -38,6 +38,8 @@ public abstract class KeyValuesWriter extends KeyValueWriter {
* @param values
* values to write
* @throws java.io.IOException
+ * @throws IOInterruptedException if IO was performing a blocking operation
+ * and was interrupted
*/
public abstract void write(Object key, Iterable<Object> values) throws IOException;
}
http://git-wip-us.apache.org/repos/asf/tez/blob/55308630/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/readers/UnorderedKVReader.java
----------------------------------------------------------------------
diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/readers/UnorderedKVReader.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/readers/UnorderedKVReader.java
index fc2e312..a8dd1b2 100644
--- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/readers/UnorderedKVReader.java
+++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/readers/UnorderedKVReader.java
@@ -20,6 +20,7 @@ package org.apache.tez.runtime.library.common.readers;
import java.io.IOException;
+import org.apache.tez.runtime.library.api.IOInterruptedException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.classification.InterfaceAudience.Private;
@@ -168,7 +169,6 @@ public class UnorderedKVReader<K, V> extends KeyValueReader {
*
* @return true if the next input exists, false otherwise
* @throws IOException
- * @throws InterruptedException
*/
private boolean moveToNextInput() throws IOException {
if (currentReader != null) { // Close the current reader.
@@ -185,7 +185,7 @@ public class UnorderedKVReader<K, V> extends KeyValueReader {
} catch (InterruptedException e) {
LOG.warn("Interrupted while waiting for next available input", e);
Thread.currentThread().interrupt();
- throw new IOException(e);
+ throw new IOInterruptedException(e);
}
if (currentFetchedInput == null) {
hasCompletedProcessing();
http://git-wip-us.apache.org/repos/asf/tez/blob/55308630/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/Shuffle.java
----------------------------------------------------------------------
diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/Shuffle.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/Shuffle.java
index ee05378..cb12a63 100644
--- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/Shuffle.java
+++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/Shuffle.java
@@ -305,6 +305,7 @@ public class Shuffle implements ExceptionReporter {
kvIter = runShuffleFuture.get();
} catch (ExecutionException e) {
Throwable cause = e.getCause();
+ // Processor interrupted while waiting for errors, will see an InterruptedException.
handleThrowable(cause);
}
if (isShutDown.get()) {
@@ -375,7 +376,9 @@ public class Shuffle implements ExceptionReporter {
try {
kvIter = merger.close();
} catch (Throwable e) {
- throw new ShuffleError("Error while doing final merge " , e);
+ // Set the throwable so that future.get() sees the reported error.
+ throwable.set(e);
+ throw new ShuffleError("Error while doing final merge ", e);
}
mergePhaseTime.setValue(System.currentTimeMillis() - startTime);
@@ -513,6 +516,7 @@ public class Shuffle implements ExceptionReporter {
LOG.info("Already shutdown. Ignoring error");
} else {
LOG.error("ShuffleRunner failed with error", t);
+ // In case of an abort / Interrupt - the runtime makes sure that this is ignored.
inputContext.fatalError(t, "Shuffle Runner Failed");
cleanupIgnoreErrors();
}
http://git-wip-us.apache.org/repos/asf/tez/blob/55308630/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/sort/impl/ExternalSorter.java
----------------------------------------------------------------------
diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/sort/impl/ExternalSorter.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/sort/impl/ExternalSorter.java
index ca4d889..40d22fe 100644
--- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/sort/impl/ExternalSorter.java
+++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/sort/impl/ExternalSorter.java
@@ -25,6 +25,7 @@ import java.util.Iterator;
import java.util.Map;
import com.google.common.collect.Maps;
+import org.apache.tez.runtime.library.api.IOInterruptedException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.classification.InterfaceAudience.Private;
@@ -267,7 +268,7 @@ public abstract class ExternalSorter {
combiner.combine(kvIter, writer);
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
- throw new IOException(e);
+ throw new IOInterruptedException("Combiner interrupted", e);
}
}
http://git-wip-us.apache.org/repos/asf/tez/blob/55308630/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/sort/impl/PipelinedSorter.java
----------------------------------------------------------------------
diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/sort/impl/PipelinedSorter.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/sort/impl/PipelinedSorter.java
index 030440e..d9de921 100644
--- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/sort/impl/PipelinedSorter.java
+++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/sort/impl/PipelinedSorter.java
@@ -29,6 +29,7 @@ import java.util.LinkedList;
import java.util.List;
import java.util.ListIterator;
import java.util.PriorityQueue;
+import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
@@ -37,6 +38,7 @@ import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.common.base.Stopwatch;
import com.google.common.collect.Lists;
+import org.apache.tez.runtime.library.api.IOInterruptedException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.conf.Configuration;
@@ -341,7 +343,7 @@ public class PipelinedSorter extends ExternalSorter {
mapOutputByteCounter.increment(valend - keystart);
}
- public void spill() throws IOException {
+ public void spill() throws IOException {
// create spill file
final long size = capacity +
+ (partitions * APPROX_HEADER_LENGTH);
@@ -352,7 +354,13 @@ public class PipelinedSorter extends ExternalSorter {
FSDataOutputStream out = rfs.create(filename, true, 4096);
try {
- merger.ready(); // wait for all the future results from sort threads
+ try {
+ merger.ready(); // wait for all the future results from sort threads
+ } catch (InterruptedException e) {
+ Thread.currentThread().interrupt();
+ LOG.info("Interrupted while waiting for mergers to complete");
+ throw new IOInterruptedException("Interrupted while waiting for mergers to complete", e);
+ }
LOG.info("Spilling to " + filename.toString());
for (int i = 0; i < partitions; ++i) {
if (isThreadInterrupted()) {
@@ -391,9 +399,6 @@ public class PipelinedSorter extends ExternalSorter {
//TODO: honor cache limits
indexCacheList.add(spillRec);
++numSpills;
- } catch(InterruptedException ie) {
- // TODO:the combiner has been interrupted
- Thread.currentThread().interrupt();
} finally {
out.close();
}
@@ -568,6 +573,7 @@ public class PipelinedSorter extends ExternalSorter {
cleanup();
}
Thread.currentThread().interrupt();
+ throw new IOInterruptedException("Interrupted while closing Output", ie);
}
}
@@ -1046,7 +1052,7 @@ public class PipelinedSorter extends ExternalSorter {
iter = futureIter.get();
this.add(iter);
}
-
+
StringBuilder sb = new StringBuilder();
for(SpanIterator sp: heap) {
sb.append(sp.toString());
@@ -1056,7 +1062,7 @@ public class PipelinedSorter extends ExternalSorter {
}
LOG.info("Heap = " + sb.toString());
return true;
- } catch(Exception e) {
+ } catch(ExecutionException e) {
LOG.info(e.toString());
return false;
}
http://git-wip-us.apache.org/repos/asf/tez/blob/55308630/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/sort/impl/dflt/DefaultSorter.java
----------------------------------------------------------------------
diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/sort/impl/dflt/DefaultSorter.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/sort/impl/dflt/DefaultSorter.java
index 9783c79..afe07f0 100644
--- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/sort/impl/dflt/DefaultSorter.java
+++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/sort/impl/dflt/DefaultSorter.java
@@ -32,6 +32,7 @@ import java.util.concurrent.locks.ReentrantLock;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.Lists;
import org.apache.commons.lang.exception.ExceptionUtils;
+import org.apache.tez.runtime.library.api.IOInterruptedException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.conf.Configuration;
@@ -607,7 +608,7 @@ public class DefaultSorter extends ExternalSorter implements IndexedSortable {
}
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
- throw new IOException(
+ throw new IOInterruptedException(
"Buffer interrupted while waiting for the writer", e);
}
}
@@ -644,7 +645,7 @@ public class DefaultSorter extends ExternalSorter implements IndexedSortable {
LOG.info("Spill thread interrupted");
//Reset status
Thread.currentThread().interrupt();
- throw new IOException("Spill failed", e);
+ throw new IOInterruptedException("Spill failed", e);
}
}
@@ -769,7 +770,11 @@ public class DefaultSorter extends ExternalSorter implements IndexedSortable {
+ " failed : " + ExceptionUtils.getStackTrace(lspillException);
outputContext.fatalError(lspillException, logMsg);
}
- throw new IOException("Spill failed", lspillException);
+ if (lspillException instanceof InterruptedException) {
+ throw new IOInterruptedException("Spill failed", lspillException);
+ } else {
+ throw new IOException("Spill failed", lspillException);
+ }
}
}
http://git-wip-us.apache.org/repos/asf/tez/blob/55308630/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/writers/UnorderedPartitionedKVWriter.java
----------------------------------------------------------------------
diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/writers/UnorderedPartitionedKVWriter.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/writers/UnorderedPartitionedKVWriter.java
index 37d8be6..9a98cd1 100644
--- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/writers/UnorderedPartitionedKVWriter.java
+++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/writers/UnorderedPartitionedKVWriter.java
@@ -54,6 +54,7 @@ import org.apache.tez.common.counters.TezCounter;
import org.apache.tez.runtime.api.Event;
import org.apache.tez.runtime.api.OutputContext;
import org.apache.tez.runtime.api.events.CompositeDataMovementEvent;
+import org.apache.tez.runtime.library.api.IOInterruptedException;
import org.apache.tez.runtime.library.api.TezRuntimeConfiguration;
import org.apache.tez.runtime.library.common.Constants;
import org.apache.tez.runtime.library.common.sort.impl.IFile;
@@ -354,7 +355,7 @@ public class UnorderedPartitionedKVWriter extends BaseUnorderedPartitionedKVWrit
return availableBuffers.take();
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
- throw new IOException("Interrupted while waiting for next buffer", e);
+ throw new IOInterruptedException("Interrupted while waiting for next buffer", e);
}
}
} else {
http://git-wip-us.apache.org/repos/asf/tez/blob/55308630/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/input/ConcatenatedMergedKeyValueInput.java
----------------------------------------------------------------------
diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/input/ConcatenatedMergedKeyValueInput.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/input/ConcatenatedMergedKeyValueInput.java
index 14b1e2c..45784d9 100644
--- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/input/ConcatenatedMergedKeyValueInput.java
+++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/input/ConcatenatedMergedKeyValueInput.java
@@ -64,7 +64,13 @@ public class ConcatenatedMergedKeyValueInput extends MergedLogicalInput {
currentReader = (KeyValueReader) reader;
currentReaderIndex++;
} catch (Exception e) {
- throw new IOException(e);
+ // An InterruptedException is not expected here since this works off of
+ // underlying readers which take care of throwing IOInterruptedExceptions
+ if (e instanceof IOException) {
+ throw (IOException) e;
+ } else {
+ throw new IOException(e);
+ }
}
}
return true;
http://git-wip-us.apache.org/repos/asf/tez/blob/55308630/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/input/ConcatenatedMergedKeyValuesInput.java
----------------------------------------------------------------------
diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/input/ConcatenatedMergedKeyValuesInput.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/input/ConcatenatedMergedKeyValuesInput.java
index 2a1e4c6..27ff324 100644
--- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/input/ConcatenatedMergedKeyValuesInput.java
+++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/input/ConcatenatedMergedKeyValuesInput.java
@@ -65,7 +65,13 @@ public class ConcatenatedMergedKeyValuesInput extends MergedLogicalInput {
currentReader = (KeyValuesReader) reader;
currentReaderIndex++;
} catch (Exception e) {
- throw new IOException(e);
+ // An InterruptedException is not expected here since this works off of
+ // underlying readers which take care of throwing IOInterruptedExceptions
+ if (e instanceof IOException) {
+ throw (IOException)e;
+ } else {
+ throw new IOException(e);
+ }
}
}
return true;
http://git-wip-us.apache.org/repos/asf/tez/blob/55308630/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/input/OrderedGroupedKVInput.java
----------------------------------------------------------------------
diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/input/OrderedGroupedKVInput.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/input/OrderedGroupedKVInput.java
index 49cf102..12a5955 100644
--- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/input/OrderedGroupedKVInput.java
+++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/input/OrderedGroupedKVInput.java
@@ -27,6 +27,8 @@ import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.atomic.AtomicBoolean;
+import com.google.common.annotations.VisibleForTesting;
+import org.apache.tez.runtime.library.api.IOInterruptedException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.classification.InterfaceAudience;
@@ -121,7 +123,7 @@ public class OrderedGroupedKVInput extends AbstractLogicalInput {
if (!isStarted.get()) {
memoryUpdateCallbackHandler.validateUpdateReceived();
// Start the shuffle - copy and merge
- shuffle = new Shuffle(getContext(), conf, getNumPhysicalInputs(), memoryUpdateCallbackHandler.getMemoryAssigned());
+ shuffle = createShuffle();
shuffle.run();
if (LOG.isDebugEnabled()) {
LOG.debug("Initialized the handlers in shuffle..Safe to start processing..");
@@ -137,6 +139,11 @@ public class OrderedGroupedKVInput extends AbstractLogicalInput {
}
}
+ @VisibleForTesting
+ Shuffle createShuffle() throws IOException {
+ return new Shuffle(getContext(), conf, getNumPhysicalInputs(), memoryUpdateCallbackHandler.getMemoryAssigned());
+ }
+
/**
* Check if the input is ready for consumption
*
@@ -207,6 +214,7 @@ public class OrderedGroupedKVInput extends AbstractLogicalInput {
* previous K-V pair will throw an Exception
*
* @return a KVReader over the sorted input.
+ * @throws IOInterruptedException if IO was performing a blocking operation and was interrupted
*/
@Override
public KeyValuesReader getReader() throws IOException, TezException {
@@ -240,7 +248,7 @@ public class OrderedGroupedKVInput extends AbstractLogicalInput {
waitForInputReady();
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
- throw new IOException("Interrupted while waiting for input ready", e);
+ throw new IOInterruptedException("Interrupted while waiting for input ready", e);
}
}
@SuppressWarnings("rawtypes")
http://git-wip-us.apache.org/repos/asf/tez/blob/55308630/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/readers/TestUnorderedKVReader.java
----------------------------------------------------------------------
diff --git a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/readers/TestUnorderedKVReader.java b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/readers/TestUnorderedKVReader.java
index 51ea42d..80bdc42 100644
--- a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/readers/TestUnorderedKVReader.java
+++ b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/readers/TestUnorderedKVReader.java
@@ -26,6 +26,7 @@ import org.apache.hadoop.io.Text;
import org.apache.tez.common.counters.TaskCounter;
import org.apache.tez.common.counters.TezCounter;
import org.apache.tez.common.counters.TezCounters;
+import org.apache.tez.runtime.library.api.IOInterruptedException;
import org.apache.tez.runtime.library.api.TezRuntimeConfiguration;
import org.apache.tez.runtime.library.common.InputAttemptIdentifier;
import org.apache.tez.runtime.library.common.shuffle.FetchedInput;
@@ -48,6 +49,7 @@ import java.util.LinkedList;
import static junit.framework.TestCase.fail;
import static org.mockito.Mockito.doAnswer;
import static org.mockito.Mockito.doNothing;
+import static org.mockito.Mockito.doThrow;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.spy;
@@ -165,4 +167,24 @@ public class TestUnorderedKVReader {
}
}
+ @Test(timeout = 5000)
+ public void testInterruptOnNext() throws IOException, InterruptedException {
+ ShuffleManager shuffleManager = mock(ShuffleManager.class);
+
+ // Simulate an interrupt while waiting for the next fetched input.
+ doThrow(new InterruptedException()).when(shuffleManager).getNextInput();
+ TezCounters counters = new TezCounters();
+ TezCounter inputRecords = counters.findCounter(TaskCounter.INPUT_RECORDS_PROCESSED);
+ UnorderedKVReader<Text, Text> reader =
+ new UnorderedKVReader<Text, Text>(shuffleManager, defaultConf, null, false, -1, -1,
+ inputRecords);
+
+ try {
+ reader.next();
+ fail("No data available to reader. Should not be able to access any record");
+ } catch (IOInterruptedException e) {
+ // Expected exception. Any other should fail the test.
+ }
+ }
+
}
http://git-wip-us.apache.org/repos/asf/tez/blob/55308630/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/input/TestOrderedGroupedKVInput.java
----------------------------------------------------------------------
diff --git a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/input/TestOrderedGroupedKVInput.java b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/input/TestOrderedGroupedKVInput.java
new file mode 100644
index 0000000..d4be802
--- /dev/null
+++ b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/input/TestOrderedGroupedKVInput.java
@@ -0,0 +1,113 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.runtime.library.input;
+
+import static org.mockito.Matchers.any;
+import static org.mockito.Mockito.doAnswer;
+import static org.mockito.Mockito.doReturn;
+import static org.mockito.Mockito.doThrow;
+import static org.mockito.Mockito.mock;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.tez.common.TezUtils;
+import org.apache.tez.common.counters.TezCounters;
+import org.apache.tez.dag.api.TezConfiguration;
+import org.apache.tez.dag.api.TezException;
+import org.apache.tez.dag.api.UserPayload;
+import org.apache.tez.runtime.api.InputContext;
+import org.apache.tez.runtime.library.api.IOInterruptedException;
+import org.apache.tez.runtime.library.common.MemoryUpdateCallbackHandler;
+import org.apache.tez.runtime.library.common.shuffle.orderedgrouped.Shuffle;
+import org.junit.Assert;
+import org.junit.Test;
+import org.mockito.invocation.InvocationOnMock;
+import org.mockito.stubbing.Answer;
+
+public class TestOrderedGroupedKVInput {
+
+ @Test(timeout = 5000)
+ public void testInterruptWhileAwaitingInput() throws IOException, TezException {
+
+ InputContext inputContext = createMockInputContext();
+ OrderedGroupedKVInput kvInput = new OrderedGroupedKVInputForTest(inputContext, 10);
+ kvInput.initialize();
+
+ kvInput.start();
+
+ try {
+ kvInput.getReader();
+ Assert.fail("getReader should not return since underlying inputs are not ready");
+ } catch (IOException e) {
+ Assert.assertTrue(e instanceof IOInterruptedException);
+ }
+
+ }
+
+
+ private InputContext createMockInputContext() throws IOException {
+ InputContext inputContext = mock(InputContext.class);
+ Configuration conf = new TezConfiguration();
+ UserPayload payLoad = TezUtils.createUserPayloadFromConf(conf);
+ String[] workingDirs = new String[]{"workDir1"};
+ TezCounters counters = new TezCounters();
+
+
+ doReturn(payLoad).when(inputContext).getUserPayload();
+ doReturn(workingDirs).when(inputContext).getWorkDirs();
+ doReturn(200 * 1024 * 1024l).when(inputContext).getTotalMemoryAvailableToTask();
+ doReturn(counters).when(inputContext).getCounters();
+
+ doAnswer(new Answer() {
+ @Override
+ public Object answer(InvocationOnMock invocation) throws Throwable {
+ Object[] args = invocation.getArguments();
+
+ if (args[1] instanceof MemoryUpdateCallbackHandler) {
+ MemoryUpdateCallbackHandler memUpdateCallbackHandler =
+ (MemoryUpdateCallbackHandler) args[1];
+ memUpdateCallbackHandler.memoryAssigned(200 * 1024 * 1024);
+ } else {
+ Assert.fail();
+ }
+ return null;
+ }
+ }).when(inputContext).requestInitialMemory(any(long.class),
+ any(MemoryUpdateCallbackHandler.class));
+
+ return inputContext;
+ }
+
+ static class OrderedGroupedKVInputForTest extends OrderedGroupedKVInput {
+
+ public OrderedGroupedKVInputForTest(InputContext inputContext, int numPhysicalInputs) {
+ super(inputContext, numPhysicalInputs);
+ }
+
+ Shuffle createShuffle() throws IOException {
+ Shuffle shuffle = mock(Shuffle.class);
+ try {
+ doThrow(new InterruptedException()).when(shuffle).waitForInput();
+ } catch (InterruptedException e) {
+ Assert.fail();
+ } catch (TezException e) {
+ Assert.fail();
+ }
+ return shuffle;
+ }
+ }
+
+}
[34/43] tez git commit: TEZ-2283. Fixes after rebase 04/07. (sseth)
Posted by ss...@apache.org.
TEZ-2283. Fixes after rebase 04/07. (sseth)
Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/26164791
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/26164791
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/26164791
Branch: refs/heads/TEZ-2003
Commit: 2616479150b7d4c6c0f7dd0359fff5e99b75869d
Parents: 5a46aa5
Author: Siddharth Seth <ss...@apache.org>
Authored: Tue Apr 7 13:13:30 2015 -0700
Committer: Siddharth Seth <ss...@apache.org>
Committed: Fri May 8 14:41:30 2015 -0700
----------------------------------------------------------------------
TEZ-2003-CHANGES.txt | 1 +
.../java/org/apache/tez/dag/app/rm/TestContainerReuse.java | 8 ++++----
.../org/apache/tez/service/impl/ContainerRunnerImpl.java | 4 +---
3 files changed, 6 insertions(+), 7 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/tez/blob/26164791/TEZ-2003-CHANGES.txt
----------------------------------------------------------------------
diff --git a/TEZ-2003-CHANGES.txt b/TEZ-2003-CHANGES.txt
index d51686d..6a4399c 100644
--- a/TEZ-2003-CHANGES.txt
+++ b/TEZ-2003-CHANGES.txt
@@ -12,5 +12,6 @@ ALL CHANGES:
TEZ-2175. Task priority should be available to the TaskCommunicator plugin.
TEZ-2187. Allow TaskCommunicators to report failed / killed attempts.
TEZ-2241. Miscellaneous fixes after last reabse.
+ TEZ-2283. Fixes after rebase 04/07.
INCOMPATIBLE CHANGES:
http://git-wip-us.apache.org/repos/asf/tez/blob/26164791/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestContainerReuse.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestContainerReuse.java b/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestContainerReuse.java
index d45346a..8b4ff07 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestContainerReuse.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestContainerReuse.java
@@ -1242,9 +1242,9 @@ public class TestContainerReuse {
assertEquals(1, assignEvent.getRemoteTaskLocalResources().size());
// Task assigned to container completed successfully. Container should be re-used.
- taskSchedulerEventHandler.handleEvent(new AMSchedulerEventTAEnded(ta111, container1.getId(), TaskAttemptState.SUCCEEDED));
+ taskSchedulerEventHandler.handleEvent(new AMSchedulerEventTAEnded(ta111, container1.getId(), TaskAttemptState.SUCCEEDED, null, 0));
drainableAppCallback.drain();
- verify(taskScheduler).deallocateTask(eq(ta111), eq(true));
+ verify(taskScheduler).deallocateTask(eq(ta111), eq(true), eq((TaskAttemptEndReason)null));
verify(taskSchedulerEventHandler).taskAllocated(eq(ta112), any(Object.class), eq(container1));
verify(rmClient, times(0)).releaseAssignedContainer(eq(container1.getId()));
eventHandler.verifyNoInvocations(AMContainerEventStopRequest.class);
@@ -1254,9 +1254,9 @@ public class TestContainerReuse {
// Task assigned to container completed successfully.
// Verify reuse across hosts.
- taskSchedulerEventHandler.handleEvent(new AMSchedulerEventTAEnded(ta112, container1.getId(), TaskAttemptState.SUCCEEDED));
+ taskSchedulerEventHandler.handleEvent(new AMSchedulerEventTAEnded(ta112, container1.getId(), TaskAttemptState.SUCCEEDED, null, 0));
drainableAppCallback.drain();
- verify(taskScheduler).deallocateTask(eq(ta112), eq(true));
+ verify(taskScheduler).deallocateTask(eq(ta112), eq(true), eq((TaskAttemptEndReason)null));
verify(rmClient, times(0)).releaseAssignedContainer(eq(container1.getId()));
eventHandler.verifyNoInvocations(AMContainerEventStopRequest.class);
eventHandler.reset();
http://git-wip-us.apache.org/repos/asf/tez/blob/26164791/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/ContainerRunnerImpl.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/ContainerRunnerImpl.java b/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/ContainerRunnerImpl.java
index e7c02c8..f3fc442 100644
--- a/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/ContainerRunnerImpl.java
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/ContainerRunnerImpl.java
@@ -69,10 +69,8 @@ import org.apache.tez.runtime.common.objectregistry.ObjectRegistryImpl;
import org.apache.tez.runtime.task.TezChild;
import org.apache.tez.runtime.task.TezChild.ContainerExecutionResult;
import org.apache.tez.shufflehandler.ShuffleHandler;
-import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos;
import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos.RunContainerRequestProto;
import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos.SubmitWorkRequestProto;
-import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos.TaskSpecProto;
import org.apache.tez.util.ProtoConverters;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -446,7 +444,7 @@ public class ContainerRunnerImpl extends AbstractService implements ContainerRun
request.getContainerIdString());
taskRunner = new TezTaskRunner(conf, taskUgi, localDirs,
- ProtoConverters.getTaskSpecfromProto(request.getTaskSpec()), umbilical,
+ ProtoConverters.getTaskSpecfromProto(request.getTaskSpec()),
request.getAppAttemptNumber(),
serviceConsumerMetadata, envMap, startedInputsMap, taskReporter, executor, objectRegistry,
pid,
[29/43] tez git commit: TEZ-2187. Allow TaskCommunicators to report
failed / killed attempts. (sseth)
Posted by ss...@apache.org.
TEZ-2187. Allow TaskCommunicators to report failed / killed attempts. (sseth)
Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/e1ab1914
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/e1ab1914
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/e1ab1914
Branch: refs/heads/TEZ-2003
Commit: e1ab191494658b5470d12d4b25b016530b28d398
Parents: 99a1b85
Author: Siddharth Seth <ss...@apache.org>
Authored: Tue Mar 10 01:25:39 2015 -0700
Committer: Siddharth Seth <ss...@apache.org>
Committed: Fri May 8 14:41:30 2015 -0700
----------------------------------------------------------------------
TEZ-2003-CHANGES.txt | 1 +
.../org/apache/tez/common/TezUtilsInternal.java | 60 +++++++++
.../tez/dag/api/TaskAttemptEndReason.java | 24 ++++
.../records/TaskAttemptTerminationCause.java | 7 +-
.../apache/tez/dag/api/TaskCommunicator.java | 2 +
.../tez/dag/api/TaskCommunicatorContext.java | 13 +-
.../dag/app/TaskAttemptListenerImpTezDag.java | 33 +++++
.../event/TaskAttemptEventAttemptFailed.java | 2 +
.../event/TaskAttemptEventAttemptKilled.java | 47 +++++++
.../dag/app/dag/event/TaskAttemptEventType.java | 5 +-
.../tez/dag/app/dag/impl/TaskAttemptImpl.java | 33 ++++-
.../tez/dag/app/rm/AMSchedulerEventTAEnded.java | 9 +-
.../dag/app/rm/LocalTaskSchedulerService.java | 3 +-
.../dag/app/rm/TaskSchedulerEventHandler.java | 7 +-
.../tez/dag/app/rm/TaskSchedulerService.java | 6 +-
.../dag/app/rm/YarnTaskSchedulerService.java | 8 +-
.../app/TestTaskAttemptListenerImplTezDag.java | 1 +
.../app/TestTaskAttemptListenerImplTezDag2.java | 126 +++++++++++++++++++
.../tez/dag/app/rm/TestContainerReuse.java | 65 +++++-----
.../app/rm/TestLocalTaskSchedulerService.java | 5 +-
.../tez/dag/app/rm/TestTaskScheduler.java | 18 +--
.../rm/TezTestServiceTaskSchedulerService.java | 3 +-
.../TezTestServiceTaskCommunicatorImpl.java | 36 +++++-
.../org/apache/tez/service/ContainerRunner.java | 5 +-
.../tez/service/MiniTezTestServiceCluster.java | 5 +-
.../tez/service/impl/ContainerRunnerImpl.java | 60 +++++++--
.../apache/tez/service/impl/TezTestService.java | 6 +-
.../impl/TezTestServiceProtocolServerImpl.java | 10 +-
.../tez/tests/TestExternalTezServices.java | 29 +++++
29 files changed, 548 insertions(+), 81 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/tez/blob/e1ab1914/TEZ-2003-CHANGES.txt
----------------------------------------------------------------------
diff --git a/TEZ-2003-CHANGES.txt b/TEZ-2003-CHANGES.txt
index 7726815..774a685 100644
--- a/TEZ-2003-CHANGES.txt
+++ b/TEZ-2003-CHANGES.txt
@@ -10,5 +10,6 @@ ALL CHANGES:
TEZ-2138. Fix minor bugs in adding default scheduler, getting launchers.
TEZ-2139. Update tez version to 0.7.0-TEZ-2003-SNAPSHOT.
TEZ-2175. Task priority should be available to the TaskCommunicator plugin.
+ TEZ-2187. Allow TaskCommunicators to report failed / killed attempts.
INCOMPATIBLE CHANGES:
http://git-wip-us.apache.org/repos/asf/tez/blob/e1ab1914/tez-common/src/main/java/org/apache/tez/common/TezUtilsInternal.java
----------------------------------------------------------------------
diff --git a/tez-common/src/main/java/org/apache/tez/common/TezUtilsInternal.java b/tez-common/src/main/java/org/apache/tez/common/TezUtilsInternal.java
index 9c78377..347a4f6 100644
--- a/tez-common/src/main/java/org/apache/tez/common/TezUtilsInternal.java
+++ b/tez-common/src/main/java/org/apache/tez/common/TezUtilsInternal.java
@@ -41,6 +41,7 @@ import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.security.Credentials;
import org.apache.log4j.Appender;
import org.apache.tez.dag.api.DagTypeConverters;
+import org.apache.tez.dag.api.TaskAttemptEndReason;
import org.apache.tez.dag.api.TezConstants;
import org.apache.tez.dag.api.records.DAGProtos;
import org.apache.tez.dag.api.records.DAGProtos.ConfigurationProto;
@@ -49,6 +50,7 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.base.Stopwatch;
+import org.apache.tez.dag.records.TaskAttemptTerminationCause;
@Private
public class TezUtilsInternal {
@@ -234,4 +236,62 @@ public class TezUtilsInternal {
return sb.toString();
}
+ public static TaskAttemptTerminationCause fromTaskAttemptEndReason(
+ TaskAttemptEndReason taskAttemptEndReason) {
+ if (taskAttemptEndReason == null) {
+ return null;
+ }
+ switch (taskAttemptEndReason) {
+ case COMMUNICATION_ERROR:
+ return TaskAttemptTerminationCause.COMMUNICATION_ERROR;
+ case SERVICE_BUSY:
+ return TaskAttemptTerminationCause.SERVICE_BUSY;
+ case INTERRUPTED_BY_SYSTEM:
+ return TaskAttemptTerminationCause.INTERRUPTED_BY_SYSTEM;
+ case INTERRUPTED_BY_USER:
+ return TaskAttemptTerminationCause.INTERRUPTED_BY_USER;
+ case OTHER:
+ return TaskAttemptTerminationCause.UNKNOWN_ERROR;
+ default:
+ return TaskAttemptTerminationCause.UNKNOWN_ERROR;
+ }
+ }
+
+ public static TaskAttemptEndReason toTaskAttemptEndReason(TaskAttemptTerminationCause cause) {
+ // TODO Post TEZ-2003. Consolidate these states, and mappings.
+ if (cause == null) {
+ return null;
+ }
+ switch (cause) {
+ case COMMUNICATION_ERROR:
+ return TaskAttemptEndReason.COMMUNICATION_ERROR;
+ case SERVICE_BUSY:
+ return TaskAttemptEndReason.SERVICE_BUSY;
+ case INTERRUPTED_BY_SYSTEM:
+ return TaskAttemptEndReason.INTERRUPTED_BY_SYSTEM;
+ case INTERRUPTED_BY_USER:
+ return TaskAttemptEndReason.INTERRUPTED_BY_USER;
+ case UNKNOWN_ERROR:
+ case TERMINATED_BY_CLIENT:
+ case TERMINATED_AT_SHUTDOWN:
+ case INTERNAL_PREEMPTION:
+ case EXTERNAL_PREEMPTION:
+ case TERMINATED_INEFFECTIVE_SPECULATION:
+ case TERMINATED_EFFECTIVE_SPECULATION:
+ case TERMINATED_ORPHANED:
+ case APPLICATION_ERROR:
+ case FRAMEWORK_ERROR:
+ case INPUT_READ_ERROR:
+ case OUTPUT_WRITE_ERROR:
+ case OUTPUT_LOST:
+ case TASK_HEARTBEAT_ERROR:
+ case CONTAINER_LAUNCH_FAILED:
+ case CONTAINER_EXITED:
+ case CONTAINER_STOPPED:
+ case NODE_FAILED:
+ case NODE_DISK_ERROR:
+ default:
+ return TaskAttemptEndReason.OTHER;
+ }
+ }
}
http://git-wip-us.apache.org/repos/asf/tez/blob/e1ab1914/tez-common/src/main/java/org/apache/tez/dag/api/TaskAttemptEndReason.java
----------------------------------------------------------------------
diff --git a/tez-common/src/main/java/org/apache/tez/dag/api/TaskAttemptEndReason.java b/tez-common/src/main/java/org/apache/tez/dag/api/TaskAttemptEndReason.java
new file mode 100644
index 0000000..96a4768
--- /dev/null
+++ b/tez-common/src/main/java/org/apache/tez/dag/api/TaskAttemptEndReason.java
@@ -0,0 +1,24 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.dag.api;
+
+// TODO TEZ-2003 Expose as a public API
+public enum TaskAttemptEndReason {
+ COMMUNICATION_ERROR,
+ SERVICE_BUSY,
+ INTERRUPTED_BY_SYSTEM,
+ INTERRUPTED_BY_USER,
+ OTHER
+}
http://git-wip-us.apache.org/repos/asf/tez/blob/e1ab1914/tez-common/src/main/java/org/apache/tez/dag/records/TaskAttemptTerminationCause.java
----------------------------------------------------------------------
diff --git a/tez-common/src/main/java/org/apache/tez/dag/records/TaskAttemptTerminationCause.java b/tez-common/src/main/java/org/apache/tez/dag/records/TaskAttemptTerminationCause.java
index ef0bb33..7112d9e 100644
--- a/tez-common/src/main/java/org/apache/tez/dag/records/TaskAttemptTerminationCause.java
+++ b/tez-common/src/main/java/org/apache/tez/dag/records/TaskAttemptTerminationCause.java
@@ -41,5 +41,10 @@ public enum TaskAttemptTerminationCause {
CONTAINER_STOPPED, // Container stopped or released by Tez
NODE_FAILED, // Node for the container failed
NODE_DISK_ERROR, // Disk failed on the node runnign the task
-
+
+ COMMUNICATION_ERROR, // Equivalent to a launch failure
+ SERVICE_BUSY, // Service rejected the task
+ INTERRUPTED_BY_SYSTEM, // Interrupted by the system. e.g. Pre-emption
+ INTERRUPTED_BY_USER, // Interrupted by the user
+
}
http://git-wip-us.apache.org/repos/asf/tez/blob/e1ab1914/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicator.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicator.java b/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicator.java
index 82eed20..945091e 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicator.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicator.java
@@ -45,6 +45,8 @@ public abstract class TaskCommunicator extends AbstractService {
Credentials credentials,
boolean credentialsChanged, int priority);
+ // TODO TEZ-2003. Are additional APIs required to mark a container as completed ? - for completeness.
+
// TODO TEZ-2003 Remove reference to TaskAttemptID
public abstract void unregisterRunningTaskAttempt(TezTaskAttemptID taskAttemptID);
http://git-wip-us.apache.org/repos/asf/tez/blob/e1ab1914/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicatorContext.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicatorContext.java b/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicatorContext.java
index 41675fe..a85fb7f 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicatorContext.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicatorContext.java
@@ -14,6 +14,7 @@
package org.apache.tez.dag.api;
+import javax.annotation.Nullable;
import java.io.IOException;
import org.apache.hadoop.security.Credentials;
@@ -37,15 +38,21 @@ public interface TaskCommunicatorContext {
// TODO TEZ-2003 Move to vertex, taskIndex, version
boolean canCommit(TezTaskAttemptID taskAttemptId) throws IOException;
+ // TODO TEZ-2003 Split the heartbeat API to a liveness check and a status update
TaskHeartbeatResponse heartbeat(TaskHeartbeatRequest request) throws IOException, TezException;
boolean isKnownContainer(ContainerId containerId);
- // TODO TEZ-2003 Move to vertex, taskIndex, version
+ // TODO TEZ-2003 Move to vertex, taskIndex, version. Rename to taskAttempt*
void taskStartedRemotely(TezTaskAttemptID taskAttemptID, ContainerId containerId);
- // TODO TEZ-2003 Add an API to register task failure - for example, a communication failure.
- // This will have to take into consideration the TA_FAILED event
+ // TODO TEZ-2003 Move to vertex, taskIndex, version. Rename to taskAttempt*
+ void taskKilled(TezTaskAttemptID taskAttemptId, TaskAttemptEndReason taskAttemptEndReason, @Nullable String diagnostics);
+
+ // TODO TEZ-2003 Move to vertex, taskIndex, version. Rename to taskAttempt*
+ void taskFailed(TezTaskAttemptID taskAttemptId, TaskAttemptEndReason taskAttemptEndReason, @Nullable String diagnostics);
+
+ // TODO TEZ-2003 API. Should a method exist for task succeeded.
// TODO Eventually Add methods to report availability stats to the scheduler.
}
http://git-wip-us.apache.org/repos/asf/tez/blob/e1ab1914/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java b/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
index b570301..94f6cae 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
@@ -37,14 +37,17 @@ import org.apache.tez.runtime.api.events.TaskStatusUpdateEvent;
import org.apache.tez.runtime.api.impl.EventType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import com.google.common.annotations.VisibleForTesting;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.security.Credentials;
import org.apache.hadoop.service.AbstractService;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.NodeId;
import org.apache.tez.common.ReflectionUtils;
+import org.apache.tez.common.TezUtilsInternal;
import org.apache.tez.dag.api.TaskCommunicator;
import org.apache.tez.dag.api.TaskCommunicatorContext;
+import org.apache.tez.dag.api.TaskAttemptEndReason;
import org.apache.tez.dag.api.TaskHeartbeatResponse;
import org.apache.tez.dag.api.TezConstants;
import org.apache.tez.dag.api.TezException;
@@ -54,7 +57,10 @@ import org.apache.hadoop.yarn.util.ConverterUtils;
import org.apache.tez.dag.api.TaskHeartbeatRequest;
import org.apache.tez.dag.app.dag.DAG;
import org.apache.tez.dag.app.dag.Task;
+import org.apache.tez.dag.app.dag.event.TaskAttemptEventAttemptFailed;
+import org.apache.tez.dag.app.dag.event.TaskAttemptEventAttemptKilled;
import org.apache.tez.dag.app.dag.event.TaskAttemptEventStartedRemotely;
+import org.apache.tez.dag.app.dag.event.TaskAttemptEventType;
import org.apache.tez.dag.app.dag.event.VertexEventRouteEvent;
import org.apache.tez.dag.app.rm.container.AMContainerTask;
import org.apache.tez.dag.records.TezTaskAttemptID;
@@ -257,6 +263,33 @@ public class TaskAttemptListenerImpTezDag extends AbstractService implements
pingContainerHeartbeatHandler(containerId);
}
+ @Override
+ public void taskKilled(TezTaskAttemptID taskAttemptId, TaskAttemptEndReason taskAttemptEndReason,
+ String diagnostics) {
+ // Regular flow via TaskAttempt will take care of un-registering from the heartbeat handler,
+ // and messages from the scheduler will release the container.
+ // TODO TEZ-2003 Maybe consider un-registering here itself, since the task is not active anymore,
+ // instead of waiting for the unregister to flow through the Container.
+ // Fix along the same lines as TEZ-2124 by introducing an explicit context.
+ context.getEventHandler().handle(new TaskAttemptEventAttemptKilled(taskAttemptId,
+ diagnostics, TezUtilsInternal.fromTaskAttemptEndReason(
+ taskAttemptEndReason)));
+ }
+
+ @Override
+ public void taskFailed(TezTaskAttemptID taskAttemptId, TaskAttemptEndReason taskAttemptEndReason,
+ String diagnostics) {
+ // Regular flow via TaskAttempt will take care of un-registering from the heartbeat handler,
+ // and messages from the scheduler will release the container.
+ // TODO TEZ-2003 Maybe consider un-registering here itself, since the task is not active anymore,
+ // instead of waiting for the unregister to flow through the Container.
+ // Fix along the same lines as TEZ-2124 by introducing an explicit context.
+ context.getEventHandler().handle(new TaskAttemptEventAttemptFailed(taskAttemptId,
+ TaskAttemptEventType.TA_FAILED, diagnostics, TezUtilsInternal.fromTaskAttemptEndReason(
+ taskAttemptEndReason)));
+ }
+
+
/**
* Child checking whether it can commit.
* <p/>
http://git-wip-us.apache.org/repos/asf/tez/blob/e1ab1914/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/TaskAttemptEventAttemptFailed.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/TaskAttemptEventAttemptFailed.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/TaskAttemptEventAttemptFailed.java
index b9c1d09..7ec8921 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/TaskAttemptEventAttemptFailed.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/TaskAttemptEventAttemptFailed.java
@@ -26,6 +26,8 @@ public class TaskAttemptEventAttemptFailed extends TaskAttemptEvent
private final String diagnostics;
private final TaskAttemptTerminationCause errorCause;
+
+ /* Accepted Types - FAILED, TIMED_OUT */
public TaskAttemptEventAttemptFailed(TezTaskAttemptID id,
TaskAttemptEventType type, String diagnostics, TaskAttemptTerminationCause errorCause) {
super(id, type);
http://git-wip-us.apache.org/repos/asf/tez/blob/e1ab1914/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/TaskAttemptEventAttemptKilled.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/TaskAttemptEventAttemptKilled.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/TaskAttemptEventAttemptKilled.java
new file mode 100644
index 0000000..72e6b07
--- /dev/null
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/TaskAttemptEventAttemptKilled.java
@@ -0,0 +1,47 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.dag.app.dag.event;
+
+import org.apache.tez.dag.records.TaskAttemptTerminationCause;
+import org.apache.tez.dag.records.TezTaskAttemptID;
+
+public class TaskAttemptEventAttemptKilled extends TaskAttemptEvent
+ implements DiagnosableEvent, TaskAttemptEventTerminationCauseEvent {
+
+ private final String diagnostics;
+ private final TaskAttemptTerminationCause errorCause;
+ public TaskAttemptEventAttemptKilled(TezTaskAttemptID id,
+ String diagnostics,
+ TaskAttemptTerminationCause errorCause) {
+ super(id, TaskAttemptEventType.TA_KILLED);
+ this.diagnostics = diagnostics;
+ this.errorCause = errorCause;
+ }
+
+ @Override
+ public String getDiagnosticInfo() {
+ return diagnostics;
+ }
+
+ @Override
+ public TaskAttemptTerminationCause getTerminationCause() {
+ return errorCause;
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/tez/blob/e1ab1914/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/TaskAttemptEventType.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/TaskAttemptEventType.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/TaskAttemptEventType.java
index b7aca36..6d20368 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/TaskAttemptEventType.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/TaskAttemptEventType.java
@@ -26,14 +26,15 @@ public enum TaskAttemptEventType {
//Producer:Task, Speculator
TA_SCHEDULE,
-//Producer: TaskAttemptListener
+//Producer: TaskAttemptListener | Vertex after routing events
TA_STARTED_REMOTELY,
TA_STATUS_UPDATE,
TA_DIAGNOSTICS_UPDATE, // REMOVE THIS - UNUSED
TA_DONE,
TA_FAILED,
+ TA_KILLED, // Generated by TaskCommunicators
TA_TIMED_OUT,
-
+
//Producer: Client, Scheduler, On speculation.
TA_KILL_REQUEST,
http://git-wip-us.apache.org/repos/asf/tez/blob/e1ab1914/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TaskAttemptImpl.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TaskAttemptImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TaskAttemptImpl.java
index c80571d..11d4df9 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TaskAttemptImpl.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TaskAttemptImpl.java
@@ -51,6 +51,7 @@ import org.apache.hadoop.yarn.state.StateMachineFactory;
import org.apache.hadoop.yarn.util.Clock;
import org.apache.hadoop.yarn.util.RackResolver;
import org.apache.hadoop.yarn.util.Records;
+import org.apache.tez.common.TezUtilsInternal;
import org.apache.tez.common.counters.DAGCounter;
import org.apache.tez.common.counters.TezCounters;
import org.apache.tez.dag.api.ProcessorDescriptor;
@@ -185,6 +186,11 @@ public class TaskAttemptImpl implements TaskAttempt,
private final StateMachine<TaskAttemptStateInternal, TaskAttemptEventType, TaskAttemptEvent> stateMachine;
+ // TODO TEZ-2003 We may need some additional state management for STATUS_UPDATES, FAILED, KILLED coming in before
+ // TASK_STARTED_REMOTELY. In case of a PUSH it's more intuitive to send TASK_STARTED_REMOTELY after communicating
+ // with the listening service and getting a response, which in turn can trigger STATUS_UPDATES / FAILED / KILLED
+
+ // TA_KILLED is handled the same as TA_KILL_REQUEST. The two names only distinguish an attempt that was already killed from a request to kill it.
private static StateMachineFactory
<TaskAttemptImpl, TaskAttemptStateInternal, TaskAttemptEventType, TaskAttemptEvent>
stateMachineFactory
@@ -225,6 +231,10 @@ public class TaskAttemptImpl implements TaskAttempt,
new TerminatedBeforeRunningTransition(KILLED_HELPER))
.addTransition(TaskAttemptStateInternal.START_WAIT,
TaskAttemptStateInternal.KILL_IN_PROGRESS,
+ TaskAttemptEventType.TA_KILLED,
+ new TerminatedBeforeRunningTransition(KILLED_HELPER))
+ .addTransition(TaskAttemptStateInternal.START_WAIT,
+ TaskAttemptStateInternal.KILL_IN_PROGRESS,
TaskAttemptEventType.TA_NODE_FAILED,
new NodeFailedBeforeRunningTransition())
.addTransition(TaskAttemptStateInternal.START_WAIT,
@@ -265,6 +275,10 @@ public class TaskAttemptImpl implements TaskAttempt,
new TerminatedWhileRunningTransition(KILLED_HELPER))
.addTransition(TaskAttemptStateInternal.RUNNING,
TaskAttemptStateInternal.KILL_IN_PROGRESS,
+ TaskAttemptEventType.TA_KILLED,
+ new TerminatedWhileRunningTransition(KILLED_HELPER))
+ .addTransition(TaskAttemptStateInternal.RUNNING,
+ TaskAttemptStateInternal.KILL_IN_PROGRESS,
TaskAttemptEventType.TA_NODE_FAILED,
new TerminatedWhileRunningTransition(KILLED_HELPER))
.addTransition(TaskAttemptStateInternal.RUNNING,
@@ -303,6 +317,7 @@ public class TaskAttemptImpl implements TaskAttempt,
TaskAttemptEventType.TA_DONE, TaskAttemptEventType.TA_FAILED,
TaskAttemptEventType.TA_TIMED_OUT,
TaskAttemptEventType.TA_KILL_REQUEST,
+ TaskAttemptEventType.TA_KILLED,
TaskAttemptEventType.TA_NODE_FAILED,
TaskAttemptEventType.TA_CONTAINER_TERMINATING,
TaskAttemptEventType.TA_OUTPUT_FAILED))
@@ -324,6 +339,7 @@ public class TaskAttemptImpl implements TaskAttempt,
TaskAttemptEventType.TA_DONE, TaskAttemptEventType.TA_FAILED,
TaskAttemptEventType.TA_TIMED_OUT,
TaskAttemptEventType.TA_KILL_REQUEST,
+ TaskAttemptEventType.TA_KILLED,
TaskAttemptEventType.TA_NODE_FAILED,
TaskAttemptEventType.TA_CONTAINER_TERMINATING,
TaskAttemptEventType.TA_OUTPUT_FAILED))
@@ -342,6 +358,7 @@ public class TaskAttemptImpl implements TaskAttempt,
TaskAttemptEventType.TA_DONE, TaskAttemptEventType.TA_FAILED,
TaskAttemptEventType.TA_TIMED_OUT,
TaskAttemptEventType.TA_KILL_REQUEST,
+ TaskAttemptEventType.TA_KILLED,
TaskAttemptEventType.TA_NODE_FAILED,
TaskAttemptEventType.TA_CONTAINER_TERMINATING,
TaskAttemptEventType.TA_CONTAINER_TERMINATED,
@@ -361,6 +378,7 @@ public class TaskAttemptImpl implements TaskAttempt,
TaskAttemptEventType.TA_DONE, TaskAttemptEventType.TA_FAILED,
TaskAttemptEventType.TA_TIMED_OUT,
TaskAttemptEventType.TA_KILL_REQUEST,
+ TaskAttemptEventType.TA_KILLED,
TaskAttemptEventType.TA_NODE_FAILED,
TaskAttemptEventType.TA_CONTAINER_TERMINATING,
TaskAttemptEventType.TA_CONTAINER_TERMINATED,
@@ -383,6 +401,12 @@ public class TaskAttemptImpl implements TaskAttempt,
TaskAttemptStateInternal.SUCCEEDED,
EnumSet.of(TaskAttemptStateInternal.KILLED,
TaskAttemptStateInternal.SUCCEEDED),
+ TaskAttemptEventType.TA_KILLED,
+ new TerminatedAfterSuccessTransition())
+ .addTransition(
+ TaskAttemptStateInternal.SUCCEEDED,
+ EnumSet.of(TaskAttemptStateInternal.KILLED,
+ TaskAttemptStateInternal.SUCCEEDED),
TaskAttemptEventType.TA_NODE_FAILED,
new TerminatedAfterSuccessTransition())
.addTransition(
@@ -434,7 +458,6 @@ public class TaskAttemptImpl implements TaskAttempt,
this.leafVertex = leafVertex;
}
-
@Override
public TezTaskAttemptID getID() {
return attemptId;
@@ -1030,6 +1053,7 @@ public class TaskAttemptImpl implements TaskAttempt,
// Compute node/rack location request even if re-scheduled.
Set<String> racks = new HashSet<String>();
+ // TODO Post TEZ-2003. Allow for a policy in the VMPlugin to define locality for different attempts.
TaskLocationHint locationHint = ta.getTaskLocationHint();
if (locationHint != null) {
if (locationHint.getRacks() != null) {
@@ -1104,6 +1128,8 @@ public class TaskAttemptImpl implements TaskAttempt,
@Override
public void transition(TaskAttemptImpl ta, TaskAttemptEvent event) {
+ // This transition should not be invoked directly, if a scheduler event has already been sent out.
+ // Sub-classes should be used if a scheduler request has been sent.
ta.setFinishTime();
if (event instanceof DiagnosableEvent) {
@@ -1218,7 +1244,8 @@ public class TaskAttemptImpl implements TaskAttempt,
// Inform the scheduler
if (sendSchedulerEvent()) {
ta.sendEvent(new AMSchedulerEventTAEnded(ta, ta.containerId, helper
- .getTaskAttemptState(), ta.getVertex().getTaskSchedulerIdentifier()));
+ .getTaskAttemptState(), TezUtilsInternal.toTaskAttemptEndReason(ta.terminationCause),
+ ta.getVertex().getTaskSchedulerIdentifier()));
}
}
}
@@ -1300,7 +1327,7 @@ public class TaskAttemptImpl implements TaskAttempt,
// Inform the Scheduler.
ta.sendEvent(new AMSchedulerEventTAEnded(ta, ta.containerId,
- TaskAttemptState.SUCCEEDED, ta.getVertex().getTaskSchedulerIdentifier()));
+ TaskAttemptState.SUCCEEDED, null, ta.getVertex().getTaskSchedulerIdentifier()));
// Inform the task.
ta.sendEvent(new TaskEventTAUpdate(ta.attemptId,
http://git-wip-us.apache.org/repos/asf/tez/blob/e1ab1914/tez-dag/src/main/java/org/apache/tez/dag/app/rm/AMSchedulerEventTAEnded.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/AMSchedulerEventTAEnded.java b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/AMSchedulerEventTAEnded.java
index 2ace642..a775948 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/AMSchedulerEventTAEnded.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/AMSchedulerEventTAEnded.java
@@ -18,6 +18,7 @@
package org.apache.tez.dag.app.rm;
import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.tez.dag.api.TaskAttemptEndReason;
import org.apache.tez.dag.api.oldrecords.TaskAttemptState;
import org.apache.tez.dag.app.dag.TaskAttempt;
import org.apache.tez.dag.records.TezTaskAttemptID;
@@ -27,14 +28,16 @@ public class AMSchedulerEventTAEnded extends AMSchedulerEvent {
private final TaskAttempt attempt;
private final ContainerId containerId;
private final TaskAttemptState state;
+ private final TaskAttemptEndReason taskAttemptEndReason;
private final int schedulerId;
public AMSchedulerEventTAEnded(TaskAttempt attempt, ContainerId containerId,
- TaskAttemptState state, int schedulerId) {
+ TaskAttemptState state, TaskAttemptEndReason taskAttemptEndReason, int schedulerId) {
super(AMSchedulerEventType.S_TA_ENDED);
this.attempt = attempt;
this.containerId = containerId;
this.state = state;
+ this.taskAttemptEndReason = taskAttemptEndReason;
this.schedulerId = schedulerId;
}
@@ -57,4 +60,8 @@ public class AMSchedulerEventTAEnded extends AMSchedulerEvent {
public int getSchedulerId() {
return schedulerId;
}
+
+ public TaskAttemptEndReason getTaskAttemptEndReason() {
+ return taskAttemptEndReason;
+ }
}
http://git-wip-us.apache.org/repos/asf/tez/blob/e1ab1914/tez-dag/src/main/java/org/apache/tez/dag/app/rm/LocalTaskSchedulerService.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/LocalTaskSchedulerService.java b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/LocalTaskSchedulerService.java
index 72a074f..a234e07 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/LocalTaskSchedulerService.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/LocalTaskSchedulerService.java
@@ -40,6 +40,7 @@ import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.NodeId;
import org.apache.hadoop.yarn.api.records.Priority;
import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.tez.dag.api.TaskAttemptEndReason;
import org.apache.tez.dag.api.TezConfiguration;
import org.apache.tez.dag.api.TezUncheckedException;
import org.apache.tez.dag.app.AppContext;
@@ -149,7 +150,7 @@ public class LocalTaskSchedulerService extends TaskSchedulerService {
}
@Override
- public boolean deallocateTask(Object task, boolean taskSucceeded) {
+ public boolean deallocateTask(Object task, boolean taskSucceeded, TaskAttemptEndReason endReason) {
return taskRequestHandler.addDeallocateTaskRequest(task);
}
http://git-wip-us.apache.org/repos/asf/tez/blob/e1ab1914/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerEventHandler.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerEventHandler.java b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerEventHandler.java
index 8e5fc71..9f09f68 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerEventHandler.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerEventHandler.java
@@ -289,7 +289,9 @@ public class TaskSchedulerEventHandler extends AbstractService
private void handleTAUnsuccessfulEnd(AMSchedulerEventTAEnded event) {
TaskAttempt attempt = event.getAttempt();
- boolean wasContainerAllocated = taskSchedulers[event.getSchedulerId()].deallocateTask(attempt, false);
+ // Propagate state and failure cause (if any) when informing the scheduler about the de-allocation.
+ boolean wasContainerAllocated = taskSchedulers[event.getSchedulerId()]
+ .deallocateTask(attempt, false, event.getTaskAttemptEndReason());
// use stored value of container id in case the scheduler has removed this
// assignment because the task has been deallocated earlier.
// retroactive case
@@ -311,6 +313,7 @@ public class TaskSchedulerEventHandler extends AbstractService
sendEvent(new AMContainerEventStopRequest(attemptContainerId));
// Inform the Node - the task has asked to be STOPPED / has already
// stopped.
+ // AMNodeImpl blacklisting logic does not account for KILLED attempts.
sendEvent(new AMNodeEventTaskAttemptEnded(appContext.getAllContainers().
get(attemptContainerId).getContainer().getNodeId(), attemptContainerId,
attempt.getID(), event.getState() == TaskAttemptState.FAILED));
@@ -332,7 +335,7 @@ public class TaskSchedulerEventHandler extends AbstractService
}
boolean wasContainerAllocated = taskSchedulers[event.getSchedulerId()].deallocateTask(attempt,
- true);
+ true, null);
if (!wasContainerAllocated) {
LOG.error("De-allocated successful task: " + attempt.getID()
+ ", but TaskScheduler reported no container assigned to task");
http://git-wip-us.apache.org/repos/asf/tez/blob/e1ab1914/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerService.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerService.java b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerService.java
index 48d5455..07dfcd6 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerService.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerService.java
@@ -32,6 +32,7 @@ import org.apache.hadoop.yarn.api.records.NodeId;
import org.apache.hadoop.yarn.api.records.NodeReport;
import org.apache.hadoop.yarn.api.records.Priority;
import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.tez.dag.api.TaskAttemptEndReason;
public abstract class TaskSchedulerService extends AbstractService{
@@ -61,8 +62,9 @@ public abstract class TaskSchedulerService extends AbstractService{
public abstract void allocateTask(Object task, Resource capability,
ContainerId containerId, Priority priority, Object containerSignature,
Object clientCookie);
-
- public abstract boolean deallocateTask(Object task, boolean taskSucceeded);
+
+ /** Plugin writers must de-allocate a container once it is done, so that it can be collected. */
+ public abstract boolean deallocateTask(Object task, boolean taskSucceeded, TaskAttemptEndReason endReason);
public abstract Object deallocateContainer(ContainerId containerId);
http://git-wip-us.apache.org/repos/asf/tez/blob/e1ab1914/tez-dag/src/main/java/org/apache/tez/dag/app/rm/YarnTaskSchedulerService.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/YarnTaskSchedulerService.java b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/YarnTaskSchedulerService.java
index 44f5484..1fc9ac2 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/YarnTaskSchedulerService.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/YarnTaskSchedulerService.java
@@ -56,6 +56,7 @@ import org.apache.hadoop.yarn.client.api.async.AMRMClientAsync;
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.util.RackResolver;
import org.apache.hadoop.yarn.util.resource.Resources;
+import org.apache.tez.dag.api.TaskAttemptEndReason;
import org.apache.tez.dag.api.TezConfiguration;
import org.apache.tez.dag.api.TezUncheckedException;
import org.apache.tez.dag.app.AppContext;
@@ -987,10 +988,13 @@ public class YarnTaskSchedulerService extends TaskSchedulerService
* the task to de-allocate.
* @param taskSucceeded
* specify whether the task succeeded or failed.
+ * @param endReason
+ * reason for the task ending
* @return true if a container is assigned to this task.
*/
@Override
- public boolean deallocateTask(Object task, boolean taskSucceeded) {
+ public boolean deallocateTask(Object task, boolean taskSucceeded,
+ TaskAttemptEndReason endReason) {
Map<CookieContainerRequest, Container> assignedContainers = null;
synchronized (this) {
@@ -1180,7 +1184,7 @@ public class YarnTaskSchedulerService extends TaskSchedulerService
CookieContainerRequest request = entry.getValue();
if (request.getPriority().equals(lowestPriNewContainer.getPriority())) {
LOG.info("Resending request for task again: " + task);
- deallocateTask(task, true);
+ deallocateTask(task, true, null);
allocateTask(task, request.getCapability(),
(request.getNodes() == null ? null :
request.getNodes().toArray(new String[request.getNodes().size()])),
http://git-wip-us.apache.org/repos/asf/tez/blob/e1ab1914/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskAttemptListenerImplTezDag.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskAttemptListenerImplTezDag.java b/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskAttemptListenerImplTezDag.java
index 0cf1959..076f9e0 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskAttemptListenerImplTezDag.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskAttemptListenerImplTezDag.java
@@ -76,6 +76,7 @@ import org.junit.Test;
import org.mockito.ArgumentCaptor;
@SuppressWarnings("unchecked")
+// TODO TEZ-2003 Rename to TestTezTaskCommunicator
public class TestTaskAttemptListenerImplTezDag {
private ApplicationId appId;
private AppContext appContext;
http://git-wip-us.apache.org/repos/asf/tez/blob/e1ab1914/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskAttemptListenerImplTezDag2.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskAttemptListenerImplTezDag2.java b/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskAttemptListenerImplTezDag2.java
new file mode 100644
index 0000000..934543f
--- /dev/null
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskAttemptListenerImplTezDag2.java
@@ -0,0 +1,126 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.dag.app;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.mockito.Matchers.any;
+import static org.mockito.Mockito.doReturn;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.times;
+import static org.mockito.Mockito.verify;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.hadoop.security.Credentials;
+import org.apache.hadoop.yarn.api.records.ApplicationAccessType;
+import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
+import org.apache.hadoop.yarn.api.records.ApplicationId;
+import org.apache.hadoop.yarn.api.records.Container;
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.api.records.NodeId;
+import org.apache.hadoop.yarn.event.Event;
+import org.apache.hadoop.yarn.event.EventHandler;
+import org.apache.tez.dag.api.TaskAttemptEndReason;
+import org.apache.tez.dag.app.dag.DAG;
+import org.apache.tez.dag.app.dag.event.TaskAttemptEventAttemptFailed;
+import org.apache.tez.dag.app.dag.event.TaskAttemptEventAttemptKilled;
+import org.apache.tez.dag.app.rm.container.AMContainer;
+import org.apache.tez.dag.app.rm.container.AMContainerMap;
+import org.apache.tez.dag.app.rm.container.AMContainerTask;
+import org.apache.tez.dag.records.TaskAttemptTerminationCause;
+import org.apache.tez.dag.records.TezTaskAttemptID;
+import org.apache.tez.runtime.api.impl.TaskSpec;
+import org.junit.Test;
+import org.mockito.ArgumentCaptor;
+
+// TODO TEZ-2003. Rename to TestTaskAttemptListener | whatever TaskAttemptListener is renamed to.
+public class TestTaskAttemptListenerImplTezDag2 {
+
+ @Test(timeout = 5000)
+ public void testTaskAttemptFailedKilled() {
+ ApplicationId appId = ApplicationId.newInstance(1000, 1);
+ ApplicationAttemptId appAttemptId = ApplicationAttemptId.newInstance(appId, 1);
+ Credentials credentials = new Credentials();
+ AppContext appContext = mock(AppContext.class);
+ EventHandler eventHandler = mock(EventHandler.class);
+ DAG dag = mock(DAG.class);
+ AMContainerMap amContainerMap = mock(AMContainerMap.class);
+ Map<ApplicationAccessType, String> appAcls = new HashMap<ApplicationAccessType, String>();
+ doReturn(eventHandler).when(appContext).getEventHandler();
+ doReturn(dag).when(appContext).getCurrentDAG();
+ doReturn(appAttemptId).when(appContext).getApplicationAttemptId();
+ doReturn(credentials).when(appContext).getAppCredentials();
+ doReturn(appAcls).when(appContext).getApplicationACLs();
+ doReturn(amContainerMap).when(appContext).getAllContainers();
+ NodeId nodeId = NodeId.newInstance("localhost", 0);
+ AMContainer amContainer = mock(AMContainer.class);
+ Container container = mock(Container.class);
+ doReturn(nodeId).when(container).getNodeId();
+ doReturn(amContainer).when(amContainerMap).get(any(ContainerId.class));
+ doReturn(container).when(amContainer).getContainer();
+
+ TaskAttemptListenerImpTezDag taskAttemptListener =
+ new TaskAttemptListenerImpTezDag(appContext, mock(TaskHeartbeatHandler.class),
+ mock(ContainerHeartbeatHandler.class), null, null, false);
+
+ TaskSpec taskSpec1 = mock(TaskSpec.class);
+ TezTaskAttemptID taskAttemptId1 = mock(TezTaskAttemptID.class);
+ doReturn(taskAttemptId1).when(taskSpec1).getTaskAttemptID();
+ AMContainerTask amContainerTask1 = new AMContainerTask(taskSpec1, null, null, false, 10);
+
+ TaskSpec taskSpec2 = mock(TaskSpec.class);
+ TezTaskAttemptID taskAttemptId2 = mock(TezTaskAttemptID.class);
+ doReturn(taskAttemptId2).when(taskSpec2).getTaskAttemptID();
+ AMContainerTask amContainerTask2 = new AMContainerTask(taskSpec2, null, null, false, 10);
+
+ ContainerId containerId1 = createContainerId(appId, 1);
+ taskAttemptListener.registerRunningContainer(containerId1, 0);
+ taskAttemptListener.registerTaskAttempt(amContainerTask1, containerId1, 0);
+ ContainerId containerId2 = createContainerId(appId, 2);
+ taskAttemptListener.registerRunningContainer(containerId2, 0);
+ taskAttemptListener.registerTaskAttempt(amContainerTask2, containerId2, 0);
+
+
+ taskAttemptListener
+ .taskFailed(taskAttemptId1, TaskAttemptEndReason.COMMUNICATION_ERROR, "Diagnostics1");
+ taskAttemptListener
+ .taskKilled(taskAttemptId2, TaskAttemptEndReason.SERVICE_BUSY, "Diagnostics2");
+
+ ArgumentCaptor<Event> argumentCaptor = ArgumentCaptor.forClass(Event.class);
+ verify(eventHandler, times(2)).handle(argumentCaptor.capture());
+ assertTrue(argumentCaptor.getAllValues().get(0) instanceof TaskAttemptEventAttemptFailed);
+ assertTrue(argumentCaptor.getAllValues().get(1) instanceof TaskAttemptEventAttemptKilled);
+ TaskAttemptEventAttemptFailed failedEvent =
+ (TaskAttemptEventAttemptFailed) argumentCaptor.getAllValues().get(0);
+ TaskAttemptEventAttemptKilled killedEvent =
+ (TaskAttemptEventAttemptKilled) argumentCaptor.getAllValues().get(1);
+
+ assertEquals("Diagnostics1", failedEvent.getDiagnosticInfo());
+ assertEquals(TaskAttemptTerminationCause.COMMUNICATION_ERROR,
+ failedEvent.getTerminationCause());
+
+ assertEquals("Diagnostics2", killedEvent.getDiagnosticInfo());
+ assertEquals(TaskAttemptTerminationCause.SERVICE_BUSY, killedEvent.getTerminationCause());
+ // TODO TEZ-2003. Verify unregistration from the registered list
+ }
+
+ private ContainerId createContainerId(ApplicationId applicationId, int containerIdx) {
+ ApplicationAttemptId appAttemptId = ApplicationAttemptId.newInstance(applicationId, 1);
+ ContainerId containerId = ContainerId.newInstance(appAttemptId, containerIdx);
+ return containerId;
+ }
+}
http://git-wip-us.apache.org/repos/asf/tez/blob/e1ab1914/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestContainerReuse.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestContainerReuse.java b/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestContainerReuse.java
index c1169ef..d45346a 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestContainerReuse.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestContainerReuse.java
@@ -22,6 +22,7 @@ import static org.junit.Assert.assertTrue;
import static org.junit.Assert.assertEquals;
import static org.mockito.Matchers.any;
import static org.mockito.Matchers.eq;
+import static org.mockito.Matchers.isNull;
import static org.mockito.Mockito.doReturn;
import static org.mockito.Mockito.doAnswer;
import static org.mockito.Mockito.mock;
@@ -57,6 +58,7 @@ import org.apache.tez.common.MockDNSToSwitchMapping;
import org.apache.tez.dag.api.InputDescriptor;
import org.apache.tez.dag.api.OutputDescriptor;
import org.apache.tez.dag.api.ProcessorDescriptor;
+import org.apache.tez.dag.api.TaskAttemptEndReason;
import org.apache.tez.dag.api.TezConfiguration;
import org.apache.tez.dag.api.TaskLocationHint;
import org.apache.tez.dag.api.oldrecords.TaskAttemptState;
@@ -92,6 +94,7 @@ import org.apache.tez.runtime.api.impl.OutputSpec;
import org.apache.tez.runtime.api.impl.TaskSpec;
import org.junit.BeforeClass;
import org.junit.Test;
+import org.mockito.internal.matchers.Null;
import org.mockito.invocation.InvocationOnMock;
import org.mockito.stubbing.Answer;
@@ -223,9 +226,9 @@ public class TestContainerReuse {
taskSchedulerEventHandler.handleEvent(
new AMSchedulerEventTAEnded(
- ta11, containerHost1.getId(), TaskAttemptState.SUCCEEDED, 0));
+ ta11, containerHost1.getId(), TaskAttemptState.SUCCEEDED, null, 0));
drainableAppCallback.drain();
- verify(taskScheduler).deallocateTask(eq(ta11), eq(true));
+ verify(taskScheduler).deallocateTask(eq(ta11), eq(true), eq((TaskAttemptEndReason)null));
verify(taskSchedulerEventHandler, times(1)).taskAllocated(
eq(ta31), any(Object.class), eq(containerHost1));
verify(rmClient, times(0)).releaseAssignedContainer(
@@ -235,7 +238,7 @@ public class TestContainerReuse {
taskSchedulerEventHandler.handleEvent(
new AMSchedulerEventTAEnded(ta21, containerHost2.getId(),
- TaskAttemptState.SUCCEEDED, 0));
+ TaskAttemptState.SUCCEEDED, null, 0));
long currentTs = System.currentTimeMillis();
Throwable exception = null;
@@ -356,9 +359,9 @@ public class TestContainerReuse {
taskSchedulerEventHandler.handleEvent(
new AMSchedulerEventTAEnded(ta21, containerHost2.getId(),
- TaskAttemptState.SUCCEEDED, 0));
+ TaskAttemptState.SUCCEEDED, null, 0));
drainableAppCallback.drain();
- verify(taskScheduler).deallocateTask(eq(ta21), eq(true));
+ verify(taskScheduler).deallocateTask(eq(ta21), eq(true), eq((TaskAttemptEndReason)null));
verify(taskSchedulerEventHandler, times(0)).taskAllocated(
eq(ta31), any(Object.class), eq(containerHost2));
verify(rmClient, times(1)).releaseAssignedContainer(
@@ -459,9 +462,9 @@ public class TestContainerReuse {
verify(taskSchedulerEventHandler).taskAllocated(eq(ta11), any(Object.class), eq(container1));
// Task assigned to container completed successfully. Container should be re-used.
- taskSchedulerEventHandler.handleEvent(new AMSchedulerEventTAEnded(ta11, container1.getId(), TaskAttemptState.SUCCEEDED, 0));
+ taskSchedulerEventHandler.handleEvent(new AMSchedulerEventTAEnded(ta11, container1.getId(), TaskAttemptState.SUCCEEDED, null, 0));
drainableAppCallback.drain();
- verify(taskScheduler).deallocateTask(eq(ta11), eq(true));
+ verify(taskScheduler).deallocateTask(eq(ta11), eq(true), eq((TaskAttemptEndReason)null));
verify(taskSchedulerEventHandler).taskAllocated(eq(ta12), any(Object.class), eq(container1));
verify(rmClient, times(0)).releaseAssignedContainer(eq(container1.getId()));
eventHandler.verifyNoInvocations(AMContainerEventStopRequest.class);
@@ -469,19 +472,19 @@ public class TestContainerReuse {
// Task assigned to container completed successfully.
// Verify reuse across hosts.
- taskSchedulerEventHandler.handleEvent(new AMSchedulerEventTAEnded(ta12, container1.getId(), TaskAttemptState.SUCCEEDED, 0));
+ taskSchedulerEventHandler.handleEvent(new AMSchedulerEventTAEnded(ta12, container1.getId(), TaskAttemptState.SUCCEEDED, null, 0));
drainableAppCallback.drain();
- verify(taskScheduler).deallocateTask(eq(ta12), eq(true));
+ verify(taskScheduler).deallocateTask(eq(ta12), eq(true), eq((TaskAttemptEndReason)null));
verify(taskSchedulerEventHandler).taskAllocated(eq(ta13), any(Object.class), eq(container1));
verify(rmClient, times(0)).releaseAssignedContainer(eq(container1.getId()));
eventHandler.verifyNoInvocations(AMContainerEventStopRequest.class);
eventHandler.reset();
// Verify no re-use if a previous task fails.
- taskSchedulerEventHandler.handleEvent(new AMSchedulerEventTAEnded(ta13, container1.getId(), TaskAttemptState.FAILED, 0));
+ taskSchedulerEventHandler.handleEvent(new AMSchedulerEventTAEnded(ta13, container1.getId(), TaskAttemptState.FAILED, null, 0));
drainableAppCallback.drain();
verify(taskSchedulerEventHandler, times(0)).taskAllocated(eq(ta14), any(Object.class), eq(container1));
- verify(taskScheduler).deallocateTask(eq(ta13), eq(false));
+ verify(taskScheduler).deallocateTask(eq(ta13), eq(false), eq((TaskAttemptEndReason)null));
verify(rmClient).releaseAssignedContainer(eq(container1.getId()));
eventHandler.verifyInvocation(AMContainerEventStopRequest.class);
eventHandler.reset();
@@ -496,9 +499,9 @@ public class TestContainerReuse {
verify(taskSchedulerEventHandler).taskAllocated(eq(ta14), any(Object.class), eq(container2));
// Task assigned to container completed successfully. No pending requests. Container should be released.
- taskSchedulerEventHandler.handleEvent(new AMSchedulerEventTAEnded(ta14, container2.getId(), TaskAttemptState.SUCCEEDED, 0));
+ taskSchedulerEventHandler.handleEvent(new AMSchedulerEventTAEnded(ta14, container2.getId(), TaskAttemptState.SUCCEEDED, null, 0));
drainableAppCallback.drain();
- verify(taskScheduler).deallocateTask(eq(ta14), eq(true));
+ verify(taskScheduler).deallocateTask(eq(ta14), eq(true), eq((TaskAttemptEndReason)null));
verify(rmClient).releaseAssignedContainer(eq(container2.getId()));
eventHandler.verifyInvocation(AMContainerEventStopRequest.class);
eventHandler.reset();
@@ -607,9 +610,9 @@ public class TestContainerReuse {
// First task had profiling on. This container can not be reused further.
taskSchedulerEventHandler.handleEvent(
- new AMSchedulerEventTAEnded(ta11, container1.getId(), TaskAttemptState.SUCCEEDED, 0));
+ new AMSchedulerEventTAEnded(ta11, container1.getId(), TaskAttemptState.SUCCEEDED, null, 0));
drainableAppCallback.drain();
- verify(taskScheduler).deallocateTask(eq(ta11), eq(true));
+ verify(taskScheduler).deallocateTask(eq(ta11), eq(true), eq((TaskAttemptEndReason)null));
verify(taskSchedulerEventHandler, times(0)).taskAllocated(eq(ta12), any(Object.class),
eq(container1));
verify(rmClient, times(1)).releaseAssignedContainer(eq(container1.getId()));
@@ -653,9 +656,9 @@ public class TestContainerReuse {
// Verify that the container can not be reused when profiling option is turned on
// Even for 2 tasks having same profiling option can have container reusability.
- taskSchedulerEventHandler.handleEvent(new AMSchedulerEventTAEnded(ta13, container2.getId(), TaskAttemptState.SUCCEEDED, 0));
+ taskSchedulerEventHandler.handleEvent(new AMSchedulerEventTAEnded(ta13, container2.getId(), TaskAttemptState.SUCCEEDED, null, 0));
drainableAppCallback.drain();
- verify(taskScheduler).deallocateTask(eq(ta13), eq(true));
+ verify(taskScheduler).deallocateTask(eq(ta13), eq(true), eq((TaskAttemptEndReason)null));
verify(taskSchedulerEventHandler, times(0)).taskAllocated(eq(ta14), any(Object.class),
eq(container2));
verify(rmClient, times(1)).releaseAssignedContainer(eq(container2.getId()));
@@ -698,9 +701,9 @@ public class TestContainerReuse {
verify(taskSchedulerEventHandler).taskAllocated(eq(ta15), any(Object.class), eq(container3));
//Ensure task 6 (of vertex 1) is allocated to same container
- taskSchedulerEventHandler.handleEvent(new AMSchedulerEventTAEnded(ta15, container3.getId(), TaskAttemptState.SUCCEEDED, 0));
+ taskSchedulerEventHandler.handleEvent(new AMSchedulerEventTAEnded(ta15, container3.getId(), TaskAttemptState.SUCCEEDED, null, 0));
drainableAppCallback.drain();
- verify(taskScheduler).deallocateTask(eq(ta15), eq(true));
+ verify(taskScheduler).deallocateTask(eq(ta15), eq(true), eq((TaskAttemptEndReason)null));
verify(taskSchedulerEventHandler).taskAllocated(eq(ta16), any(Object.class), eq(container3));
eventHandler.reset();
@@ -811,9 +814,9 @@ public class TestContainerReuse {
// until delay expires.
taskSchedulerEventHandler.handleEvent(
new AMSchedulerEventTAEnded(ta11, container1.getId(),
- TaskAttemptState.SUCCEEDED, 0));
+ TaskAttemptState.SUCCEEDED, null, 0));
drainableAppCallback.drain();
- verify(taskScheduler).deallocateTask(eq(ta11), eq(true));
+ verify(taskScheduler).deallocateTask(eq(ta11), eq(true), eq((TaskAttemptEndReason)null));
verify(taskSchedulerEventHandler, times(0)).taskAllocated(
eq(ta12), any(Object.class), eq(container1));
verify(rmClient, times(0)).releaseAssignedContainer(eq(container1.getId()));
@@ -828,7 +831,7 @@ public class TestContainerReuse {
// TA12 completed.
taskSchedulerEventHandler.handleEvent(
new AMSchedulerEventTAEnded(ta12, container1.getId(),
- TaskAttemptState.SUCCEEDED, 0));
+ TaskAttemptState.SUCCEEDED, null, 0));
drainableAppCallback.drain();
LOG.info("Sleeping to ensure that the scheduling loop runs");
Thread.sleep(3000l);
@@ -946,9 +949,9 @@ public class TestContainerReuse {
// Container should be assigned to task21.
taskSchedulerEventHandler.handleEvent(
new AMSchedulerEventTAEnded(ta11, container1.getId(),
- TaskAttemptState.SUCCEEDED, 0));
+ TaskAttemptState.SUCCEEDED, null, 0));
drainableAppCallback.drain();
- verify(taskScheduler).deallocateTask(eq(ta11), eq(true));
+ verify(taskScheduler).deallocateTask(eq(ta11), eq(true), eq((TaskAttemptEndReason)null));
verify(taskSchedulerEventHandler).taskAllocated(
eq(ta21), any(Object.class), eq(container1));
verify(rmClient, times(0)).releaseAssignedContainer(eq(container1.getId()));
@@ -956,7 +959,7 @@ public class TestContainerReuse {
// Task 2 completes.
taskSchedulerEventHandler.handleEvent(
new AMSchedulerEventTAEnded(ta21, container1.getId(),
- TaskAttemptState.SUCCEEDED, 0));
+ TaskAttemptState.SUCCEEDED, null, 0));
verify(rmClient, times(0)).releaseAssignedContainer(eq(container1.getId()));
LOG.info("Sleeping to ensure that the scheduling loop runs");
@@ -1065,9 +1068,9 @@ public class TestContainerReuse {
assertEquals(1, assignEvent.getRemoteTaskLocalResources().size());
// Task assigned to container completed successfully. Container should be re-used.
- taskSchedulerEventHandler.handleEvent(new AMSchedulerEventTAEnded(ta111, container1.getId(), TaskAttemptState.SUCCEEDED, 0));
+ taskSchedulerEventHandler.handleEvent(new AMSchedulerEventTAEnded(ta111, container1.getId(), TaskAttemptState.SUCCEEDED, null, 0));
drainableAppCallback.drain();
- verify(taskScheduler).deallocateTask(eq(ta111), eq(true));
+ verify(taskScheduler).deallocateTask(eq(ta111), eq(true), eq((TaskAttemptEndReason)null));
verify(taskSchedulerEventHandler).taskAllocated(eq(ta112), any(Object.class), eq(container1));
verify(rmClient, times(0)).releaseAssignedContainer(eq(container1.getId()));
eventHandler.verifyNoInvocations(AMContainerEventStopRequest.class);
@@ -1077,9 +1080,9 @@ public class TestContainerReuse {
// Task assigned to container completed successfully.
// Verify reuse across hosts.
- taskSchedulerEventHandler.handleEvent(new AMSchedulerEventTAEnded(ta112, container1.getId(), TaskAttemptState.SUCCEEDED, 0));
+ taskSchedulerEventHandler.handleEvent(new AMSchedulerEventTAEnded(ta112, container1.getId(), TaskAttemptState.SUCCEEDED, null, 0));
drainableAppCallback.drain();
- verify(taskScheduler).deallocateTask(eq(ta112), eq(true));
+ verify(taskScheduler).deallocateTask(eq(ta112), eq(true), eq((TaskAttemptEndReason)null));
verify(rmClient, times(0)).releaseAssignedContainer(eq(container1.getId()));
eventHandler.verifyNoInvocations(AMContainerEventStopRequest.class);
eventHandler.reset();
@@ -1118,9 +1121,9 @@ public class TestContainerReuse {
assertEquals(2, assignEvent.getRemoteTaskLocalResources().size());
eventHandler.reset();
- taskSchedulerEventHandler.handleEvent(new AMSchedulerEventTAEnded(ta211, container1.getId(), TaskAttemptState.SUCCEEDED, 0));
+ taskSchedulerEventHandler.handleEvent(new AMSchedulerEventTAEnded(ta211, container1.getId(), TaskAttemptState.SUCCEEDED, null, 0));
drainableAppCallback.drain();
- verify(taskScheduler).deallocateTask(eq(ta211), eq(true));
+ verify(taskScheduler).deallocateTask(eq(ta211), eq(true), eq((TaskAttemptEndReason)null));
verify(taskSchedulerEventHandler).taskAllocated(eq(ta212), any(Object.class), eq(container1));
verify(rmClient, times(0)).releaseAssignedContainer(eq(container1.getId()));
eventHandler.verifyNoInvocations(AMContainerEventStopRequest.class);
http://git-wip-us.apache.org/repos/asf/tez/blob/e1ab1914/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestLocalTaskSchedulerService.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestLocalTaskSchedulerService.java b/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestLocalTaskSchedulerService.java
index 25cf4b5..0a642bb 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestLocalTaskSchedulerService.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestLocalTaskSchedulerService.java
@@ -27,6 +27,7 @@ import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.api.records.Priority;
import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.tez.dag.api.TaskAttemptEndReason;
import org.apache.tez.dag.app.AppContext;
import org.apache.tez.dag.app.dag.Task;
import org.apache.tez.dag.app.rm.TaskSchedulerService.TaskSchedulerAppCallback;
@@ -94,7 +95,7 @@ public class TestLocalTaskSchedulerService {
Task task = mock(Task.class);
taskSchedulerService.allocateTask(task, Resource.newInstance(1024, 1), null, null, Priority.newInstance(1), null, null);
- taskSchedulerService.deallocateTask(task, false);
+ taskSchedulerService.deallocateTask(task, false, null);
// start the RequestHandler, DeallocateTaskRequest has higher priority, so will be processed first
taskSchedulerService.startRequestHandlerThread();
@@ -126,7 +127,7 @@ public class TestLocalTaskSchedulerService {
MockAsyncDelegateRequestHandler requestHandler = taskSchedulerService.getRequestHandler();
requestHandler.drainRequest(1);
- taskSchedulerService.deallocateTask(task, false);
+ taskSchedulerService.deallocateTask(task, false, null);
requestHandler.drainRequest(2);
assertEquals(1, requestHandler.deallocateCount);
assertEquals(1, requestHandler.allocateCount);
http://git-wip-us.apache.org/repos/asf/tez/blob/e1ab1914/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestTaskScheduler.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestTaskScheduler.java b/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestTaskScheduler.java
index dabae67..807e772 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestTaskScheduler.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestTaskScheduler.java
@@ -172,7 +172,7 @@ public class TestTaskScheduler {
addContainerRequest((CookieContainerRequest) any());
// returned from task requests before allocation happens
- assertFalse(scheduler.deallocateTask(mockTask1, true));
+ assertFalse(scheduler.deallocateTask(mockTask1, true, null));
verify(mockApp, times(0)).containerBeingReleased(any(ContainerId.class));
verify(mockRMClient, times(1)).
removeContainerRequest((CookieContainerRequest) any());
@@ -180,7 +180,7 @@ public class TestTaskScheduler {
releaseAssignedContainer((ContainerId) any());
// deallocating unknown task
- assertFalse(scheduler.deallocateTask(mockTask1, true));
+ assertFalse(scheduler.deallocateTask(mockTask1, true, null));
verify(mockApp, times(0)).containerBeingReleased(any(ContainerId.class));
verify(mockRMClient, times(1)).
removeContainerRequest((CookieContainerRequest) any());
@@ -325,7 +325,7 @@ public class TestTaskScheduler {
verify(mockRMClient).releaseAssignedContainer(mockCId4);
// deallocate allocated task
- assertTrue(scheduler.deallocateTask(mockTask1, true));
+ assertTrue(scheduler.deallocateTask(mockTask1, true, null));
drainableAppCallback.drain();
verify(mockApp).containerBeingReleased(mockCId1);
verify(mockRMClient).releaseAssignedContainer(mockCId1);
@@ -445,7 +445,7 @@ public class TestTaskScheduler {
verify(mockApp, times(4)).taskAllocated(any(), any(), (Container) any());
verify(mockApp).taskAllocated(mockTask4, mockCookie4, mockContainer6);
// deallocate allocated task
- assertTrue(scheduler.deallocateTask(mockTask4, true));
+ assertTrue(scheduler.deallocateTask(mockTask4, true, null));
drainableAppCallback.drain();
verify(mockApp).containerBeingReleased(mockCId6);
verify(mockRMClient).releaseAssignedContainer(mockCId6);
@@ -475,7 +475,7 @@ public class TestTaskScheduler {
removeContainerRequest((CookieContainerRequest) any());
verify(mockRMClient, times(8)).addContainerRequest(
(CookieContainerRequest) any());
- assertFalse(scheduler.deallocateTask(mockTask1, true));
+ assertFalse(scheduler.deallocateTask(mockTask1, true, null));
List<NodeReport> mockUpdatedNodes = mock(List.class);
scheduler.onNodesUpdated(mockUpdatedNodes);
@@ -741,7 +741,7 @@ public class TestTaskScheduler {
verify(mockRMClient).releaseAssignedContainer(mockCId4);
// deallocate allocated task
- assertTrue(scheduler.deallocateTask(mockTask1, true));
+ assertTrue(scheduler.deallocateTask(mockTask1, true, null));
drainableAppCallback.drain();
verify(mockApp).containerBeingReleased(mockCId1);
verify(mockRMClient).releaseAssignedContainer(mockCId1);
@@ -871,7 +871,7 @@ public class TestTaskScheduler {
verify(mockApp, times(4)).taskAllocated(any(), any(), (Container) any());
verify(mockApp).taskAllocated(mockTask4, mockCookie4, mockContainer6);
// deallocate allocated task
- assertTrue(scheduler.deallocateTask(mockTask4, true));
+ assertTrue(scheduler.deallocateTask(mockTask4, true, null));
drainableAppCallback.drain();
verify(mockApp).containerBeingReleased(mockCId6);
verify(mockRMClient).releaseAssignedContainer(mockCId6);
@@ -960,8 +960,8 @@ public class TestTaskScheduler {
// container7 allocated to the task with affinity for it
verify(mockApp).taskAllocated(mockTask6, mockCookie6, mockContainer7);
// deallocate allocated task
- assertTrue(scheduler.deallocateTask(mockTask5, true));
- assertTrue(scheduler.deallocateTask(mockTask6, true));
+ assertTrue(scheduler.deallocateTask(mockTask5, true, null));
+ assertTrue(scheduler.deallocateTask(mockTask6, true, null));
drainableAppCallback.drain();
verify(mockApp).containerBeingReleased(mockCId7);
verify(mockApp).containerBeingReleased(mockCId8);
http://git-wip-us.apache.org/repos/asf/tez/blob/e1ab1914/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/rm/TezTestServiceTaskSchedulerService.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/rm/TezTestServiceTaskSchedulerService.java b/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/rm/TezTestServiceTaskSchedulerService.java
index 5657f86..872d592 100644
--- a/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/rm/TezTestServiceTaskSchedulerService.java
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/rm/TezTestServiceTaskSchedulerService.java
@@ -38,6 +38,7 @@ import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.NodeId;
import org.apache.hadoop.yarn.api.records.Priority;
import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.tez.dag.api.TaskAttemptEndReason;
import org.apache.tez.dag.app.AppContext;
import org.apache.tez.service.TezTestServiceConfConstants;
@@ -198,7 +199,7 @@ public class TezTestServiceTaskSchedulerService extends TaskSchedulerService {
}
@Override
- public boolean deallocateTask(Object task, boolean taskSucceeded) {
+ public boolean deallocateTask(Object task, boolean taskSucceeded, TaskAttemptEndReason endReason) {
ContainerId containerId = runningTasks.remove(task);
if (containerId == null) {
LOG.error("Could not determine ContainerId for task: " + task +
http://git-wip-us.apache.org/repos/asf/tez/blob/e1ab1914/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/taskcomm/TezTestServiceTaskCommunicatorImpl.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/taskcomm/TezTestServiceTaskCommunicatorImpl.java b/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/taskcomm/TezTestServiceTaskCommunicatorImpl.java
index a327caf..e3385a2 100644
--- a/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/taskcomm/TezTestServiceTaskCommunicatorImpl.java
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/taskcomm/TezTestServiceTaskCommunicatorImpl.java
@@ -19,16 +19,20 @@ import java.nio.ByteBuffer;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
+import java.util.concurrent.RejectedExecutionException;
import com.google.protobuf.ByteString;
+import com.google.protobuf.ServiceException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.DataOutputBuffer;
+import org.apache.hadoop.ipc.RemoteException;
import org.apache.hadoop.security.Credentials;
import org.apache.hadoop.yarn.api.ApplicationConstants;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.LocalResource;
+import org.apache.tez.dag.api.TaskAttemptEndReason;
import org.apache.tez.dag.api.TaskCommunicatorContext;
import org.apache.tez.dag.app.TezTaskCommunicatorImpl;
import org.apache.tez.dag.app.TezTestServiceCommunicator;
@@ -83,6 +87,7 @@ public class TezTestServiceTaskCommunicatorImpl extends TezTaskCommunicatorImpl
@Override
public void serviceStop() {
super.serviceStop();
+ this.communicator.stop();
}
@@ -123,13 +128,15 @@ public class TezTestServiceTaskCommunicatorImpl extends TezTaskCommunicatorImpl
throw new RuntimeException("ContainerInfo not found for container: " + containerId +
", while trying to launch task: " + taskSpec.getTaskAttemptID());
}
+ // Have to register this up front right now. Otherwise, it's possible for the task to start
+ // sending out status/DONE/KILLED/FAILED messages before TAImpl knows how to handle them.
+ getTaskCommunicatorContext()
+ .taskStartedRemotely(taskSpec.getTaskAttemptID(), containerId);
communicator.submitWork(requestProto, host, port,
new TezTestServiceCommunicator.ExecuteRequestCallback<SubmitWorkResponseProto>() {
@Override
public void setResponse(SubmitWorkResponseProto response) {
LOG.info("Successfully launched task: " + taskSpec.getTaskAttemptID());
- getTaskCommunicatorContext()
- .taskStartedRemotely(taskSpec.getTaskAttemptID(), containerId);
}
@Override
@@ -137,6 +144,31 @@ public class TezTestServiceTaskCommunicatorImpl extends TezTaskCommunicatorImpl
// TODO Handle this error. This is where an API on the context to indicate failure / rejection comes in.
LOG.info("Failed to run task: " + taskSpec.getTaskAttemptID() + " on containerId: " +
containerId, t);
+ if (t instanceof ServiceException) {
+ ServiceException se = (ServiceException) t;
+ t = se.getCause();
+ }
+ if (t instanceof RemoteException) {
+ RemoteException re = (RemoteException)t;
+ String message = re.toString();
+ if (message.contains(RejectedExecutionException.class.getName())) {
+ getTaskCommunicatorContext().taskKilled(taskSpec.getTaskAttemptID(),
+ TaskAttemptEndReason.SERVICE_BUSY, "Service Busy");
+ } else {
+ getTaskCommunicatorContext()
+ .taskFailed(taskSpec.getTaskAttemptID(), TaskAttemptEndReason.OTHER,
+ t.toString());
+ }
+ } else {
+ if (t instanceof IOException) {
+ getTaskCommunicatorContext().taskKilled(taskSpec.getTaskAttemptID(),
+ TaskAttemptEndReason.COMMUNICATION_ERROR, "Communication Error");
+ } else {
+ getTaskCommunicatorContext()
+ .taskFailed(taskSpec.getTaskAttemptID(), TaskAttemptEndReason.OTHER,
+ t.getMessage());
+ }
+ }
}
});
}
http://git-wip-us.apache.org/repos/asf/tez/blob/e1ab1914/tez-ext-service-tests/src/test/java/org/apache/tez/service/ContainerRunner.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/service/ContainerRunner.java b/tez-ext-service-tests/src/test/java/org/apache/tez/service/ContainerRunner.java
index 2bca4ed..28c2286 100644
--- a/tez-ext-service-tests/src/test/java/org/apache/tez/service/ContainerRunner.java
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/service/ContainerRunner.java
@@ -16,12 +16,13 @@ package org.apache.tez.service;
import java.io.IOException;
+import org.apache.tez.dag.api.TezException;
import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos;
import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos.RunContainerRequestProto;
import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos.SubmitWorkRequestProto;
public interface ContainerRunner {
- void queueContainer(RunContainerRequestProto request) throws IOException;
- void submitWork(SubmitWorkRequestProto request) throws IOException;
+ void queueContainer(RunContainerRequestProto request) throws TezException;
+ void submitWork(SubmitWorkRequestProto request) throws TezException;
}
http://git-wip-us.apache.org/repos/asf/tez/blob/e1ab1914/tez-ext-service-tests/src/test/java/org/apache/tez/service/MiniTezTestServiceCluster.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/service/MiniTezTestServiceCluster.java b/tez-ext-service-tests/src/test/java/org/apache/tez/service/MiniTezTestServiceCluster.java
index f47bd67..0ac0b33 100644
--- a/tez-ext-service-tests/src/test/java/org/apache/tez/service/MiniTezTestServiceCluster.java
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/service/MiniTezTestServiceCluster.java
@@ -133,7 +133,10 @@ public class MiniTezTestServiceCluster extends AbstractService {
@Override
public void serviceStop() {
- tezTestService.stop();
+ if (tezTestService != null) {
+ tezTestService.stop();
+ tezTestService = null;
+ }
}
/**
http://git-wip-us.apache.org/repos/asf/tez/blob/e1ab1914/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/ContainerRunnerImpl.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/ContainerRunnerImpl.java b/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/ContainerRunnerImpl.java
index 25d6030..379d952 100644
--- a/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/ContainerRunnerImpl.java
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/ContainerRunnerImpl.java
@@ -25,6 +25,7 @@ import java.util.Map;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
+import java.util.concurrent.RejectedExecutionException;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.atomic.AtomicReference;
@@ -58,6 +59,7 @@ import org.apache.tez.common.security.JobTokenIdentifier;
import org.apache.tez.common.security.TokenCache;
import org.apache.tez.dag.api.TezConfiguration;
import org.apache.tez.dag.api.TezException;
+import org.apache.tez.runtime.api.impl.TaskSpec;
import org.apache.tez.runtime.task.TaskReporter;
import org.apache.tez.runtime.task.TezTaskRunner;
import org.apache.tez.service.ContainerRunner;
@@ -68,14 +70,18 @@ import org.apache.tez.runtime.common.objectregistry.ObjectRegistryImpl;
import org.apache.tez.runtime.task.TezChild;
import org.apache.tez.runtime.task.TezChild.ContainerExecutionResult;
import org.apache.tez.shufflehandler.ShuffleHandler;
+import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos;
import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos.RunContainerRequestProto;
import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos.SubmitWorkRequestProto;
+import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos.TaskSpecProto;
import org.apache.tez.util.ProtoConverters;
public class ContainerRunnerImpl extends AbstractService implements ContainerRunner {
private static final Logger LOG = Logger.getLogger(ContainerRunnerImpl.class);
+ public static final String DAG_NAME_INSTRUMENTED_FAILURES = "InstrumentedFailures";
+
private final ListeningExecutorService executorService;
private final AtomicReference<InetSocketAddress> localAddress;
private final String[] localDirsBase;
@@ -146,10 +152,10 @@ public class ContainerRunnerImpl extends AbstractService implements ContainerRun
* Submit a container which is ready for running.
* The regular pull mechanism will be used to fetch work from the AM
* @param request
- * @throws IOException
+ * @throws TezException
*/
@Override
- public void queueContainer(RunContainerRequestProto request) throws IOException {
+ public void queueContainer(RunContainerRequestProto request) throws TezException {
LOG.info("Queuing container for execution: " + request);
Map<String, String> env = new HashMap<String, String>();
@@ -162,7 +168,11 @@ public class ContainerRunnerImpl extends AbstractService implements ContainerRun
for (int i = 0; i < localDirsBase.length; i++) {
localDirs[i] = createAppSpecificLocalDir(localDirsBase[i], request.getApplicationIdString(),
request.getUser());
- localFs.mkdirs(new Path(localDirs[i]));
+ try {
+ localFs.mkdirs(new Path(localDirs[i]));
+ } catch (IOException e) {
+ throw new TezException(e);
+ }
}
LOG.info("DEBUG: Dirs are: " + Arrays.toString(localDirs));
@@ -175,7 +185,11 @@ public class ContainerRunnerImpl extends AbstractService implements ContainerRun
DataInputBuffer dib = new DataInputBuffer();
byte[] tokenBytes = request.getCredentialsBinary().toByteArray();
dib.reset(tokenBytes, tokenBytes.length);
- credentials.readTokenStorageStream(dib);
+ try {
+ credentials.readTokenStorageStream(dib);
+ } catch (IOException e) {
+ throw new TezException(e);
+ }
Token<JobTokenIdentifier> jobToken = TokenCache.getSessionToken(credentials);
@@ -197,13 +211,14 @@ public class ContainerRunnerImpl extends AbstractService implements ContainerRun
* This is intended for a task push from the AM
*
* @param request
- * @throws IOException
+ * @throws org.apache.tez.dag.api.TezException
*/
@Override
- public void submitWork(SubmitWorkRequestProto request) throws
- IOException {
+ public void submitWork(SubmitWorkRequestProto request) throws TezException {
LOG.info("Queuing work for execution: " + request);
+ checkAndThrowExceptionForTests(request);
+
Map<String, String> env = new HashMap<String, String>();
env.putAll(localEnv);
env.put(ApplicationConstants.Environment.USER.name(), request.getUser());
@@ -214,7 +229,11 @@ public class ContainerRunnerImpl extends AbstractService implements ContainerRun
for (int i = 0; i < localDirsBase.length; i++) {
localDirs[i] = createAppSpecificLocalDir(localDirsBase[i], request.getApplicationIdString(),
request.getUser());
- localFs.mkdirs(new Path(localDirs[i]));
+ try {
+ localFs.mkdirs(new Path(localDirs[i]));
+ } catch (IOException e) {
+ throw new TezException(e);
+ }
}
if (LOG.isDebugEnabled()) {
LOG.debug("Dirs are: " + Arrays.toString(localDirs));
@@ -228,7 +247,11 @@ public class ContainerRunnerImpl extends AbstractService implements ContainerRun
DataInputBuffer dib = new DataInputBuffer();
byte[] tokenBytes = request.getCredentialsBinary().toByteArray();
dib.reset(tokenBytes, tokenBytes.length);
- credentials.readTokenStorageStream(dib);
+ try {
+ credentials.readTokenStorageStream(dib);
+ } catch (IOException e) {
+ throw new TezException(e);
+ }
Token<JobTokenIdentifier> jobToken = TokenCache.getSessionToken(credentials);
@@ -509,4 +532,23 @@ public class ContainerRunnerImpl extends AbstractService implements ContainerRun
}
}
+
+ private void checkAndThrowExceptionForTests(SubmitWorkRequestProto request) throws TezException {
+ if (!request.getTaskSpec().getDagName().equals(DAG_NAME_INSTRUMENTED_FAILURES)) {
+ return;
+ }
+
+ TaskSpec taskSpec = ProtoConverters.getTaskSpecfromProto(request.getTaskSpec());
+ if (taskSpec.getTaskAttemptID().getTaskID().getId() == 0 &&
+ taskSpec.getTaskAttemptID().getId() == 0) {
+ LOG.info("Simulating Rejected work");
+ throw new RejectedExecutionException(
+ "Simulating Rejected work for taskAttemptId=" + taskSpec.getTaskAttemptID());
+ } else if (taskSpec.getTaskAttemptID().getTaskID().getId() == 1 &&
+ taskSpec.getTaskAttemptID().getId() == 0) {
+ LOG.info("Simulating Task Setup Failure during launch");
+ throw new TezException("Simulating Task Setup Failure during launch for taskAttemptId=" +
+ taskSpec.getTaskAttemptID());
+ }
+ }
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/tez/blob/e1ab1914/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/TezTestService.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/TezTestService.java b/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/TezTestService.java
index 012e352..855f1b0 100644
--- a/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/TezTestService.java
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/TezTestService.java
@@ -14,7 +14,6 @@
package org.apache.tez.service.impl;
-import java.io.IOException;
import java.net.InetSocketAddress;
import java.util.Arrays;
import java.util.concurrent.atomic.AtomicInteger;
@@ -25,6 +24,7 @@ import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.service.AbstractService;
import org.apache.hadoop.util.StringUtils;
import org.apache.log4j.Logger;
+import org.apache.tez.dag.api.TezException;
import org.apache.tez.service.ContainerRunner;
import org.apache.tez.shufflehandler.ShuffleHandler;
import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos;
@@ -108,14 +108,14 @@ public class TezTestService extends AbstractService implements ContainerRunner {
@Override
- public void queueContainer(RunContainerRequestProto request) throws IOException {
+ public void queueContainer(RunContainerRequestProto request) throws TezException {
numSubmissions.incrementAndGet();
containerRunner.queueContainer(request);
}
@Override
public void submitWork(TezTestServiceProtocolProtos.SubmitWorkRequestProto request) throws
- IOException {
+ TezException {
numSubmissions.incrementAndGet();
containerRunner.submitWork(request);
}
http://git-wip-us.apache.org/repos/asf/tez/blob/e1ab1914/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/TezTestServiceProtocolServerImpl.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/TezTestServiceProtocolServerImpl.java b/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/TezTestServiceProtocolServerImpl.java
index d7f8444..39d7156 100644
--- a/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/TezTestServiceProtocolServerImpl.java
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/TezTestServiceProtocolServerImpl.java
@@ -30,11 +30,13 @@ import org.apache.hadoop.ipc.ProtobufRpcEngine;
import org.apache.hadoop.ipc.RPC;
import org.apache.hadoop.net.NetUtils;
import org.apache.hadoop.service.AbstractService;
+import org.apache.tez.dag.api.TezException;
import org.apache.tez.service.ContainerRunner;
import org.apache.tez.service.TezTestServiceProtocolBlockingPB;
import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos;
import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos.RunContainerRequestProto;
import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos.RunContainerResponseProto;
+import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos.SubmitWorkRequestProto;
import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos.SubmitWorkResponseProto;
public class TezTestServiceProtocolServerImpl extends AbstractService
@@ -61,20 +63,20 @@ public class TezTestServiceProtocolServerImpl extends AbstractService
LOG.info("Received request: " + request);
try {
containerRunner.queueContainer(request);
- } catch (IOException e) {
+ } catch (TezException e) {
throw new ServiceException(e);
}
return RunContainerResponseProto.getDefaultInstance();
}
@Override
- public SubmitWorkResponseProto submitWork(RpcController controller, TezTestServiceProtocolProtos.SubmitWorkRequestProto request) throws
+ public SubmitWorkResponseProto submitWork(RpcController controller, SubmitWorkRequestProto request) throws
ServiceException {
LOG.info("Received submitWork request: " + request);
try {
containerRunner.submitWork(request);
- } catch (IOException e) {
- e.printStackTrace();
+ } catch (TezException e) {
+ throw new ServiceException(e);
}
return SubmitWorkResponseProto.getDefaultInstance();
}
http://git-wip-us.apache.org/repos/asf/tez/blob/e1ab1914/tez-ext-service-tests/src/test/java/org/apache/tez/tests/TestExternalTezServices.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/tests/TestExternalTezServices.java b/tez-ext-service-tests/src/test/java/org/apache/tez/tests/TestExternalTezServices.java
index 0ec972b..b6a166d 100644
--- a/tez-ext-service-tests/src/test/java/org/apache/tez/tests/TestExternalTezServices.java
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/tests/TestExternalTezServices.java
@@ -27,16 +27,23 @@ import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.tez.client.TezClient;
+import org.apache.tez.dag.api.DAG;
+import org.apache.tez.dag.api.ProcessorDescriptor;
import org.apache.tez.dag.api.TezConfiguration;
import org.apache.tez.dag.api.TezConstants;
import org.apache.tez.dag.api.TezException;
+import org.apache.tez.dag.api.Vertex;
+import org.apache.tez.dag.api.client.DAGClient;
+import org.apache.tez.dag.api.client.DAGStatus;
import org.apache.tez.dag.app.launcher.TezTestServiceNoOpContainerLauncher;
import org.apache.tez.dag.app.rm.TezTestServiceTaskSchedulerService;
import org.apache.tez.dag.app.taskcomm.TezTestServiceTaskCommunicatorImpl;
import org.apache.tez.examples.HashJoinExample;
import org.apache.tez.examples.JoinDataGen;
import org.apache.tez.examples.JoinValidateConfigured;
+import org.apache.tez.runtime.library.processor.SleepProcessor;
import org.apache.tez.service.MiniTezTestServiceCluster;
+import org.apache.tez.service.impl.ContainerRunnerImpl;
import org.apache.tez.test.MiniTezCluster;
import org.junit.AfterClass;
import org.junit.BeforeClass;
@@ -283,6 +290,28 @@ public class TestExternalTezServices {
PROPS_IN_AM, PROPS_REGULAR_CONTAINERS);
}
+ @Test(timeout = 60000)
+ public void testErrorPropagation() throws TezException, InterruptedException, IOException {
+ runExceptionSimulation();
+ }
+
+
+
+ private void runExceptionSimulation() throws IOException, TezException, InterruptedException {
+ DAG dag = DAG.create(ContainerRunnerImpl.DAG_NAME_INSTRUMENTED_FAILURES);
+ Vertex v =Vertex.create("Vertex1", ProcessorDescriptor.create(SleepProcessor.class.getName()),
+ 3);
+ for (Map.Entry<String, String> prop : PROPS_EXT_SERVICE_PUSH.entrySet()) {
+ v.setConf(prop.getKey(), prop.getValue());
+ }
+ dag.addVertex(v);
+ DAGClient dagClient = sharedTezClient.submitDAG(dag);
+ DAGStatus dagStatus = dagClient.waitForCompletion();
+ assertEquals(DAGStatus.State.SUCCEEDED, dagStatus.getState());
+ assertEquals(1, dagStatus.getDAGProgress().getFailedTaskAttemptCount());
+ assertEquals(1, dagStatus.getDAGProgress().getKilledTaskAttemptCount());
+
+ }
private void runJoinValidate(String name, int extExpectedCount, Map<String, String> lhsProps,
Map<String, String> rhsProps,
[35/43] tez git commit: TEZ-2284. Separate TaskReporter into an
interface. (sseth)
Posted by ss...@apache.org.
TEZ-2284. Separate TaskReporter into an interface. (sseth)
Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/2b05376a
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/2b05376a
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/2b05376a
Branch: refs/heads/TEZ-2003
Commit: 2b05376ad2b63b81d4ab09c6052509066341b47a
Parents: 2616479
Author: Siddharth Seth <ss...@apache.org>
Authored: Tue Apr 7 13:21:35 2015 -0700
Committer: Siddharth Seth <ss...@apache.org>
Committed: Fri May 8 14:41:31 2015 -0700
----------------------------------------------------------------------
TEZ-2003-CHANGES.txt | 1 +
.../internals/api/TaskReporterInterface.java | 46 ++++++++++++++++++++
.../apache/tez/runtime/task/TaskReporter.java | 12 ++++-
.../org/apache/tez/runtime/task/TezChild.java | 3 +-
.../apache/tez/runtime/task/TezTaskRunner.java | 5 ++-
5 files changed, 62 insertions(+), 5 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/tez/blob/2b05376a/TEZ-2003-CHANGES.txt
----------------------------------------------------------------------
diff --git a/TEZ-2003-CHANGES.txt b/TEZ-2003-CHANGES.txt
index 6a4399c..e2c428d 100644
--- a/TEZ-2003-CHANGES.txt
+++ b/TEZ-2003-CHANGES.txt
@@ -13,5 +13,6 @@ ALL CHANGES:
TEZ-2187. Allow TaskCommunicators to report failed / killed attempts.
TEZ-2241. Miscellaneous fixes after last reabse.
TEZ-2283. Fixes after rebase 04/07.
+ TEZ-2284. Separate TaskReporter into an interface.
INCOMPATIBLE CHANGES:
http://git-wip-us.apache.org/repos/asf/tez/blob/2b05376a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/internals/api/TaskReporterInterface.java
----------------------------------------------------------------------
diff --git a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/internals/api/TaskReporterInterface.java b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/internals/api/TaskReporterInterface.java
new file mode 100644
index 0000000..47a61ab
--- /dev/null
+++ b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/internals/api/TaskReporterInterface.java
@@ -0,0 +1,46 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.runtime.internals.api;
+
+import java.io.IOException;
+import java.util.Collection;
+
+import org.apache.tez.dag.api.TezException;
+import org.apache.tez.dag.records.TezTaskAttemptID;
+import org.apache.tez.runtime.RuntimeTask;
+import org.apache.tez.runtime.api.impl.EventMetaData;
+import org.apache.tez.runtime.api.impl.TezEvent;
+import org.apache.tez.runtime.task.ErrorReporter;
+
+public interface TaskReporterInterface {
+
+ // TODO TEZ-2003 Consolidate private API usage if making this public
+
+ void registerTask(RuntimeTask task, ErrorReporter errorReporter);
+
+ void unregisterTask(TezTaskAttemptID taskAttemptId);
+
+ boolean taskSucceeded(TezTaskAttemptID taskAttemptId) throws IOException, TezException;
+
+ boolean taskFailed(TezTaskAttemptID taskAttemptId, Throwable cause, String diagnostics, EventMetaData srcMeta) throws IOException,
+ TezException;
+
+ void addEvents(TezTaskAttemptID taskAttemptId, Collection<TezEvent> events);
+
+ boolean canCommit(TezTaskAttemptID taskAttemptId) throws IOException;
+
+ void shutdown();
+
+}
http://git-wip-us.apache.org/repos/asf/tez/blob/2b05376a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TaskReporter.java
----------------------------------------------------------------------
diff --git a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TaskReporter.java b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TaskReporter.java
index 8b9db16..b95e514 100644
--- a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TaskReporter.java
+++ b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TaskReporter.java
@@ -48,6 +48,7 @@ import org.apache.tez.runtime.api.impl.TezEvent;
import org.apache.tez.runtime.api.impl.TezHeartbeatRequest;
import org.apache.tez.runtime.api.impl.TezHeartbeatResponse;
import org.apache.tez.runtime.api.impl.EventMetaData.EventProducerConsumerType;
+import org.apache.tez.runtime.internals.api.TaskReporterInterface;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -66,7 +67,7 @@ import com.google.common.util.concurrent.ThreadFactoryBuilder;
* retrieve events specific to this task.
*
*/
-public class TaskReporter {
+public class TaskReporter implements TaskReporterInterface {
private static final Logger LOG = LoggerFactory.getLogger(TaskReporter.class);
@@ -98,6 +99,7 @@ public class TaskReporter {
/**
* Register a task to be tracked. Heartbeats will be sent out for this task to fetch events, etc.
*/
+ @Override
public synchronized void registerTask(RuntimeTask task,
ErrorReporter errorReporter) {
currentCallable = new HeartbeatCallable(task, umbilical, pollInterval, sendCounterInterval,
@@ -110,12 +112,14 @@ public class TaskReporter {
* This method should always be invoked before setting up heartbeats for another task running in
* the same container.
*/
+ @Override
public synchronized void unregisterTask(TezTaskAttemptID taskAttemptID) {
currentCallable.markComplete();
currentCallable = null;
// KKK Make sure the callable completes before proceeding
}
-
+
+ @Override
public void shutdown() {
heartbeatExecutor.shutdownNow();
}
@@ -411,19 +415,23 @@ public class TaskReporter {
}
}
+ @Override
public synchronized boolean taskSucceeded(TezTaskAttemptID taskAttemptID) throws IOException, TezException {
return currentCallable.taskSucceeded(taskAttemptID);
}
+ @Override
public synchronized boolean taskFailed(TezTaskAttemptID taskAttemptID, Throwable t, String diagnostics,
EventMetaData srcMeta) throws IOException, TezException {
return currentCallable.taskFailed(taskAttemptID, t, diagnostics, srcMeta);
}
+ @Override
public synchronized void addEvents(TezTaskAttemptID taskAttemptID, Collection<TezEvent> events) {
currentCallable.addEvents(taskAttemptID, events);
}
+ @Override
public boolean canCommit(TezTaskAttemptID taskAttemptID) throws IOException {
return umbilical.canCommit(taskAttemptID);
}
http://git-wip-us.apache.org/repos/asf/tez/blob/2b05376a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TezChild.java
----------------------------------------------------------------------
diff --git a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TezChild.java b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TezChild.java
index 7615f08..c4fd64c 100644
--- a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TezChild.java
+++ b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TezChild.java
@@ -66,6 +66,7 @@ import org.apache.tez.runtime.api.impl.ExecutionContextImpl;
import org.apache.tez.runtime.api.impl.TaskSpec;
import org.apache.tez.runtime.api.impl.TezUmbilical;
import org.apache.tez.runtime.common.objectregistry.ObjectRegistryImpl;
+import org.apache.tez.runtime.internals.api.TaskReporterInterface;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -111,7 +112,7 @@ public class TezChild {
private final boolean ownUmbilical;
private final TezTaskUmbilicalProtocol umbilical;
- private TaskReporter taskReporter;
+ private TaskReporterInterface taskReporter;
private int taskCount = 0;
private TezVertexID lastVertexID;
http://git-wip-us.apache.org/repos/asf/tez/blob/2b05376a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TezTaskRunner.java
----------------------------------------------------------------------
diff --git a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TezTaskRunner.java b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TezTaskRunner.java
index f54814b..33a7f4a 100644
--- a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TezTaskRunner.java
+++ b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TezTaskRunner.java
@@ -41,6 +41,7 @@ import org.apache.tez.runtime.api.impl.EventMetaData;
import org.apache.tez.runtime.api.impl.TaskSpec;
import org.apache.tez.runtime.api.impl.TezEvent;
import org.apache.tez.runtime.api.impl.TezUmbilical;
+import org.apache.tez.runtime.internals.api.TaskReporterInterface;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -56,7 +57,7 @@ public class TezTaskRunner implements TezUmbilical, ErrorReporter {
private final LogicalIOProcessorRuntimeTask task;
private final UserGroupInformation ugi;
- private final TaskReporter taskReporter;
+ private final TaskReporterInterface taskReporter;
private final ListeningExecutorService executor;
private volatile ListenableFuture<Void> taskFuture;
private volatile Thread waitingThread;
@@ -70,7 +71,7 @@ public class TezTaskRunner implements TezUmbilical, ErrorReporter {
public TezTaskRunner(Configuration tezConf, UserGroupInformation ugi, String[] localDirs,
TaskSpec taskSpec, int appAttemptNumber,
Map<String, ByteBuffer> serviceConsumerMetadata, Map<String, String> serviceProviderEnvMap,
- Multimap<String, String> startedInputsMap, TaskReporter taskReporter,
+ Multimap<String, String> startedInputsMap, TaskReporterInterface taskReporter,
ListeningExecutorService executor, ObjectRegistry objectRegistry, String pid,
ExecutionContext executionContext, long memAvailable)
throws IOException {
[09/43] tez git commit: TEZ-2404. Handle DataMovementEvent before its
TaskAttemptCompletedEvent (zjffdu)
Posted by ss...@apache.org.
TEZ-2404. Handle DataMovementEvent before its TaskAttemptCompletedEvent (zjffdu)
Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/02870f0a
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/02870f0a
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/02870f0a
Branch: refs/heads/TEZ-2003
Commit: 02870f0ac1095d67a85a864860b0c4ce68a1db57
Parents: d5a0f39
Author: Jeff Zhang <zj...@apache.org>
Authored: Thu May 7 13:03:07 2015 +0800
Committer: Jeff Zhang <zj...@apache.org>
Committed: Thu May 7 13:03:07 2015 +0800
----------------------------------------------------------------------
CHANGES.txt | 1 +
.../dag/app/TaskAttemptListenerImpTezDag.java | 35 +++++---------------
.../apache/tez/dag/app/dag/impl/VertexImpl.java | 8 +++++
.../app/TestTaskAttemptListenerImplTezDag.java | 21 +++++++-----
4 files changed, 31 insertions(+), 34 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/tez/blob/02870f0a/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 58648e4..7feefcc 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -20,6 +20,7 @@ INCOMPATIBLE CHANGES
Default max limit increased. Should not affect existing users.
ALL CHANGES:
+ TEZ-2404. Handle DataMovementEvent before its TaskAttemptCompletedEvent
TEZ-2424. Bump up max counter group name length limit to account for per_io counters.
TEZ-2417. Tez UI: Counters are blank in the Attempts page if all attempts failed
TEZ-2366. Pig tez MiniTezCluster unit tests fail intermittently after TEZ-2333
http://git-wip-us.apache.org/repos/asf/tez/blob/02870f0a/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java b/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
index d96da83..b38081b 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
@@ -423,12 +423,17 @@ public class TaskAttemptListenerImpTezDag extends AbstractService implements
}
List<TezEvent> otherEvents = new ArrayList<TezEvent>();
+ // route TASK_STATUS_UPDATE_EVENT directly to TaskAttempt and route other events
+ // (DATA_MOVEMENT_EVENT, TASK_ATTEMPT_COMPLETED_EVENT, TASK_ATTEMPT_FAILED_EVENT)
+ // to VertexImpl to ensure the events ordering
+ // 1. DataMovementEvent is logged as RecoveryEvent before TaskAttemptFinishedEvent
+ // 2. TaskStatusEvent is handled before TaskAttemptFinishedEvent
for (TezEvent tezEvent : ListUtils.emptyIfNull(inEvents)) {
final EventType eventType = tezEvent.getEventType();
- if (eventType == EventType.TASK_STATUS_UPDATE_EVENT ||
- eventType == EventType.TASK_ATTEMPT_COMPLETED_EVENT) {
- context.getEventHandler()
- .handle(getTaskAttemptEventFromTezEvent(taskAttemptID, tezEvent));
+ if (eventType == EventType.TASK_STATUS_UPDATE_EVENT) {
+ TaskAttemptEvent taskAttemptEvent = new TaskAttemptEventStatusUpdate(taskAttemptID,
+ (TaskStatusUpdateEvent) tezEvent.getEvent());
+ context.getEventHandler().handle(taskAttemptEvent);
} else {
otherEvents.add(tezEvent);
}
@@ -453,28 +458,6 @@ public class TaskAttemptListenerImpTezDag extends AbstractService implements
}
}
- private TaskAttemptEvent getTaskAttemptEventFromTezEvent(TezTaskAttemptID taskAttemptID,
- TezEvent tezEvent) {
- final EventType eventType = tezEvent.getEventType();
- TaskAttemptEvent taskAttemptEvent;
- switch (eventType) {
- case TASK_STATUS_UPDATE_EVENT:
- {
- taskAttemptEvent = new TaskAttemptEventStatusUpdate(taskAttemptID,
- (TaskStatusUpdateEvent) tezEvent.getEvent());
- }
- break;
- case TASK_ATTEMPT_COMPLETED_EVENT:
- {
- taskAttemptEvent = new TaskAttemptEvent(taskAttemptID, TaskAttemptEventType.TA_DONE);
- }
- break;
- default:
- throw new TezUncheckedException("unknown event type " + eventType);
- }
- return taskAttemptEvent;
- }
-
private Map<String, TezLocalResource> convertLocalResourceMap(Map<String, LocalResource> ylrs)
throws IOException {
Map<String, TezLocalResource> tlrs = Maps.newHashMap();
http://git-wip-us.apache.org/repos/asf/tez/blob/02870f0a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexImpl.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexImpl.java
index 9ed7441..5d61642 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexImpl.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexImpl.java
@@ -132,6 +132,7 @@ import org.apache.tez.dag.app.dag.event.VertexEventTaskCompleted;
import org.apache.tez.dag.app.dag.event.VertexEventTaskReschedule;
import org.apache.tez.dag.app.dag.event.VertexEventTermination;
import org.apache.tez.dag.app.dag.event.VertexEventType;
+import org.apache.tez.dag.app.dag.event.TaskAttemptEvent;
import org.apache.tez.dag.app.dag.impl.DAGImpl.VertexGroupInfo;
import org.apache.tez.dag.app.dag.speculation.legacy.LegacySpeculator;
import org.apache.tez.dag.history.DAGHistoryEvent;
@@ -4131,6 +4132,13 @@ public class VertexImpl implements org.apache.tez.dag.app.dag.Vertex, EventHandl
);
}
break;
+ case TASK_ATTEMPT_COMPLETED_EVENT:
+ {
+ checkEventSourceMetadata(vertex, sourceMeta);
+ vertex.getEventHandler().handle(
+ new TaskAttemptEvent(sourceMeta.getTaskAttemptID(), TaskAttemptEventType.TA_DONE));
+ }
+ break;
default:
throw new TezUncheckedException("Unhandled tez event type: "
+ tezEvent.getEventType());
http://git-wip-us.apache.org/repos/asf/tez/blob/02870f0a/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskAttemptListenerImplTezDag.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskAttemptListenerImplTezDag.java b/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskAttemptListenerImplTezDag.java
index ec4f99a..f974f40 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskAttemptListenerImplTezDag.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskAttemptListenerImplTezDag.java
@@ -24,6 +24,7 @@ import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;
import java.io.IOException;
+import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
@@ -53,6 +54,7 @@ import org.apache.tez.dag.records.TezDAGID;
import org.apache.tez.dag.records.TezTaskAttemptID;
import org.apache.tez.dag.records.TezTaskID;
import org.apache.tez.dag.records.TezVertexID;
+import org.apache.tez.runtime.api.events.DataMovementEvent;
import org.apache.tez.runtime.api.events.InputInitializerEvent;
import org.apache.tez.runtime.api.events.TaskAttemptCompletedEvent;
import org.apache.tez.runtime.api.events.TaskStatusUpdateEvent;
@@ -179,8 +181,9 @@ public class TestTaskAttemptListenerImplTezDag {
@Test (timeout = 5000)
public void testTaskEventRouting() throws Exception {
List<TezEvent> events = Arrays.asList(
- new TezEvent(InputInitializerEvent.create("test_vertex", "test_input", null), null),
- new TezEvent(new TaskStatusUpdateEvent(null, 0.0f, null), null)
+ new TezEvent(new TaskStatusUpdateEvent(null, 0.0f, null), null),
+ new TezEvent(DataMovementEvent.create(0, ByteBuffer.wrap(new byte[0])), null),
+ new TezEvent(new TaskAttemptCompletedEvent(), null)
);
EventHandler eventHandler = generateHeartbeat(events);
@@ -193,13 +196,15 @@ public class TestTaskAttemptListenerImplTezDag {
assertEquals("First event should be status update", TaskAttemptEventType.TA_STATUS_UPDATE,
statusUpdateEvent.getType());
-
final Event vertexEvent = argAllValues.get(1);
final VertexEventRouteEvent vertexRouteEvent = (VertexEventRouteEvent)vertexEvent;
- assertEquals("Other events should be routed to vertex", VertexEventType.V_ROUTE_EVENT,
+ assertEquals("First event should be routed to vertex", VertexEventType.V_ROUTE_EVENT,
vertexEvent.getType());
- assertEquals(EventType.ROOT_INPUT_INITIALIZER_EVENT,
+ assertEquals(EventType.DATA_MOVEMENT_EVENT,
vertexRouteEvent.getEvents().get(0).getEventType());
+ assertEquals(EventType.TASK_ATTEMPT_COMPLETED_EVENT,
+ vertexRouteEvent.getEvents().get(1).getEventType());
+
}
@Test (timeout = 5000)
@@ -213,9 +218,9 @@ public class TestTaskAttemptListenerImplTezDag {
verify(eventHandler, times(1)).handle(arg.capture());
final List<Event> argAllValues = arg.getAllValues();
- final Event statusUpdateEvent = argAllValues.get(0);
- assertEquals("only event should be task done", TaskAttemptEventType.TA_DONE,
- statusUpdateEvent.getType());
+ final Event event = argAllValues.get(0);
+ assertEquals("only event should be route event", VertexEventType.V_ROUTE_EVENT,
+ event.getType());
}
private EventHandler generateHeartbeat(List<TezEvent> events) throws IOException, TezException {
[05/43] tez git commit: Move TEZ-1752 into 0.8.0 release section.
Posted by ss...@apache.org.
Move TEZ-1752 into 0.8.0 release section.
Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/7476fae8
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/7476fae8
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/7476fae8
Branch: refs/heads/TEZ-2003
Commit: 7476fae834bafb7a36d17382a08a5385fee0e343
Parents: 16bbc58
Author: Siddharth Seth <ss...@apache.org>
Authored: Wed May 6 14:38:10 2015 -0700
Committer: Siddharth Seth <ss...@apache.org>
Committed: Wed May 6 14:38:10 2015 -0700
----------------------------------------------------------------------
CHANGES.txt | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/tez/blob/7476fae8/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index d3aa8a9..fd45454 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -7,6 +7,7 @@ Release 0.8.0: Unreleased
INCOMPATIBLE CHANGES
ALL CHANGES:
+ TEZ-1752. Inputs / Outputs in the Runtime library should be interruptable.
Release 0.7.0: Unreleased
@@ -20,7 +21,6 @@ ALL CHANGES:
TEZ-2406. Tez UI: Display per-io counter columns in task and attempt pages under vertex
TEZ-2384. Add warning message in the case of prewarn under non-session mode.
TEZ-2415. PMC RDF needs to use asfext:pmc, not asfext:PMC.
- TEZ-1752. Inputs / Outputs in the Runtime library should be interruptable.
TEZ-2401. Tez UI: All-dag page has duration keep counting for KILLED dag.
TEZ-2392. Have all readers throw an Exception on incorrect next() usage.
TEZ-2408. TestTaskAttempt fails to compile against hadoop-2.4 and hadoop-2.2.
[10/43] tez git commit: TEZ-2416. Tez UI: Make tooltips display
faster. (Sreenath Somarajapuram via pramachandran)
Posted by ss...@apache.org.
TEZ-2416. Tez UI: Make tooltips display faster. (Sreenath Somarajapuram via pramachandran)
Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/70cd396d
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/70cd396d
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/70cd396d
Branch: refs/heads/TEZ-2003
Commit: 70cd396d27188aa5e79280552199e7c81f7d2822
Parents: 02870f0
Author: Prakash Ramachandran <pr...@hortonworks.com>
Authored: Thu May 7 17:45:04 2015 +0530
Committer: Prakash Ramachandran <pr...@hortonworks.com>
Committed: Thu May 7 17:45:04 2015 +0530
----------------------------------------------------------------------
CHANGES.txt | 1 +
tez-ui/src/main/webapp/app/index.html | 1 -
tez-ui/src/main/webapp/app/scripts/app.js | 5 +++++
tez-ui/src/main/webapp/app/styles/main.less | 9 ++++++++-
4 files changed, 14 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/tez/blob/70cd396d/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 7feefcc..c3d48b6 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -20,6 +20,7 @@ INCOMPATIBLE CHANGES
Default max limit increased. Should not affect existing users.
ALL CHANGES:
+ TEZ-2416. Tez UI: Make tooltips display faster.
TEZ-2404. Handle DataMovementEvent before its TaskAttemptCompletedEvent
TEZ-2424. Bump up max counter group name length limit to account for per_io counters.
TEZ-2417. Tez UI: Counters are blank in the Attempts page if all attempts failed
http://git-wip-us.apache.org/repos/asf/tez/blob/70cd396d/tez-ui/src/main/webapp/app/index.html
----------------------------------------------------------------------
diff --git a/tez-ui/src/main/webapp/app/index.html b/tez-ui/src/main/webapp/app/index.html
index f87b2a6..35cfa20 100644
--- a/tez-ui/src/main/webapp/app/index.html
+++ b/tez-ui/src/main/webapp/app/index.html
@@ -39,7 +39,6 @@
<script src="bower_components/jquery-ui/jquery-ui.js"></script>
<script src="bower_components/bootstrap/js/dropdown.js"></script>
<script src="bower_components/bootstrap/js/button.js"></script>
- <script src="bower_components/bootstrap/js/tooltip.js"></script>
<script src="bower_components/jquery-ui/ui/datepicker.js"></script>
<script src="bower_components/moment/moment.js"></script>
<script src="bower_components/handlebars/handlebars.js"></script>
http://git-wip-us.apache.org/repos/asf/tez/blob/70cd396d/tez-ui/src/main/webapp/app/scripts/app.js
----------------------------------------------------------------------
diff --git a/tez-ui/src/main/webapp/app/scripts/app.js b/tez-ui/src/main/webapp/app/scripts/app.js
index 37aa563..dd493d0 100644
--- a/tez-ui/src/main/webapp/app/scripts/app.js
+++ b/tez-ui/src/main/webapp/app/scripts/app.js
@@ -54,6 +54,11 @@ App.Configs = Em.Namespace.create();
App.ready = function () {
$.extend(App.env, App.Configs.envDefaults);
+ $(document).tooltip({
+ delay: 20,
+ tooltipClass: 'generic-tooltip'
+ });
+
["timelineBaseUrl", "RMWebUrl"].forEach(function(item) {
if (!!App.env[item]) {
App.env[item] = App.Helpers.misc.normalizePath(App.env[item]);
http://git-wip-us.apache.org/repos/asf/tez/blob/70cd396d/tez-ui/src/main/webapp/app/styles/main.less
----------------------------------------------------------------------
diff --git a/tez-ui/src/main/webapp/app/styles/main.less b/tez-ui/src/main/webapp/app/styles/main.less
index ad3f132..e89d4c3 100644
--- a/tez-ui/src/main/webapp/app/styles/main.less
+++ b/tez-ui/src/main/webapp/app/styles/main.less
@@ -897,4 +897,11 @@ body, html {
.per-io {
color: @text-green;
-}
\ No newline at end of file
+}
+
+.generic-tooltip {
+ padding: 3px 5px !important;
+ background: rgba(0,0,0,.8) !important;
+ color: white !important;
+ border: none !important;
+}
[02/43] tez git commit: TEZ-2366. Pig tez MiniTezCluster unit tests
fail intermittently after TEZ-2333 (pramachandran)
Posted by ss...@apache.org.
TEZ-2366. Pig tez MiniTezCluster unit tests fail intermittently after TEZ-2333 (pramachandran)
Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/6a04fa48
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/6a04fa48
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/6a04fa48
Branch: refs/heads/TEZ-2003
Commit: 6a04fa48cb1113faf640115dcbba9b2270e756f3
Parents: 12ef073
Author: Prakash Ramachandran <pr...@hortonworks.com>
Authored: Wed May 6 19:11:06 2015 +0530
Committer: Prakash Ramachandran <pr...@hortonworks.com>
Committed: Wed May 6 19:11:06 2015 +0530
----------------------------------------------------------------------
CHANGES.txt | 1 +
.../runtime/library/common/shuffle/Fetcher.java | 17 ++---
.../common/shuffle/impl/ShuffleManager.java | 12 +++-
.../orderedgrouped/FetcherOrderedGrouped.java | 67 ++++++++++--------
.../common/shuffle/orderedgrouped/Shuffle.java | 11 ++-
.../library/common/shuffle/TestFetcher.java | 49 +++++++++++--
.../impl/TestShuffleInputEventHandlerImpl.java | 14 +++-
.../shuffle/orderedgrouped/TestFetcher.java | 74 +++++++++++++++++++-
8 files changed, 191 insertions(+), 54 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/tez/blob/6a04fa48/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index f060a8c..91dd9c4 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -15,6 +15,7 @@ INCOMPATIBLE CHANGES
TEZ-1993. Implement a pluggable InputSizeEstimator for grouping fairly
ALL CHANGES:
+ TEZ-2366. Pig tez MiniTezCluster unit tests fail intermittently after TEZ-2333
TEZ-2406. Tez UI: Display per-io counter columns in task and attempt pages under vertex
TEZ-2384. Add warning message in the case of prewarn under non-session mode.
TEZ-2415. PMC RDF needs to use asfext:pmc, not asfext:PMC.
http://git-wip-us.apache.org/repos/asf/tez/blob/6a04fa48/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/Fetcher.java
----------------------------------------------------------------------
diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/Fetcher.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/Fetcher.java
index 48fe0f2..61e0151 100644
--- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/Fetcher.java
+++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/Fetcher.java
@@ -33,7 +33,6 @@ import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
-import java.util.concurrent.Callable;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
@@ -48,7 +47,6 @@ import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RawLocalFileSystem;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.util.DiskChecker.DiskErrorException;
-import org.apache.hadoop.yarn.api.ApplicationConstants;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.tez.common.CallableWithNdc;
import org.apache.tez.common.security.JobTokenSecretManager;
@@ -75,6 +73,7 @@ public class Fetcher extends CallableWithNdc<FetchResult> {
private static final AtomicInteger fetcherIdGen = new AtomicInteger(0);
private final Configuration conf;
+ private final int shufflePort;
// Configurable fields.
private CompressionCodec codec;
@@ -132,7 +131,8 @@ public class Fetcher extends CallableWithNdc<FetchResult> {
Path lockPath,
boolean localDiskFetchEnabled,
boolean sharedFetchEnabled,
- String localHostname) {
+ String localHostname,
+ int shufflePort) {
this.fetcherCallback = fetcherCallback;
this.inputManager = inputManager;
this.jobTokenSecretMgr = jobTokenSecretManager;
@@ -151,6 +151,7 @@ public class Fetcher extends CallableWithNdc<FetchResult> {
this.localDirAllocator = localDirAllocator;
this.lockPath = lockPath;
this.localHostname = localHostname;
+ this.shufflePort = shufflePort;
try {
if (this.sharedFetchEnabled) {
@@ -186,7 +187,7 @@ public class Fetcher extends CallableWithNdc<FetchResult> {
HostFetchResult hostFetchResult;
- if (localDiskFetchEnabled && host.equals(localHostname)) {
+ if (localDiskFetchEnabled && host.equals(localHostname) && port == shufflePort) {
hostFetchResult = setupLocalDiskFetch();
} else if (multiplex) {
hostFetchResult = doSharedFetch();
@@ -902,10 +903,10 @@ public class Fetcher extends CallableWithNdc<FetchResult> {
public FetcherBuilder(FetcherCallback fetcherCallback,
HttpConnectionParams params, FetchedInputAllocator inputManager,
ApplicationId appId, JobTokenSecretManager jobTokenSecretMgr, String srcNameTrimmed,
- Configuration conf, boolean localDiskFetchEnabled, String localHostname) {
+ Configuration conf, boolean localDiskFetchEnabled, String localHostname, int shufflePort) {
this.fetcher = new Fetcher(fetcherCallback, params, inputManager, appId,
jobTokenSecretMgr, srcNameTrimmed, conf, null, null, null, localDiskFetchEnabled,
- false, localHostname);
+ false, localHostname, shufflePort);
}
public FetcherBuilder(FetcherCallback fetcherCallback,
@@ -914,10 +915,10 @@ public class Fetcher extends CallableWithNdc<FetchResult> {
Configuration conf, RawLocalFileSystem localFs,
LocalDirAllocator localDirAllocator, Path lockPath,
boolean localDiskFetchEnabled, boolean sharedFetchEnabled,
- String localHostname) {
+ String localHostname, int shufflePort) {
this.fetcher = new Fetcher(fetcherCallback, params, inputManager, appId,
jobTokenSecretMgr, srcNameTrimmed, conf, localFs, localDirAllocator,
- lockPath, localDiskFetchEnabled, sharedFetchEnabled, localHostname);
+ lockPath, localDiskFetchEnabled, sharedFetchEnabled, localHostname, shufflePort);
}
public FetcherBuilder setHttpConnectionParameters(HttpConnectionParams httpParams) {
http://git-wip-us.apache.org/repos/asf/tez/blob/6a04fa48/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/impl/ShuffleManager.java
----------------------------------------------------------------------
diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/impl/ShuffleManager.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/impl/ShuffleManager.java
index d47e652..ac7caca 100644
--- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/impl/ShuffleManager.java
+++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/impl/ShuffleManager.java
@@ -21,6 +21,7 @@ package org.apache.tez.runtime.library.common.shuffle.impl;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
+import java.nio.ByteBuffer;
import java.text.DecimalFormat;
import java.util.Arrays;
import java.util.BitSet;
@@ -161,7 +162,8 @@ public class ShuffleManager implements FetcherCallback {
private final LocalDirAllocator localDirAllocator;
private final RawLocalFileSystem localFs;
private final Path[] localDisks;
- private final static String localhostName = NetUtils.getHostname();
+ private final String localhostName;
+ private final int shufflePort;
private final TezCounter shufflePhaseTime;
private final TezCounter firstEventReceived;
@@ -216,7 +218,7 @@ public class ShuffleManager implements FetcherCallback {
int maxConfiguredFetchers =
conf.getInt(
- TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_PARALLEL_COPIES,
+ TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_PARALLEL_COPIES,
TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_PARALLEL_COPIES_DEFAULT);
this.numFetchers = Math.min(maxConfiguredFetchers, numInputs);
@@ -249,6 +251,10 @@ public class ShuffleManager implements FetcherCallback {
this.localDisks = Iterables.toArray(
localDirAllocator.getAllLocalPathsToRead(".", conf), Path.class);
+ this.localhostName = inputContext.getExecutionContext().getHostName();
+ final ByteBuffer shuffleMetaData =
+ inputContext.getServiceProviderMetaData(ShuffleUtils.SHUFFLE_HANDLER_SERVICE_ID);
+ this.shufflePort = ShuffleUtils.deserializeShuffleProviderMetaData(shuffleMetaData);
Arrays.sort(this.localDisks);
@@ -390,7 +396,7 @@ public class ShuffleManager implements FetcherCallback {
httpConnectionParams, inputManager, inputContext.getApplicationId(),
jobTokenSecretMgr, srcNameTrimmed, conf, localFs, localDirAllocator,
lockDisk, localDiskFetchEnabled, sharedFetchEnabled,
- inputContext.getExecutionContext().getHostName());
+ localhostName, shufflePort);
if (codec != null) {
fetcherBuilder.setCompressionParameters(codec);
http://git-wip-us.apache.org/repos/asf/tez/blob/6a04fa48/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/FetcherOrderedGrouped.java
----------------------------------------------------------------------
diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/FetcherOrderedGrouped.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/FetcherOrderedGrouped.java
index fbaabff..60f1c98 100644
--- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/FetcherOrderedGrouped.java
+++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/FetcherOrderedGrouped.java
@@ -56,7 +56,7 @@ class FetcherOrderedGrouped extends Thread {
private final Configuration conf;
private final boolean localDiskFetchEnabled;
- private static enum ShuffleErrors{IO_ERROR, WRONG_LENGTH, BAD_ID, WRONG_MAP,
+ private enum ShuffleErrors{IO_ERROR, WRONG_LENGTH, BAD_ID, WRONG_MAP,
CONNECTION, WRONG_REDUCE}
private final static String SHUFFLE_ERR_GRP_NAME = "Shuffle Errors";
@@ -72,7 +72,7 @@ class FetcherOrderedGrouped extends Thread {
private final Shuffle shuffle;
private final int id;
private final String logIdentifier;
- private final String localHostname;
+ private final String localShuffleHostPort;
private static int nextId = 0;
private int currentPartition = -1;
@@ -104,7 +104,8 @@ class FetcherOrderedGrouped extends Thread {
CompressionCodec codec,
InputContext inputContext, Configuration conf,
boolean localDiskFetchEnabled,
- String localHostname) throws IOException {
+ String localHostname,
+ int shufflePort) throws IOException {
setDaemon(true);
this.scheduler = scheduler;
this.merger = merger;
@@ -134,7 +135,7 @@ class FetcherOrderedGrouped extends Thread {
this.codec = null;
}
this.conf = conf;
- this.localHostname = localHostname;
+ this.localShuffleHostPort = localHostname + ":" + String.valueOf(shufflePort);
this.localDiskFetchEnabled = localDiskFetchEnabled;
@@ -144,37 +145,41 @@ class FetcherOrderedGrouped extends Thread {
setDaemon(true);
}
- public void run() {
+ @VisibleForTesting
+ protected void fetchNext() throws InterruptedException, IOException {
+ MapHost host = null;
try {
- while (!stopped && !Thread.currentThread().isInterrupted()) {
- remaining = null; // Safety.
- MapHost host = null;
- try {
- // If merge is on, block
- merger.waitForInMemoryMerge();
+ // If merge is on, block
+ merger.waitForInMemoryMerge();
- // In case usedMemory > memorylimit, wait until some memory is released
- merger.waitForShuffleToMergeMemory();
+ // In case usedMemory > memorylimit, wait until some memory is released
+ merger.waitForShuffleToMergeMemory();
- // Get a host to shuffle from
- host = scheduler.getHost();
- metrics.threadBusy();
+ // Get a host to shuffle from
+ host = scheduler.getHost();
+ metrics.threadBusy();
- String hostPort = host.getHostIdentifier();
- String hostname = hostPort.substring(0, hostPort.indexOf(":"));
- if (localDiskFetchEnabled && hostname.equals(localHostname)) {
- setupLocalDiskFetch(host);
- } else {
- // Shuffle
- copyFromHost(host);
- }
- } finally {
- cleanupCurrentConnection(false);
- if (host != null) {
- scheduler.freeHost(host);
- metrics.threadFree();
- }
- }
+ String hostPort = host.getHostIdentifier();
+ if (localDiskFetchEnabled && hostPort.equals(localShuffleHostPort)) {
+ setupLocalDiskFetch(host);
+ } else {
+ // Shuffle
+ copyFromHost(host);
+ }
+ } finally {
+ cleanupCurrentConnection(false);
+ if (host != null) {
+ scheduler.freeHost(host);
+ metrics.threadFree();
+ }
+ }
+ }
+
+ public void run() {
+ try {
+ while (!stopped && !Thread.currentThread().isInterrupted()) {
+ remaining = null; // Safety.
+ fetchNext();
}
} catch (InterruptedException ie) {
//TODO: might not be respected when fetcher is in progress / server is busy. TEZ-711
http://git-wip-us.apache.org/repos/asf/tez/blob/6a04fa48/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/Shuffle.java
----------------------------------------------------------------------
diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/Shuffle.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/Shuffle.java
index 442f032..ee05378 100644
--- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/Shuffle.java
+++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/Shuffle.java
@@ -19,6 +19,7 @@ package org.apache.tez.runtime.library.common.shuffle.orderedgrouped;
import java.io.IOException;
import java.lang.reflect.UndeclaredThrowableException;
+import java.nio.ByteBuffer;
import java.util.List;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
@@ -95,6 +96,8 @@ public class Shuffle implements ExceptionReporter {
private final int ifileReadAheadLength;
private final int numFetchers;
private final boolean localDiskFetchEnabled;
+ private final String localHostname;
+ private final int shufflePort;
private AtomicReference<Throwable> throwable = new AtomicReference<Throwable>();
private String throwingThreadName = null;
@@ -158,6 +161,11 @@ public class Shuffle implements ExceptionReporter {
LocalDirAllocator localDirAllocator =
new LocalDirAllocator(TezRuntimeFrameworkConfigs.LOCAL_DIRS);
+ this.localHostname = inputContext.getExecutionContext().getHostName();
+ final ByteBuffer shuffleMetadata =
+ inputContext.getServiceProviderMetaData(ShuffleUtils.SHUFFLE_HANDLER_SERVICE_ID);
+ this.shufflePort = ShuffleUtils.deserializeShuffleProviderMetaData(shuffleMetadata);
+
// TODO TEZ Get rid of Map / Reduce references.
TezCounter shuffledInputsCounter =
inputContext.getCounters().findCounter(TaskCounter.NUM_SHUFFLED_INPUTS);
@@ -336,8 +344,7 @@ public class Shuffle implements ExceptionReporter {
FetcherOrderedGrouped
fetcher = new FetcherOrderedGrouped(httpConnectionParams, scheduler, merger,
metrics, Shuffle.this, jobTokenSecretMgr, ifileReadAhead, ifileReadAheadLength,
- codec, inputContext, conf, localDiskFetchEnabled,
- inputContext.getExecutionContext().getHostName());
+ codec, inputContext, conf, localDiskFetchEnabled, localHostname, shufflePort);
fetchers.add(fetcher);
fetcher.start();
}
http://git-wip-us.apache.org/repos/asf/tez/blob/6a04fa48/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/TestFetcher.java
----------------------------------------------------------------------
diff --git a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/TestFetcher.java b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/TestFetcher.java
index e6f0c4a..4ef187d 100644
--- a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/TestFetcher.java
+++ b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/TestFetcher.java
@@ -31,6 +31,7 @@ import static org.mockito.Mockito.spy;
import static org.mockito.Mockito.verify;
import java.io.IOException;
+import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.List;
@@ -38,8 +39,11 @@ import com.google.common.collect.Lists;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.LocalFileSystem;
import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.tez.dag.api.TezConfiguration;
+import org.apache.tez.runtime.api.ExecutionContext;
+import org.apache.tez.runtime.api.InputContext;
import org.apache.tez.runtime.library.api.TezRuntimeConfiguration;
import org.apache.tez.runtime.library.common.InputAttemptIdentifier;
import org.apache.tez.runtime.library.common.sort.impl.TezIndexRecord;
@@ -52,19 +56,21 @@ import org.mockito.stubbing.Answer;
public class TestFetcher {
private static final String SHUFFLE_INPUT_FILE_PREFIX = "shuffle_input_file_";
private static String HOST = "localhost";
- private static int PORT = 0;
+ private static int PORT = 41;
@Test(timeout = 3000)
public void testLocalFetchModeSetting() throws Exception {
TezConfiguration conf = new TezConfiguration();
- conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_OPTIMIZE_LOCAL_FETCH, "true");
+ conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_OPTIMIZE_LOCAL_FETCH, true);
InputAttemptIdentifier[] srcAttempts = {
new InputAttemptIdentifier(0, 1, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_1")
};
FetcherCallback fetcherCallback = mock(FetcherCallback.class);
+ final boolean ENABLE_LOCAL_FETCH = true;
+ final boolean DISABLE_LOCAL_FETCH = false;
Fetcher.FetcherBuilder builder = new Fetcher.FetcherBuilder(fetcherCallback, null, null,
- ApplicationId.newInstance(0, 1), null, "fetcherTest", conf, true, HOST);
+ ApplicationId.newInstance(0, 1), null, "fetcherTest", conf, ENABLE_LOCAL_FETCH, HOST, PORT);
builder.assignWork(HOST, PORT, 0, Arrays.asList(srcAttempts));
Fetcher fetcher = spy(builder.build());
@@ -79,10 +85,41 @@ public class TestFetcher {
verify(fetcher).setupLocalDiskFetch();
verify(fetcher, never()).doHttpFetch();
+ // when enabled and hostname does not match use http fetch.
+ builder = new Fetcher.FetcherBuilder(fetcherCallback, null, null,
+ ApplicationId.newInstance(0, 1), null, "fetcherTest", conf, ENABLE_LOCAL_FETCH, HOST,
+ PORT);
+ builder.assignWork(HOST + "_OTHER", PORT, 0, Arrays.asList(srcAttempts));
+ fetcher = spy(builder.build());
+
+ doReturn(null).when(fetcher).setupLocalDiskFetch();
+ doReturn(hfr).when(fetcher).doHttpFetch();
+ doNothing().when(fetcher).shutdown();
+
+ fetcher.call();
+
+ verify(fetcher, never()).setupLocalDiskFetch();
+ verify(fetcher).doHttpFetch();
+
+ // when enabled and port does not match use http fetch.
+ builder = new Fetcher.FetcherBuilder(fetcherCallback, null, null,
+ ApplicationId.newInstance(0, 1), null, "fetcherTest", conf, ENABLE_LOCAL_FETCH, HOST, PORT);
+ builder.assignWork(HOST, PORT + 1, 0, Arrays.asList(srcAttempts));
+ fetcher = spy(builder.build());
+
+ doReturn(null).when(fetcher).setupLocalDiskFetch();
+ doReturn(hfr).when(fetcher).doHttpFetch();
+ doNothing().when(fetcher).shutdown();
+
+ fetcher.call();
+
+ verify(fetcher, never()).setupLocalDiskFetch();
+ verify(fetcher).doHttpFetch();
+
// When disabled use http fetch
- conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_OPTIMIZE_LOCAL_FETCH, "false");
+ conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_OPTIMIZE_LOCAL_FETCH, false);
builder = new Fetcher.FetcherBuilder(fetcherCallback, null, null,
- ApplicationId.newInstance(0, 1), null, "fetcherTest", conf, false, HOST);
+ ApplicationId.newInstance(0, 1), null, "fetcherTest", conf, DISABLE_LOCAL_FETCH, HOST, PORT);
builder.assignWork(HOST, PORT, 0, Arrays.asList(srcAttempts));
fetcher = spy(builder.build());
@@ -115,7 +152,7 @@ public class TestFetcher {
int partition = 42;
FetcherCallback callback = mock(FetcherCallback.class);
Fetcher.FetcherBuilder builder = new Fetcher.FetcherBuilder(callback, null, null,
- ApplicationId.newInstance(0, 1), null, "fetcherTest", conf, true, HOST);
+ ApplicationId.newInstance(0, 1), null, "fetcherTest", conf, true, HOST, PORT);
builder.assignWork(HOST, PORT, partition, Arrays.asList(srcAttempts));
Fetcher fetcher = spy(builder.build());
http://git-wip-us.apache.org/repos/asf/tez/blob/6a04fa48/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/impl/TestShuffleInputEventHandlerImpl.java
----------------------------------------------------------------------
diff --git a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/impl/TestShuffleInputEventHandlerImpl.java b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/impl/TestShuffleInputEventHandlerImpl.java
index 44122a2..c452898 100644
--- a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/impl/TestShuffleInputEventHandlerImpl.java
+++ b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/impl/TestShuffleInputEventHandlerImpl.java
@@ -49,11 +49,13 @@ import org.apache.tez.common.security.JobTokenIdentifier;
import org.apache.tez.common.security.JobTokenSecretManager;
import org.apache.tez.dag.api.TezConstants;
import org.apache.tez.runtime.api.Event;
+import org.apache.tez.runtime.api.ExecutionContext;
import org.apache.tez.runtime.api.InputContext;
import org.apache.tez.runtime.api.events.DataMovementEvent;
import org.apache.tez.runtime.library.common.InputAttemptIdentifier;
import org.apache.tez.runtime.library.common.InputIdentifier;
import org.apache.tez.runtime.library.common.shuffle.FetchedInputAllocator;
+import org.apache.tez.runtime.library.common.shuffle.ShuffleUtils;
import org.apache.tez.runtime.library.shuffle.impl.ShuffleUserPayloads.DataMovementEventPayloadProto;
import org.junit.Test;
@@ -156,10 +158,20 @@ public class TestShuffleInputEventHandlerImpl {
}
- private InputContext createInputContext() {
+ private InputContext createInputContext() throws IOException {
+ DataOutputBuffer port_dob = new DataOutputBuffer();
+ port_dob.writeInt(PORT);
+ final ByteBuffer shuffleMetaData = ByteBuffer.wrap(port_dob.getData(), 0, port_dob.getLength());
+
+ ExecutionContext executionContext = mock(ExecutionContext.class);
+ doReturn(HOST).when(executionContext).getHostName();
+
InputContext inputContext = mock(InputContext.class);
doReturn(new TezCounters()).when(inputContext).getCounters();
doReturn("sourceVertex").when(inputContext).getSourceVertexName();
+ doReturn(shuffleMetaData).when(inputContext)
+ .getServiceProviderMetaData(ShuffleUtils.SHUFFLE_HANDLER_SERVICE_ID);
+ doReturn(executionContext).when(inputContext).getExecutionContext();
return inputContext;
}
http://git-wip-us.apache.org/repos/asf/tez/blob/6a04fa48/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/TestFetcher.java
----------------------------------------------------------------------
diff --git a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/TestFetcher.java b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/TestFetcher.java
index 2e826d8..c33905f 100644
--- a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/TestFetcher.java
+++ b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/TestFetcher.java
@@ -29,6 +29,7 @@ import static org.mockito.Mockito.doAnswer;
import static org.mockito.Mockito.doNothing;
import static org.mockito.Mockito.doReturn;
import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.never;
import static org.mockito.Mockito.spy;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;
@@ -39,9 +40,11 @@ import java.io.DataInputStream;
import java.io.IOException;
import java.net.HttpURLConnection;
import java.net.URL;
+import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.List;
+import org.apache.hadoop.io.DataOutputBuffer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.conf.Configuration;
@@ -68,11 +71,76 @@ public class TestFetcher {
public static final String SHUFFLE_INPUT_FILE_PREFIX = "shuffle_input_file_";
public static final String HOST = "localhost";
- public static final int PORT = 0;
+ public static final int PORT = 65;
static final Logger LOG = LoggerFactory.getLogger(TestFetcher.class);
@Test(timeout = 5000)
+ public void testLocalFetchModeSetting1() throws Exception {
+ Configuration conf = new TezConfiguration();
+ ShuffleScheduler scheduler = mock(ShuffleScheduler.class);
+ MergeManager merger = mock(MergeManager.class);
+ ShuffleClientMetrics metrics = mock(ShuffleClientMetrics.class);
+ Shuffle shuffle = mock(Shuffle.class);
+
+ InputContext inputContext = mock(InputContext.class);
+ doReturn(new TezCounters()).when(inputContext).getCounters();
+ doReturn("src vertex").when(inputContext).getSourceVertexName();
+
+ final boolean ENABLE_LOCAL_FETCH = true;
+ final boolean DISABLE_LOCAL_FETCH = false;
+ MapHost mapHost = new MapHost(0, HOST + ":" + PORT, "baseurl");
+ FetcherOrderedGrouped
+ fetcher = new FetcherOrderedGrouped(null, scheduler, merger, metrics, shuffle, null,
+ false, 0, null, inputContext, conf, ENABLE_LOCAL_FETCH, HOST, PORT);
+
+ // when local mode is enabled and host and port matches use local fetch
+ FetcherOrderedGrouped spyFetcher = spy(fetcher);
+ doNothing().when(spyFetcher).setupLocalDiskFetch(mapHost);
+ doReturn(mapHost).when(scheduler).getHost();
+
+ spyFetcher.fetchNext();
+
+ verify(spyFetcher, times(1)).setupLocalDiskFetch(mapHost);
+ verify(spyFetcher, never()).copyFromHost(any(MapHost.class));
+
+ // if hostname does not match use http
+ spyFetcher = spy(fetcher);
+ mapHost = new MapHost(0, HOST + "_OTHER" + ":" + PORT, "baseurl");
+ doNothing().when(spyFetcher).setupLocalDiskFetch(mapHost);
+ doReturn(mapHost).when(scheduler).getHost();
+
+ spyFetcher.fetchNext();
+
+ verify(spyFetcher, never()).setupLocalDiskFetch(any(MapHost.class));
+ verify(spyFetcher, times(1)).copyFromHost(mapHost);
+
+ // if port does not match use http
+ spyFetcher = spy(fetcher);
+ mapHost = new MapHost(0, HOST + ":" + (PORT + 1), "baseurl");
+ doNothing().when(spyFetcher).setupLocalDiskFetch(mapHost);
+ doReturn(mapHost).when(scheduler).getHost();
+
+ spyFetcher.fetchNext();
+
+ verify(spyFetcher, never()).setupLocalDiskFetch(any(MapHost.class));
+ verify(spyFetcher, times(1)).copyFromHost(mapHost);
+
+ //if local fetch is not enabled
+ mapHost = new MapHost(0, HOST + ":" + PORT, "baseurl");
+ fetcher = new FetcherOrderedGrouped(null, scheduler, merger, metrics, shuffle, null,
+ false, 0, null, inputContext, conf, DISABLE_LOCAL_FETCH, HOST, PORT);
+ spyFetcher = spy(fetcher);
+ doNothing().when(spyFetcher).setupLocalDiskFetch(mapHost);
+ doReturn(mapHost).when(scheduler).getHost();
+
+ spyFetcher.fetchNext();
+
+ verify(spyFetcher, never()).setupLocalDiskFetch(any(MapHost.class));
+ verify(spyFetcher, times(1)).copyFromHost(mapHost);
+ }
+
+ @Test(timeout = 5000)
public void testSetupLocalDiskFetch() throws Exception {
Configuration conf = new TezConfiguration();
ShuffleScheduler scheduler = mock(ShuffleScheduler.class);
@@ -85,7 +153,7 @@ public class TestFetcher {
FetcherOrderedGrouped
fetcher = new FetcherOrderedGrouped(null, scheduler, merger, metrics, shuffle, null,
- false, 0, null, inputContext, conf, true, HOST);
+ false, 0, null, inputContext, conf, true, HOST, PORT);
FetcherOrderedGrouped spyFetcher = spy(fetcher);
MapHost host = new MapHost(1, HOST + ":" + PORT,
@@ -228,7 +296,7 @@ public class TestFetcher {
ShuffleUtils.constructHttpShuffleConnectionParams(conf);
FetcherOrderedGrouped mockFetcher =
new FetcherOrderedGrouped(httpConnectionParams, scheduler, merger, metrics, shuffle, null,
- false, 0, null, inputContext, conf, false, HOST);
+ false, 0, null, inputContext, conf, false, HOST, PORT);
final FetcherOrderedGrouped fetcher = spy(mockFetcher);
final MapHost host = new MapHost(1, HOST + ":" + PORT,
[18/43] tez git commit: TEZ-2019. Temporarily allow the scheduler and
launcher to be specified via configuration. (sseth)
Posted by ss...@apache.org.
TEZ-2019. Temporarily allow the scheduler and launcher to be specified
via configuration. (sseth)
Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/44bea934
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/44bea934
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/44bea934
Branch: refs/heads/TEZ-2003
Commit: 44bea934be1441323ed764692ec9287f06fdc2c2
Parents: ce69aa1
Author: Siddharth Seth <ss...@apache.org>
Authored: Fri Jan 30 16:02:32 2015 -0800
Committer: Siddharth Seth <ss...@apache.org>
Committed: Fri May 8 14:36:09 2015 -0700
----------------------------------------------------------------------
TEZ-2003-CHANGES.txt | 4 +++
.../apache/tez/dag/api/TezConfiguration.java | 6 ++++
.../org/apache/tez/dag/app/DAGAppMaster.java | 30 ++++++++++++++++-
.../dag/app/rm/TaskSchedulerEventHandler.java | 34 ++++++++++++++++++--
.../org/apache/tez/runtime/task/TezChild.java | 3 +-
5 files changed, 73 insertions(+), 4 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/tez/blob/44bea934/TEZ-2003-CHANGES.txt
----------------------------------------------------------------------
diff --git a/TEZ-2003-CHANGES.txt b/TEZ-2003-CHANGES.txt
new file mode 100644
index 0000000..1822fcb
--- /dev/null
+++ b/TEZ-2003-CHANGES.txt
@@ -0,0 +1,4 @@
+ALL CHANGES:
+ TEZ-2019. Temporarily allow the scheduler and launcher to be specified via configuration.
+
+INCOMPATIBLE CHANGES:
http://git-wip-us.apache.org/repos/asf/tez/blob/44bea934/tez-api/src/main/java/org/apache/tez/dag/api/TezConfiguration.java
----------------------------------------------------------------------
diff --git a/tez-api/src/main/java/org/apache/tez/dag/api/TezConfiguration.java b/tez-api/src/main/java/org/apache/tez/dag/api/TezConfiguration.java
index e4170e9..01e724e 100644
--- a/tez-api/src/main/java/org/apache/tez/dag/api/TezConfiguration.java
+++ b/tez-api/src/main/java/org/apache/tez/dag/api/TezConfiguration.java
@@ -1168,6 +1168,12 @@ public class TezConfiguration extends Configuration {
+ "tez-ui.webservice.enable";
public static final boolean TEZ_AM_WEBSERVICE_ENABLE_DEFAULT = true;
+ @ConfigurationScope(Scope.VERTEX)
+ public static final String TEZ_AM_CONTAINER_LAUNCHER_CLASS = TEZ_AM_PREFIX + "container-launcher.class";
+ @ConfigurationScope(Scope.VERTEX)
+ public static final String TEZ_AM_TASK_SCHEDULER_CLASS = TEZ_AM_PREFIX + "task-scheduler.class";
+
+
// TODO only validate property here, value can also be validated if necessary
public static void validateProperty(String property, Scope usedScope) {
Scope validScope = PropertyScope.get(property);
http://git-wip-us.apache.org/repos/asf/tez/blob/44bea934/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java b/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java
index 3e3d6f0..73ee56e 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java
@@ -25,6 +25,8 @@ import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.PrintWriter;
+import java.lang.reflect.Constructor;
+import java.lang.reflect.InvocationTargetException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
@@ -93,6 +95,7 @@ import org.apache.hadoop.yarn.util.SystemClock;
import org.apache.tez.common.AsyncDispatcher;
import org.apache.tez.common.AsyncDispatcherConcurrent;
import org.apache.tez.common.GcTimeUpdater;
+import org.apache.tez.common.ReflectionUtils;
import org.apache.tez.common.TezCommonUtils;
import org.apache.tez.common.TezConverterUtils;
import org.apache.tez.common.TezUtilsInternal;
@@ -1034,9 +1037,34 @@ public class DAGAppMaster extends AbstractService {
protected ContainerLauncher
createContainerLauncher(final AppContext context) throws UnknownHostException {
if(isLocal){
+ LOG.info("Creating LocalContainerLauncher");
return new LocalContainerLauncher(context, taskAttemptListener, workingDirectory);
} else {
- return new ContainerLauncherImpl(context);
+ // TODO: Temporary reflection with specific parameters until a clean interface is defined.
+ String containerLauncherClassName = getConfig().get(TezConfiguration.TEZ_AM_CONTAINER_LAUNCHER_CLASS);
+ if (containerLauncherClassName == null) {
+ LOG.info("Creating Default Container Launcher");
+ return new ContainerLauncherImpl(context);
+ } else {
+ LOG.info("Creating container launcher : " + containerLauncherClassName);
+ Class<? extends ContainerLauncher> containerLauncherClazz = (Class<? extends ContainerLauncher>) ReflectionUtils.getClazz(
+ containerLauncherClassName);
+ try {
+ Constructor<? extends ContainerLauncher> ctor = containerLauncherClazz
+ .getConstructor(AppContext.class, Configuration.class, TaskAttemptListener.class);
+ ctor.setAccessible(true);
+ ContainerLauncher instance = ctor.newInstance(context, getConfig(), taskAttemptListener);
+ return instance;
+ } catch (NoSuchMethodException e) {
+ throw new TezUncheckedException(e);
+ } catch (InvocationTargetException e) {
+ throw new TezUncheckedException(e);
+ } catch (InstantiationException e) {
+ throw new TezUncheckedException(e);
+ } catch (IllegalAccessException e) {
+ throw new TezUncheckedException(e);
+ }
+ }
}
}
http://git-wip-us.apache.org/repos/asf/tez/blob/44bea934/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerEventHandler.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerEventHandler.java b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerEventHandler.java
index 19db660..62f82db 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerEventHandler.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerEventHandler.java
@@ -18,6 +18,8 @@
package org.apache.tez.dag.app.rm;
+import java.lang.reflect.Constructor;
+import java.lang.reflect.InvocationTargetException;
import java.net.InetSocketAddress;
import java.nio.ByteBuffer;
import java.util.List;
@@ -42,6 +44,7 @@ import org.apache.hadoop.yarn.api.records.Priority;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.event.Event;
import org.apache.hadoop.yarn.event.EventHandler;
+import org.apache.tez.common.ReflectionUtils;
import org.apache.tez.dag.api.TezConfiguration;
import org.apache.tez.dag.api.TezUncheckedException;
import org.apache.tez.dag.api.TaskLocationHint;
@@ -329,12 +332,39 @@ public class TaskSchedulerEventHandler extends AbstractService
boolean isLocal = getConfig().getBoolean(TezConfiguration.TEZ_LOCAL_MODE,
TezConfiguration.TEZ_LOCAL_MODE_DEFAULT);
if (isLocal) {
+ LOG.info("Using TaskScheduler: LocalTaskSchedulerService");
return new LocalTaskSchedulerService(this, this.containerSignatureMatcher,
host, port, trackingUrl, appContext);
}
else {
- return new YarnTaskSchedulerService(this, this.containerSignatureMatcher,
- host, port, trackingUrl, appContext);
+ String schedulerClassName = getConfig().get(TezConfiguration.TEZ_AM_TASK_SCHEDULER_CLASS);
+ if (schedulerClassName == null) {
+ LOG.info("Using TaskScheduler: YarnTaskSchedulerService");
+ return new YarnTaskSchedulerService(this, this.containerSignatureMatcher,
+ host, port, trackingUrl, appContext);
+ } else {
+ LOG.info("Using custom TaskScheduler: " + schedulerClassName);
+ // TODO Temporary reflection with specific parameters. Remove once there is a clean interface.
+ Class<? extends TaskSchedulerService> taskSchedulerClazz =
+ (Class<? extends TaskSchedulerService>) ReflectionUtils.getClazz(schedulerClassName);
+ try {
+ Constructor<? extends TaskSchedulerService> ctor = taskSchedulerClazz
+ .getConstructor(TaskSchedulerAppCallback.class, AppContext.class, String.class,
+ Integer.class, String.class, Configuration.class);
+ ctor.setAccessible(true);
+ TaskSchedulerService taskSchedulerService =
+ ctor.newInstance(this, appContext, host, port, trackingUrl, getConfig());
+ return taskSchedulerService;
+ } catch (NoSuchMethodException e) {
+ throw new TezUncheckedException(e);
+ } catch (InvocationTargetException e) {
+ throw new TezUncheckedException(e);
+ } catch (InstantiationException e) {
+ throw new TezUncheckedException(e);
+ } catch (IllegalAccessException e) {
+ throw new TezUncheckedException(e);
+ }
+ }
}
}
http://git-wip-us.apache.org/repos/asf/tez/blob/44bea934/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TezChild.java
----------------------------------------------------------------------
diff --git a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TezChild.java b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TezChild.java
index bfec349..fd55992 100644
--- a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TezChild.java
+++ b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TezChild.java
@@ -355,7 +355,8 @@ public class TezChild {
}
if (!isLocal) {
RPC.stopProxy(umbilical);
- LogManager.shutdown();
+ // TODO Temporary change. Revert. Ideally, move this over to the main method in TezChild if possible.
+// LogManager.shutdown();
}
}
}
[13/43] tez git commit: TEZ-776. Reduce AM mem usage caused by
storing TezEvents (bikas)
Posted by ss...@apache.org.
http://git-wip-us.apache.org/repos/asf/tez/blob/05f77fe2/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexImpl.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexImpl.java
index 5d61642..a16ee0a 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexImpl.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexImpl.java
@@ -91,6 +91,7 @@ import org.apache.tez.dag.api.records.DAGProtos.RootInputLeafOutputProto;
import org.apache.tez.dag.api.records.DAGProtos.VertexPlan;
import org.apache.tez.dag.app.AppContext;
import org.apache.tez.dag.app.ContainerContext;
+import org.apache.tez.dag.app.TaskAttemptEventInfo;
import org.apache.tez.dag.app.TaskAttemptListener;
import org.apache.tez.dag.app.TaskHeartbeatHandler;
import org.apache.tez.dag.app.dag.DAG;
@@ -134,6 +135,7 @@ import org.apache.tez.dag.app.dag.event.VertexEventTermination;
import org.apache.tez.dag.app.dag.event.VertexEventType;
import org.apache.tez.dag.app.dag.event.TaskAttemptEvent;
import org.apache.tez.dag.app.dag.impl.DAGImpl.VertexGroupInfo;
+import org.apache.tez.dag.app.dag.impl.Edge.PendingEventRouteMetadata;
import org.apache.tez.dag.app.dag.speculation.legacy.LegacySpeculator;
import org.apache.tez.dag.history.DAGHistoryEvent;
import org.apache.tez.dag.history.HistoryEvent;
@@ -202,7 +204,6 @@ public class VertexImpl implements org.apache.tez.dag.app.dag.Vertex, EventHandl
//final fields
private final Clock clock;
-
private final Lock readLock;
private final Lock writeLock;
private final TaskAttemptListener taskAttemptListener;
@@ -225,6 +226,9 @@ public class VertexImpl implements org.apache.tez.dag.app.dag.Vertex, EventHandl
private Configuration vertexConf;
private final boolean isSpeculationEnabled;
+
+ @VisibleForTesting
+ public boolean useOnDemandRouting = true;
//fields initialized in init
@@ -726,9 +730,18 @@ public class VertexImpl implements org.apache.tez.dag.app.dag.Vertex, EventHandl
private Set<String> inputsWithInitializers;
private int numInitializedInputs;
private boolean startSignalPending = false;
- private boolean tasksNotYetScheduled = true;
// We may always store task events in the vertex for scalability
List<TezEvent> pendingTaskEvents = Lists.newLinkedList();
+ private boolean tasksNotYetScheduled = true;
+ // must be a random access structure
+
+ private final List<EventInfo> onDemandRouteEvents = Lists.newArrayListWithCapacity(1000);
+ private final ReadWriteLock onDemandRouteEventsReadWriteLock = new ReentrantReadWriteLock();
+ private final Lock onDemandRouteEventsReadLock = onDemandRouteEventsReadWriteLock.readLock();
+ private final Lock onDemandRouteEventsWriteLock = onDemandRouteEventsReadWriteLock.writeLock();
+
+ // Shared placeholder handed out by getTaskAttemptTezEvents when there is nothing new to send.
+ private static final List<TezEvent> EMPTY_TASK_ATTEMPT_TEZ_EVENTS =
+ new ArrayList<TezEvent>(0);
List<TezEvent> pendingRouteEvents = new LinkedList<TezEvent>();
List<TezTaskAttemptID> pendingReportedSrcCompletions = Lists.newLinkedList();
@@ -771,6 +784,17 @@ public class VertexImpl implements org.apache.tez.dag.app.dag.Vertex, EventHandl
private VertexStats vertexStats = null;
private final TaskSpecificLaunchCmdOption taskSpecificLaunchCmdOpts;
+
+ static class EventInfo { // one entry of onDemandRouteEvents: an event plus the data needed to route it later
+ final TezEvent tezEvent; // the buffered event itself
+ final Edge eventEdge; // source edge the event arrived on; null is stored for root input data events
+ final int eventTaskIndex; // index of the source task that produced the event; -1 is stored for root input data events
+ EventInfo(TezEvent tezEvent, Edge eventEdge, int eventTaskIndex) {
+ this.tezEvent = tezEvent;
+ this.eventEdge = eventEdge;
+ this.eventTaskIndex = eventTaskIndex;
+ }
+ }
private VertexStatisticsImpl finalStatistics;
@@ -1175,6 +1199,11 @@ public class VertexImpl implements org.apache.tez.dag.app.dag.Vertex, EventHandl
}
}
+ @VisibleForTesting
+ List<EventInfo> getOnDemandRouteEvents() { // test hook: exposes the live backing list, not a copy
+ return onDemandRouteEvents; // NOTE(review): returned without taking the onDemandRouteEvents read lock - confirm callers do not race with routing
+ }
+
private void computeProgress() {
this.readLock.lock();
try {
@@ -1388,24 +1417,51 @@ public class VertexImpl implements org.apache.tez.dag.app.dag.Vertex, EventHandl
}
}
- @Override
- public void scheduleTasks(List<TaskWithLocationHint> tasksToSchedule) {
- writeLock.lock();
- try {
+ void setupEdgeRouting() throws AMUserCodeException {
+ // routingToBegin() is invoked on every source edge, even after on demand
+ // routing has been switched off; a single edge that reports false disables
+ // on demand routing for the whole vertex.
+ for (Edge sourceEdge : sourceVertices.values()) {
+ boolean edgeSupportsOnDemand = sourceEdge.routingToBegin();
+ if (edgeSupportsOnDemand || !useOnDemandRouting) {
+ continue;
+ }
+ useOnDemandRouting = false;
+ LOG.info("Not using ondemand routing because of edge between " + sourceEdge.getSourceVertexName()
+ + " and " + getLogIdentifier());
+ }
+ }
+
+ private void unsetTasksNotYetScheduled() throws AMUserCodeException {
+ if (tasksNotYetScheduled) {
+ setupEdgeRouting();
tasksNotYetScheduled = false;
+ // only now can we be sure of the edge manager type. so until now
+ // we will accumulate pending tasks in case legacy routing gets used.
+ // this is only needed to support mixed mode routing. Else for
+ // on demand routing events can be directly added to taskEvents when
+ // they arrive in handleRoutedEvents instead of first caching them in
+ // pendingTaskEvents. When legacy routing is removed then pendingTaskEvents
+ // can be removed.
if (!pendingTaskEvents.isEmpty()) {
LOG.info("Routing pending task events for vertex: " + logIdentifier);
try {
- handleRoutedTezEvents(this, pendingTaskEvents, false, true);
+ handleRoutedTezEvents(pendingTaskEvents, false, true);
} catch (AMUserCodeException e) {
- String msg = "Exception in " + e.getSource() +", vertex=" + logIdentifier;
+ String msg = "Exception in " + e.getSource() + ", vertex=" + logIdentifier;
LOG.error(msg, e);
- addDiagnostic(msg + ", " + e.getMessage() + ", " + ExceptionUtils.getStackTrace(e.getCause()));
- eventHandler.handle(new VertexEventTermination(vertexId, VertexTerminationCause.AM_USERCODE_FAILURE));
+ addDiagnostic(msg + ", " + e.getMessage() + ", "
+ + ExceptionUtils.getStackTrace(e.getCause()));
+ eventHandler.handle(new VertexEventTermination(vertexId,
+ VertexTerminationCause.AM_USERCODE_FAILURE));
return;
}
pendingTaskEvents.clear();
}
+ }
+ }
+
+ @Override
+ public void scheduleTasks(List<TaskWithLocationHint> tasksToSchedule) {
+ writeLock.lock();
+ try {
+ unsetTasksNotYetScheduled();
for (TaskWithLocationHint task : tasksToSchedule) {
if (numTasks <= task.getTaskIndex().intValue()) {
throw new TezUncheckedException(
@@ -1422,6 +1478,13 @@ public class VertexImpl implements org.apache.tez.dag.app.dag.Vertex, EventHandl
TezTaskID.getInstance(vertexId, task.getTaskIndex().intValue()),
TaskEventType.T_SCHEDULE));
}
+ } catch (AMUserCodeException e) {
+ String msg = "Exception in " + e.getSource() + ", vertex=" + getLogIdentifier();
+ LOG.error(msg, e);
+ // send event to fail the vertex
+ eventHandler.handle(new VertexEventManagerUserCodeError(getVertexId(), e));
+ // throw an unchecked exception to stop the vertex manager that invoked this.
+ throw new TezUncheckedException(e);
} finally {
writeLock.unlock();
}
@@ -2497,7 +2560,6 @@ public class VertexImpl implements org.apache.tez.dag.app.dag.Vertex, EventHandl
}
break;
case RUNNING:
- vertex.tasksNotYetScheduled = false;
try {
vertex.initializeCommitters();
} catch (Exception e) {
@@ -2530,6 +2592,7 @@ public class VertexImpl implements org.apache.tez.dag.app.dag.Vertex, EventHandl
}
try {
vertex.recoveryCodeSimulatingStart();
+ vertex.unsetTasksNotYetScheduled();
endState = VertexState.RUNNING;
} catch (AMUserCodeException e) {
String msg = "Exception in " + e.getSource() + ", vertex:" + vertex.getLogIdentifier();
@@ -2560,7 +2623,6 @@ public class VertexImpl implements org.apache.tez.dag.app.dag.Vertex, EventHandl
VertexTerminationCause.COMMIT_FAILURE, msg);
endState = VertexState.FAILED;
} else {
- vertex.tasksNotYetScheduled = false;
// recover tasks
if (vertex.tasks != null && vertex.numTasks != 0) {
TaskState taskState = TaskState.KILLED;
@@ -2578,6 +2640,7 @@ public class VertexImpl implements org.apache.tez.dag.app.dag.Vertex, EventHandl
}
try {
vertex.recoveryCodeSimulatingStart();
+ vertex.unsetTasksNotYetScheduled();
endState = VertexState.RUNNING;
} catch (AMUserCodeException e) {
String msg = "Exception in " + e.getSource() +", vertex:" + vertex.getLogIdentifier();
@@ -2901,7 +2964,6 @@ public class VertexImpl implements org.apache.tez.dag.app.dag.Vertex, EventHandl
endState = VertexState.INITED;
break;
case RUNNING:
- vertex.tasksNotYetScheduled = false;
// if commit in progress and desired state is not a succeeded one,
// move to failed
if (vertex.recoveryCommitInProgress) {
@@ -2946,6 +3008,7 @@ public class VertexImpl implements org.apache.tez.dag.app.dag.Vertex, EventHandl
}
try {
vertex.recoveryCodeSimulatingStart();
+ vertex.unsetTasksNotYetScheduled();
endState = VertexState.RUNNING;
} catch (AMUserCodeException e) {
String msg = "Exception in " + e.getSource() + ", vertex=" + vertex.getLogIdentifier();
@@ -2962,7 +3025,6 @@ public class VertexImpl implements org.apache.tez.dag.app.dag.Vertex, EventHandl
case SUCCEEDED:
case FAILED:
case KILLED:
- vertex.tasksNotYetScheduled = false;
// recover tasks
assert vertex.tasks.size() == vertex.numTasks;
if (vertex.tasks != null && vertex.numTasks != 0) {
@@ -2982,6 +3044,7 @@ public class VertexImpl implements org.apache.tez.dag.app.dag.Vertex, EventHandl
// Wait for all tasks to recover and report back
try {
vertex.recoveryCodeSimulatingStart();
+ vertex.unsetTasksNotYetScheduled();
endState = VertexState.RUNNING;
} catch (AMUserCodeException e) {
String msg = "Exception in " + e.getSource() + ", vertex:" + vertex.getLogIdentifier();
@@ -3025,7 +3088,7 @@ public class VertexImpl implements org.apache.tez.dag.app.dag.Vertex, EventHandl
vertex.recoveredEvents.clear();
if (!vertex.pendingRouteEvents.isEmpty()) {
try {
- handleRoutedTezEvents(vertex, vertex.pendingRouteEvents, false, true);
+ vertex.handleRoutedTezEvents(vertex.pendingRouteEvents, false, true);
vertex.pendingRouteEvents.clear();
} catch (AMUserCodeException e) {
String msg = "Exception in " + e.getSource() + ", vertex=" + vertex.getLogIdentifier();
@@ -3284,7 +3347,7 @@ public class VertexImpl implements org.apache.tez.dag.app.dag.Vertex, EventHandl
List<TezEvent> inputInfoEvents = iEvent.getEvents();
try {
if (inputInfoEvents != null && !inputInfoEvents.isEmpty()) {
- VertexImpl.handleRoutedTezEvents(vertex, inputInfoEvents, false, false);
+ vertex.handleRoutedTezEvents(inputInfoEvents, false, false);
}
} catch (AMUserCodeException e) {
String msg = "Exception in " + e.getSource() + ", vertex:" + vertex.getLogIdentifier();
@@ -3941,7 +4004,7 @@ public class VertexImpl implements org.apache.tez.dag.app.dag.Vertex, EventHandl
boolean recovered = rEvent.isRecovered();
List<TezEvent> tezEvents = rEvent.getEvents();
try {
- VertexImpl.handleRoutedTezEvents(vertex, tezEvents, recovered, false);
+ vertex.handleRoutedTezEvents(tezEvents, recovered, false);
} catch (AMUserCodeException e) {
String msg = "Exception in " + e.getSource() + ", vertex=" + vertex.getLogIdentifier();
LOG.error(msg, e);
@@ -3959,16 +4022,105 @@ public class VertexImpl implements org.apache.tez.dag.app.dag.Vertex, EventHandl
return vertex.getState();
}
}
+
+ @Override
+ public TaskAttemptEventInfo getTaskAttemptTezEvents(TezTaskAttemptID attemptID,
+ int fromEventId, int maxEvents) {
+ if (!useOnDemandRouting) {
+ List<TezEvent> events = getTask(attemptID.getTaskID()).getTaskAttemptTezEvents(attemptID, fromEventId, maxEvents);
+ return new TaskAttemptEventInfo(fromEventId + events.size(), events);
+ }
- private static void handleRoutedTezEvents(VertexImpl vertex, List<TezEvent> tezEvents, boolean recovered, boolean isPendingEvents) throws AMUserCodeException {
- if (vertex.getAppContext().isRecoveryEnabled()
+ onDemandRouteEventsReadLock.lock();
+ try {
+ List<TezEvent> events = EMPTY_TASK_ATTEMPT_TEZ_EVENTS;
+ int nextFromEventId = fromEventId;
+ int currEventCount = onDemandRouteEvents.size();
+ try {
+ if (currEventCount > fromEventId) {
+ events = Lists.newArrayListWithCapacity(maxEvents);
+ int taskIndex = attemptID.getTaskID().getId();
+ Preconditions.checkState(taskIndex < tasks.size(), "Invalid task index for TA: " + attemptID
+ + " vertex: " + getLogIdentifier());
+ boolean isFirstEvent = true;
+ for (nextFromEventId = fromEventId; nextFromEventId < currEventCount; ++nextFromEventId) {
+ boolean earlyExit = false;
+ if (events.size() == maxEvents) {
+ break;
+ }
+ EventInfo eventInfo = onDemandRouteEvents.get(nextFromEventId);
+ TezEvent tezEvent = eventInfo.tezEvent;
+ switch(tezEvent.getEventType()) {
+ case INPUT_FAILED_EVENT:
+ case DATA_MOVEMENT_EVENT:
+ case COMPOSITE_DATA_MOVEMENT_EVENT:
+ {
+ int srcTaskIndex = eventInfo.eventTaskIndex;
+ Edge srcEdge = eventInfo.eventEdge;
+ PendingEventRouteMetadata pendingRoute = null;
+ if (isFirstEvent) {
+ // do this precondition check only for the first event
+ isFirstEvent = false;
+ pendingRoute = srcEdge.removePendingEvents(attemptID);
+ if (pendingRoute != null) {
+ Preconditions.checkState(tezEvent == pendingRoute.getTezEvent()); // same object
+ }
+ }
+ if (!srcEdge.maybeAddTezEventForDestinationTask(tezEvent, attemptID, srcTaskIndex,
+ events, maxEvents, pendingRoute)) {
+ // not enough space left for this iteration events.
+ // Exit and start from here next time
+ earlyExit = true;
+ }
+ }
+ break;
+ case ROOT_INPUT_DATA_INFORMATION_EVENT:
+ {
+ InputDataInformationEvent riEvent = (InputDataInformationEvent) tezEvent.getEvent();
+ if (riEvent.getTargetIndex() == taskIndex) {
+ events.add(tezEvent);
+ }
+ }
+ break;
+ default:
+ throw new TezUncheckedException("Unexpected event type for task: "
+ + tezEvent.getEventType());
+ }
+ if (earlyExit) {
+ break;
+ }
+ }
+ }
+ } catch (AMUserCodeException e) {
+ String msg = "Exception in " + e.getSource() + ", vertex=" + getLogIdentifier();
+ LOG.error(msg, e);
+ eventHandler.handle(new VertexEventManagerUserCodeError(getVertexId(), e));
+ nextFromEventId = fromEventId;
+ events.clear();
+ }
+
+ if (events.size() > 0) {
+ StringBuilder builder = new StringBuilder();
+ builder.append("Sending ").append(attemptID).append(" numEvents: ").append(events.size())
+ .append(" from: ").append(fromEventId).append(" to: ").append(nextFromEventId)
+ .append(" out of ").append(currEventCount).append(" events in vertex: ").append(getLogIdentifier());
+ LOG.info(builder.toString());
+ }
+ return new TaskAttemptEventInfo(nextFromEventId, events);
+ } finally {
+ onDemandRouteEventsReadLock.unlock();
+ }
+ }
+
+ private void handleRoutedTezEvents(List<TezEvent> tezEvents, boolean recovered, boolean isPendingEvents) throws AMUserCodeException {
+ if (getAppContext().isRecoveryEnabled()
&& !recovered
&& !isPendingEvents
&& !tezEvents.isEmpty()) {
List<TezEvent> recoveryEvents =
Lists.newArrayList();
for (TezEvent tezEvent : tezEvents) {
- if (!isEventFromVertex(vertex, tezEvent.getSourceInfo())) {
+ if (!isEventFromVertex(this, tezEvent.getSourceInfo())) {
continue;
}
if (tezEvent.getEventType().equals(EventType.COMPOSITE_DATA_MOVEMENT_EVENT)
@@ -3980,15 +4132,15 @@ public class VertexImpl implements org.apache.tez.dag.app.dag.Vertex, EventHandl
}
if (!recoveryEvents.isEmpty()) {
VertexRecoverableEventsGeneratedEvent historyEvent =
- new VertexRecoverableEventsGeneratedEvent(vertex.vertexId,
+ new VertexRecoverableEventsGeneratedEvent(vertexId,
recoveryEvents);
- vertex.appContext.getHistoryHandler().handle(
- new DAGHistoryEvent(vertex.getDAGId(), historyEvent));
+ appContext.getHistoryHandler().handle(
+ new DAGHistoryEvent(getDAGId(), historyEvent));
}
}
for(TezEvent tezEvent : tezEvents) {
if (LOG.isDebugEnabled()) {
- LOG.debug("Vertex: " + vertex.getName() + " routing event: "
+ LOG.debug("Vertex: " + getLogIdentifier() + " routing event: "
+ tezEvent.getEventType()
+ " Recovered:" + recovered);
}
@@ -3998,7 +4150,7 @@ public class VertexImpl implements org.apache.tez.dag.app.dag.Vertex, EventHandl
case DATA_MOVEMENT_EVENT:
case COMPOSITE_DATA_MOVEMENT_EVENT:
{
- if (isEventFromVertex(vertex, sourceMeta)) {
+ if (isEventFromVertex(this, sourceMeta)) {
// event from this vertex. send to destination vertex
TezTaskAttemptID srcTaId = sourceMeta.getTaskAttemptID();
if (tezEvent.getEventType() == EventType.DATA_MOVEMENT_EVENT) {
@@ -4008,56 +4160,86 @@ public class VertexImpl implements org.apache.tez.dag.app.dag.Vertex, EventHandl
} else {
((InputFailedEvent) tezEvent.getEvent()).setVersion(srcTaId.getId());
}
- Vertex destVertex = vertex.getDAG().getVertex(sourceMeta.getEdgeVertexName());
- Edge destEdge = vertex.targetVertices.get(destVertex);
+ Vertex destVertex = getDAG().getVertex(sourceMeta.getEdgeVertexName());
+ Edge destEdge = targetVertices.get(destVertex);
if (destEdge == null) {
throw new TezUncheckedException("Bad destination vertex: " +
sourceMeta.getEdgeVertexName() + " for event vertex: " +
- vertex.getLogIdentifier());
+ getLogIdentifier());
}
- vertex.eventHandler.handle(new VertexEventRouteEvent(destVertex
+ eventHandler.handle(new VertexEventRouteEvent(destVertex
.getVertexId(), Collections.singletonList(tezEvent)));
} else {
- // event not from this vertex. must have come from source vertex.
- // send to tasks
- if (vertex.tasksNotYetScheduled) {
- vertex.pendingTaskEvents.add(tezEvent);
+ if (tasksNotYetScheduled) {
+ // this is only needed to support mixed mode routing. Else for
+ // on demand routing events can be directly added to taskEvents
+ // when legacy routing is removed then pending task events can be
+ // removed.
+ pendingTaskEvents.add(tezEvent);
} else {
- Edge srcEdge = vertex.sourceVertices.get(vertex.getDAG().getVertex(
- sourceMeta.getTaskVertexName()));
- if (srcEdge == null) {
- throw new TezUncheckedException("Bad source vertex: " +
- sourceMeta.getTaskVertexName() + " for destination vertex: " +
- vertex.getLogIdentifier());
+ // event not from this vertex. must have come from source vertex.
+ if (useOnDemandRouting) {
+ int srcTaskIndex = sourceMeta.getTaskAttemptID().getTaskID().getId();
+ Vertex edgeVertex = getDAG().getVertex(sourceMeta.getTaskVertexName());
+ Edge srcEdge = sourceVertices.get(edgeVertex);
+ if (srcEdge == null) {
+ throw new TezUncheckedException("Bad source vertex: " +
+ sourceMeta.getTaskVertexName() + " for destination vertex: " +
+ getLogIdentifier());
+ }
+ onDemandRouteEventsWriteLock.lock();
+ try {
+ onDemandRouteEvents.add(new EventInfo(tezEvent, srcEdge, srcTaskIndex));
+ } finally {
+ onDemandRouteEventsWriteLock.unlock();
+ }
+ } else {
+ // send to tasks
+ Edge srcEdge = sourceVertices.get(getDAG().getVertex(
+ sourceMeta.getTaskVertexName()));
+ if (srcEdge == null) {
+ throw new TezUncheckedException("Bad source vertex: "
+ + sourceMeta.getTaskVertexName() + " for destination vertex: "
+ + getLogIdentifier());
+ }
+ srcEdge.sendTezEventToDestinationTasks(tezEvent);
}
- srcEdge.sendTezEventToDestinationTasks(tezEvent);
}
}
}
break;
case ROOT_INPUT_DATA_INFORMATION_EVENT:
- if (vertex.tasksNotYetScheduled) {
- vertex.pendingTaskEvents.add(tezEvent);
+ {
+ checkEventSourceMetadata(this, sourceMeta);
+ if (tasksNotYetScheduled) {
+ // this is only needed to support mixed mode routing. Else for
+ // on demand routing events can be directly added to taskEvents
+ // when legacy routing is removed then pending task events can be
+ // removed.
+ pendingTaskEvents.add(tezEvent);
} else {
- checkEventSourceMetadata(vertex, sourceMeta);
- InputDataInformationEvent riEvent = (InputDataInformationEvent) tezEvent
- .getEvent();
- Task targetTask = vertex.getTask(riEvent.getTargetIndex());
- targetTask.registerTezEvent(tezEvent);
+ if (useOnDemandRouting) {
+ // getTaskAttemptTezEvents reads onDemandRouteEvents under the read lock,
+ // so every append must hold the write lock, exactly as the data movement
+ // event path does. An unguarded ArrayList.add can race with those readers.
+ onDemandRouteEventsWriteLock.lock();
+ try {
+ // root input events carry no source edge / source task index
+ onDemandRouteEvents.add(new EventInfo(tezEvent, null, -1));
+ } finally {
+ onDemandRouteEventsWriteLock.unlock();
+ }
+ } else {
+ // legacy routing: register the event directly with its target task
+ InputDataInformationEvent riEvent = (InputDataInformationEvent) tezEvent.getEvent();
+ Task targetTask = getTask(riEvent.getTargetIndex());
+ targetTask.registerTezEvent(tezEvent);
+ }
}
+ }
break;
case VERTEX_MANAGER_EVENT:
{
// VM events on task success only can be changed as part of TEZ-1532
VertexManagerEvent vmEvent = (VertexManagerEvent) tezEvent.getEvent();
- Vertex target = vertex.getDAG().getVertex(vmEvent.getTargetVertexName());
+ Vertex target = getDAG().getVertex(vmEvent.getTargetVertexName());
Preconditions.checkArgument(target != null,
"Event sent to unkown vertex: " + vmEvent.getTargetVertexName());
- if (target == vertex) {
- vertex.vertexManager.onVertexManagerEventReceived(vmEvent);
+ if (target == this) {
+ vertexManager.onVertexManagerEventReceived(vmEvent);
} else {
- checkEventSourceMetadata(vertex, sourceMeta);
- vertex.eventHandler.handle(new VertexEventRouteEvent(target
+ checkEventSourceMetadata(this, sourceMeta);
+ eventHandler.handle(new VertexEventRouteEvent(target
.getVertexId(), Collections.singletonList(tezEvent)));
}
}
@@ -4065,45 +4247,46 @@ public class VertexImpl implements org.apache.tez.dag.app.dag.Vertex, EventHandl
case ROOT_INPUT_INITIALIZER_EVENT:
{
InputInitializerEvent riEvent = (InputInitializerEvent) tezEvent.getEvent();
- Vertex target = vertex.getDAG().getVertex(riEvent.getTargetVertexName());
+ Vertex target = getDAG().getVertex(riEvent.getTargetVertexName());
Preconditions.checkArgument(target != null,
"Event sent to unknown vertex: " + riEvent.getTargetVertexName());
riEvent.setSourceVertexName(tezEvent.getSourceInfo().getTaskVertexName());
- if (target == vertex) {
- if (vertex.rootInputDescriptors == null ||
- !vertex.rootInputDescriptors.containsKey(riEvent.getTargetInputName())) {
+ if (target == this) {
+ if (rootInputDescriptors == null ||
+ !rootInputDescriptors.containsKey(riEvent.getTargetInputName())) {
throw new TezUncheckedException(
"InputInitializerEvent targeted at unknown initializer on vertex " +
- vertex.logIdentifier + ", Event=" + riEvent);
+ logIdentifier + ", Event=" + riEvent);
}
- if (vertex.getState() == VertexState.NEW) {
- vertex.pendingInitializerEvents.add(tezEvent);
- } else if (vertex.getState() == VertexState.INITIALIZING) {
- vertex.rootInputInitializerManager.handleInitializerEvents(Collections.singletonList(tezEvent));
+ if (getState() == VertexState.NEW) {
+ pendingInitializerEvents.add(tezEvent);
+ } else if (getState() == VertexState.INITIALIZING) {
+ rootInputInitializerManager.handleInitializerEvents(Collections.singletonList(tezEvent));
} else {
// Currently, INITED and subsequent states means Initializer complete / failure
if (LOG.isDebugEnabled()) {
- LOG.debug("Dropping event" + tezEvent + " since state is not INITIALIZING in " + vertex.getLogIdentifier() + ", state=" + vertex.getState());
+ LOG.debug("Dropping event" + tezEvent + " since state is not INITIALIZING in "
+ + getLogIdentifier() + ", state=" + getState());
}
}
} else {
- checkEventSourceMetadata(vertex, sourceMeta);
- vertex.eventHandler.handle(new VertexEventRouteEvent(target.getVertexId(),
+ checkEventSourceMetadata(this, sourceMeta);
+ eventHandler.handle(new VertexEventRouteEvent(target.getVertexId(),
Collections.singletonList(tezEvent)));
}
}
break;
case INPUT_READ_ERROR_EVENT:
{
- checkEventSourceMetadata(vertex, sourceMeta);
- Edge srcEdge = vertex.sourceVertices.get(vertex.getDAG().getVertex(
+ checkEventSourceMetadata(this, sourceMeta);
+ Edge srcEdge = sourceVertices.get(this.getDAG().getVertex(
sourceMeta.getEdgeVertexName()));
srcEdge.sendTezEventToSourceTasks(tezEvent);
}
break;
case TASK_ATTEMPT_FAILED_EVENT:
{
- checkEventSourceMetadata(vertex, sourceMeta);
+ checkEventSourceMetadata(this, sourceMeta);
TaskAttemptTerminationCause errCause = null;
switch (sourceMeta.getEventGenerator()) {
case INPUT:
@@ -4124,7 +4307,7 @@ public class VertexImpl implements org.apache.tez.dag.app.dag.Vertex, EventHandl
}
TaskAttemptFailedEvent taskFailedEvent =
(TaskAttemptFailedEvent) tezEvent.getEvent();
- vertex.getEventHandler().handle(
+ getEventHandler().handle(
new TaskAttemptEventAttemptFailed(sourceMeta.getTaskAttemptID(),
TaskAttemptEventType.TA_FAILED,
"Error: " + taskFailedEvent.getDiagnostics(),
@@ -4134,8 +4317,8 @@ public class VertexImpl implements org.apache.tez.dag.app.dag.Vertex, EventHandl
break;
case TASK_ATTEMPT_COMPLETED_EVENT:
{
- checkEventSourceMetadata(vertex, sourceMeta);
- vertex.getEventHandler().handle(
+ checkEventSourceMetadata(this, sourceMeta);
+ getEventHandler().handle(
new TaskAttemptEvent(sourceMeta.getTaskAttemptID(), TaskAttemptEventType.TA_DONE));
}
break;
http://git-wip-us.apache.org/repos/asf/tez/blob/05f77fe2/tez-dag/src/test/java/org/apache/tez/dag/app/MockDAGAppMaster.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/MockDAGAppMaster.java b/tez-dag/src/test/java/org/apache/tez/dag/app/MockDAGAppMaster.java
index 18286b5..5cd487c 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/MockDAGAppMaster.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/MockDAGAppMaster.java
@@ -92,8 +92,8 @@ public class MockDAGAppMaster extends DAGAppMaster {
MockContainerLauncher containerLauncher;
boolean initFailFlag;
boolean startFailFlag;
- boolean sendDMEvents;
boolean recoveryFatalError = false;
+ EventsDelegate eventsDelegate;
CountersDelegate countersDelegate;
StatisticsDelegate statsDelegate;
long launcherSleepTime = 1;
@@ -112,6 +112,10 @@ public class MockDAGAppMaster extends DAGAppMaster {
public static interface CountersDelegate {
public TezCounters getCounters(TaskSpec taskSpec);
}
+
+ public static interface EventsDelegate { // test hook consulted by the mock launcher when it builds a task's heartbeat
+ public void getEvents(TaskSpec taskSpec, List<TezEvent> events); // called with the task's spec and the outgoing event list; implementations are expected to append to that list
+ }
// mock container launcher does not launch real tasks.
// Upon launch of a container it simulates the container asking for tasks
@@ -334,7 +338,7 @@ public class MockDAGAppMaster extends DAGAppMaster {
if (response.shouldDie()) {
cData.remove();
} else {
- cData.nextFromEventId += response.getEvents().size();
+ // The response reports the next event id to ask for (an absolute position,
+ // not a count), and it is sent back verbatim as fromEventId in the next
+ // heartbeat. Accumulating it with += would skip events.
+ cData.nextFromEventId = response.getNextFromEventId();
if (!response.getEvents().isEmpty()) {
long stopTime = System.nanoTime();
long stopCpuTime = threadMxBean.getCurrentThreadCpuTime();
@@ -400,19 +404,8 @@ public class MockDAGAppMaster extends DAGAppMaster {
updatesToMake != null && cData.numUpdates < updatesToMake) {
List<TezEvent> events = Lists.newArrayListWithCapacity(
cData.taskSpec.getOutputs().size() + 1);
- if (sendDMEvents) {
- for (OutputSpec output : cData.taskSpec.getOutputs()) {
- if (output.getPhysicalEdgeCount() == 1) {
- events.add(new TezEvent(DataMovementEvent.create(0, 0, 0, null), new EventMetaData(
- EventProducerConsumerType.OUTPUT, cData.vName, output
- .getDestinationVertexName(), cData.taId)));
- } else {
- events.add(new TezEvent(CompositeDataMovementEvent.create(0,
- output.getPhysicalEdgeCount(), null), new EventMetaData(
- EventProducerConsumerType.OUTPUT, cData.vName, output
- .getDestinationVertexName(), cData.taId)));
- }
- }
+ if (cData.numUpdates == 0 && eventsDelegate != null) {
+ eventsDelegate.getEvents(cData.taskSpec, events);
}
TezCounters counters = null;
if (countersDelegate != null) {
@@ -428,7 +421,7 @@ public class MockDAGAppMaster extends DAGAppMaster {
events.add(new TezEvent(new TaskStatusUpdateEvent(counters, progress, stats), new EventMetaData(
EventProducerConsumerType.SYSTEM, cData.vName, "", cData.taId)));
TezHeartbeatRequest request = new TezHeartbeatRequest(cData.numUpdates, events,
- cData.cIdStr, cData.taId, cData.nextFromEventId, 10000);
+ cData.cIdStr, cData.taId, cData.nextFromEventId, 50000);
doHeartbeat(request, cData);
} else if (version != null && cData.taId.getId() <= version.intValue()) {
preemptContainer(cData);
http://git-wip-us.apache.org/repos/asf/tez/blob/05f77fe2/tez-dag/src/test/java/org/apache/tez/dag/app/TestMemoryWithEvents.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/TestMemoryWithEvents.java b/tez-dag/src/test/java/org/apache/tez/dag/app/TestMemoryWithEvents.java
new file mode 100644
index 0000000..c277b38
--- /dev/null
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/TestMemoryWithEvents.java
@@ -0,0 +1,219 @@
+/**
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements. See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership. The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+package org.apache.tez.dag.app;
+
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.log4j.Level;
+import org.apache.log4j.Logger;
+import org.apache.tez.dag.api.DAG;
+import org.apache.tez.dag.api.DataSourceDescriptor;
+import org.apache.tez.dag.api.Edge;
+import org.apache.tez.dag.api.EdgeProperty;
+import org.apache.tez.dag.api.InputDescriptor;
+import org.apache.tez.dag.api.InputInitializerDescriptor;
+import org.apache.tez.dag.api.OutputDescriptor;
+import org.apache.tez.dag.api.ProcessorDescriptor;
+import org.apache.tez.dag.api.TezConfiguration;
+import org.apache.tez.dag.api.Vertex;
+import org.apache.tez.dag.api.EdgeProperty.DataMovementType;
+import org.apache.tez.dag.api.EdgeProperty.DataSourceType;
+import org.apache.tez.dag.api.EdgeProperty.SchedulingType;
+import org.apache.tez.dag.api.client.DAGClient;
+import org.apache.tez.dag.api.client.DAGStatus;
+import org.apache.tez.dag.app.MockDAGAppMaster.MockContainerLauncher;
+import org.apache.tez.runtime.api.Event;
+import org.apache.tez.runtime.api.InputInitializer;
+import org.apache.tez.runtime.api.InputInitializerContext;
+import org.apache.tez.runtime.api.events.InputDataInformationEvent;
+import org.apache.tez.runtime.api.events.InputInitializerEvent;
+import org.junit.Assert;
+import org.junit.Ignore;
+import org.junit.Test;
+
+import com.google.common.collect.Lists;
+
+// The objective of these tests is to make sure the large job simulations pass
+// within the memory limits set by the junit tests (1GB)
+// For large jobs please increase memory limits to account for memory used by the
+// simulation code itself
+//
+// Every test below is annotated with @Ignore, so the simulations only run when
+// that annotation is removed by hand.
+public class TestMemoryWithEvents {
+ // Base configuration shared by all tests: local mode on the local file system
+ static Configuration defaultConf;
+ static FileSystem localFs;
+
+ static {
+ try {
+ defaultConf = new Configuration(false);
+ defaultConf.set("fs.defaultFS", "file:///");
+ defaultConf.setBoolean(TezConfiguration.TEZ_LOCAL_MODE, true);
+ localFs = FileSystem.getLocal(defaultConf);
+ String stagingDir = "target" + Path.SEPARATOR + TestMemoryWithEvents.class.getName() + "-tmpDir";
+ defaultConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, stagingDir);
+ // only warnings and above are logged while the simulation runs
+ Logger.getRootLogger().setLevel(Level.WARN);
+ } catch (IOException e) {
+ throw new RuntimeException("init failure", e);
+ }
+ }
+
+ // passed to the MockTezClient constructor in testMemory
+ final int numThreads = 30;
+ // parallelism of every vertex created by the tests
+ final int numTasks = 10000;
+
+ /**
+ * Prints heap usage and heartbeat statistics of the mock AM to stdout.
+ *
+ * @param name label printed in the report header (the tests pass the DAG name)
+ * @param mockApp mock AM whose heartbeat counters are reported
+ */
+ private void checkMemory(String name, MockDAGAppMaster mockApp) {
+ long mb = 1024*1024;
+ // NOTE(review): the divisions below assume heartbeatTime/heartbeatCpu are kept
+ // in microseconds; the mock launcher samples System.nanoTime(), so confirm the
+ // unit before trusting the "(ms)" label.
+ long microsPerMs = 1000;
+
+ //Getting the runtime reference from system
+ Runtime runtime = Runtime.getRuntime();
+
+ System.out.println("##### Heap utilization statistics [MB] for " + name);
+
+ // request a collection first so the numbers below are closer to live data
+ runtime.gc();
+
+ //Print used memory
+ System.out.println("##### Used Memory:"
+ + (runtime.totalMemory() - runtime.freeMemory()) / mb);
+
+ //Print free memory
+ System.out.println("##### Free Memory:"
+ + runtime.freeMemory() / mb);
+
+ //Print total available memory
+ System.out.println("##### Total Memory:" + runtime.totalMemory() / mb);
+
+ //Print Maximum available memory
+ System.out.println("##### Max Memory:" + runtime.maxMemory() / mb);
+
+ //Print average heartbeat latency plus total and average heartbeat cpu time
+ long numHeartbeats = mockApp.numHearbeats.get();
+ if (numHeartbeats == 0) {
+ // avoid dividing by zero when no heartbeat was recorded
+ numHeartbeats = 1;
+ }
+ System.out.println("##### Heartbeat (ms) :"
+ + " latency avg: " + ((mockApp.heartbeatTime.get() / numHeartbeats) / microsPerMs)
+ + " cpu total: " + (mockApp.heartbeatCpu.get() / microsPerMs)
+ + " cpu avg: " + ((mockApp.heartbeatCpu.get() / numHeartbeats) / microsPerMs)
+ + " numHeartbeats: " + mockApp.numHearbeats.get());
+ }
+
+ /**
+ * Runs the given DAG on a mock AM, asserts that it succeeds and prints the
+ * memory report.
+ *
+ * @param dag DAG to simulate
+ * @param sendDMEvents currently unused; events always come from
+ * TestMockDAGAppMaster.TestEventsDelegate
+ */
+ private void testMemory(DAG dag, boolean sendDMEvents) throws Exception {
+ TezConfiguration tezconf = new TezConfiguration(defaultConf);
+
+ MockTezClient tezClient = new MockTezClient("testMockAM", tezconf, true, null, null, null,
+ null, false, false, numThreads, 1000);
+ tezClient.start();
+ try {
+ MockDAGAppMaster mockApp = tezClient.getLocalClient().getMockApp();
+ MockContainerLauncher mockLauncher = mockApp.getContainerLauncher();
+ // hold back scheduling until all containers have been launched
+ mockLauncher.startScheduling(false);
+ mockApp.eventsDelegate = new TestMockDAGAppMaster.TestEventsDelegate();
+ mockApp.doSleep = false;
+ DAGClient dagClient = tezClient.submitDAG(dag);
+ mockLauncher.waitTillContainersLaunched();
+ mockLauncher.startScheduling(true);
+ DAGStatus status = dagClient.waitForCompletion();
+ Assert.assertEquals(DAGStatus.State.SUCCEEDED, status.getState());
+ checkMemory(dag.getName(), mockApp);
+ } finally {
+ // stop the client even when the DAG fails or the assertion fires
+ tezClient.stop();
+ }
+ }
+
+ // Input initializer that emits one InputDataInformationEvent per task, each
+ // targeted at its own task index and carrying a null payload.
+ public static class SimulationInitializer extends InputInitializer {
+ public SimulationInitializer(InputInitializerContext initializerContext) {
+ super(initializerContext);
+ }
+
+ @Override
+ public List<Event> initialize() throws Exception {
+ int numTasks = getContext().getNumTasks();
+ List<Event> events = Lists.newArrayListWithCapacity(numTasks);
+ for (int i=0; i<numTasks; ++i) {
+ events.add(InputDataInformationEvent.createWithSerializedPayload(i, null));
+ }
+ return events;
+ }
+
+ @Override
+ public void handleInputInitializerEvent(List<InputInitializerEvent> events) throws Exception {
+ // initializer events are ignored by the simulation
+ }
+ }
+
+ // Two unconnected vertices; vertex A gets its root input events from SimulationInitializer.
+ @Ignore
+ @Test (timeout = 600000)
+ public void testMemoryRootInputEvents() throws Exception {
+ DAG dag = DAG.create("testMemoryRootInputEvents");
+ Vertex vA = Vertex.create("A", ProcessorDescriptor.create("Proc.class"), numTasks);
+ Vertex vB = Vertex.create("B", ProcessorDescriptor.create("Proc.class"), numTasks);
+ vA.addDataSource(
+ "Input",
+ DataSourceDescriptor.create(InputDescriptor.create("In"),
+ InputInitializerDescriptor.create(SimulationInitializer.class.getName()), null));
+ dag.addVertex(vA).addVertex(vB);
+ testMemory(dag, false);
+ }
+
+ // A -> B connected by a ONE_TO_ONE edge.
+ @Ignore
+ @Test (timeout = 600000)
+ public void testMemoryOneToOne() throws Exception {
+ DAG dag = DAG.create("testMemoryOneToOne");
+ Vertex vA = Vertex.create("A", ProcessorDescriptor.create("Proc.class"), numTasks);
+ Vertex vB = Vertex.create("B", ProcessorDescriptor.create("Proc.class"), numTasks);
+ dag.addVertex(vA)
+ .addVertex(vB)
+ .addEdge(
+ Edge.create(vA, vB, EdgeProperty.create(DataMovementType.ONE_TO_ONE,
+ DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL,
+ OutputDescriptor.create("Out"), InputDescriptor.create("In"))));
+ testMemory(dag, true);
+ }
+
+ // A -> B connected by a BROADCAST edge.
+ @Ignore
+ @Test (timeout = 600000)
+ public void testMemoryBroadcast() throws Exception {
+ DAG dag = DAG.create("testMemoryBroadcast");
+ Vertex vA = Vertex.create("A", ProcessorDescriptor.create("Proc.class"), numTasks);
+ Vertex vB = Vertex.create("B", ProcessorDescriptor.create("Proc.class"), numTasks);
+ dag.addVertex(vA)
+ .addVertex(vB)
+ .addEdge(
+ Edge.create(vA, vB, EdgeProperty.create(DataMovementType.BROADCAST,
+ DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL,
+ OutputDescriptor.create("Out"), InputDescriptor.create("In"))));
+ testMemory(dag, true);
+ }
+
+ // A -> B connected by a SCATTER_GATHER edge.
+ @Ignore
+ @Test (timeout = 600000)
+ public void testMemoryScatterGather() throws Exception {
+ DAG dag = DAG.create("testMemoryScatterGather");
+ Vertex vA = Vertex.create("A", ProcessorDescriptor.create("Proc.class"), numTasks);
+ Vertex vB = Vertex.create("B", ProcessorDescriptor.create("Proc.class"), numTasks);
+ dag.addVertex(vA)
+ .addVertex(vB)
+ .addEdge(
+ Edge.create(vA, vB, EdgeProperty.create(DataMovementType.SCATTER_GATHER,
+ DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL,
+ OutputDescriptor.create("Out"), InputDescriptor.create("In"))));
+ testMemory(dag, true);
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/tez/blob/05f77fe2/tez-dag/src/test/java/org/apache/tez/dag/app/TestMockDAGAppMaster.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/TestMockDAGAppMaster.java b/tez-dag/src/test/java/org/apache/tez/dag/app/TestMockDAGAppMaster.java
index 87ffead..1e7faf9 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/TestMockDAGAppMaster.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/TestMockDAGAppMaster.java
@@ -23,6 +23,7 @@ import java.io.DataOutput;
import java.io.DataOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
+import java.util.Collections;
import java.util.List;
import java.util.Locale;
import java.util.Map;
@@ -49,6 +50,9 @@ import org.apache.tez.common.counters.TezCounters;
import org.apache.tez.dag.api.DAG;
import org.apache.tez.dag.api.DataSourceDescriptor;
import org.apache.tez.dag.api.Edge;
+import org.apache.tez.dag.api.EdgeManagerPlugin;
+import org.apache.tez.dag.api.EdgeManagerPluginContext;
+import org.apache.tez.dag.api.EdgeManagerPluginDescriptor;
import org.apache.tez.dag.api.EdgeProperty;
import org.apache.tez.dag.api.EdgeProperty.DataSourceType;
import org.apache.tez.dag.api.EdgeProperty.SchedulingType;
@@ -69,6 +73,7 @@ import org.apache.tez.dag.api.client.VertexStatus;
import org.apache.tez.dag.api.client.VertexStatus.State;
import org.apache.tez.dag.api.oldrecords.TaskAttemptState;
import org.apache.tez.dag.app.MockDAGAppMaster.CountersDelegate;
+import org.apache.tez.dag.app.MockDAGAppMaster.EventsDelegate;
import org.apache.tez.dag.app.MockDAGAppMaster.MockContainerLauncher;
import org.apache.tez.dag.app.MockDAGAppMaster.MockContainerLauncher.ContainerData;
import org.apache.tez.dag.app.MockDAGAppMaster.StatisticsDelegate;
@@ -84,13 +89,17 @@ import org.apache.tez.dag.records.TezVertexID;
import org.apache.tez.runtime.api.VertexStatistics;
import org.apache.tez.runtime.api.OutputCommitter;
import org.apache.tez.runtime.api.OutputCommitterContext;
+import org.apache.tez.runtime.api.events.CompositeDataMovementEvent;
import org.apache.tez.runtime.api.events.DataMovementEvent;
+import org.apache.tez.runtime.api.events.InputReadErrorEvent;
+import org.apache.tez.runtime.api.impl.EventMetaData;
import org.apache.tez.runtime.api.impl.IOStatistics;
import org.apache.tez.runtime.api.impl.InputSpec;
import org.apache.tez.runtime.api.impl.OutputSpec;
import org.apache.tez.runtime.api.impl.TaskSpec;
import org.apache.tez.runtime.api.impl.TaskStatistics;
import org.apache.tez.runtime.api.impl.TezEvent;
+import org.apache.tez.runtime.api.impl.EventMetaData.EventProducerConsumerType;
import org.junit.Assert;
import org.junit.Ignore;
import org.junit.Test;
@@ -101,8 +110,7 @@ import com.google.common.primitives.Ints;
public class TestMockDAGAppMaster {
private static final Log LOG = LogFactory.getLog(TestMockDAGAppMaster.class);
static Configuration defaultConf;
- static FileSystem localFs;
-
+ static FileSystem localFs;
static {
try {
defaultConf = new Configuration(false);
@@ -116,6 +124,24 @@ public class TestMockDAGAppMaster {
}
}
+ static class TestEventsDelegate implements EventsDelegate {
+ @Override
+ public void getEvents(TaskSpec taskSpec, List<TezEvent> events) {
+ for (OutputSpec output : taskSpec.getOutputs()) {
+ if (output.getPhysicalEdgeCount() == 1) {
+ events.add(new TezEvent(DataMovementEvent.create(0, 0, 0, null), new EventMetaData(
+ EventProducerConsumerType.OUTPUT, taskSpec.getVertexName(), output
+ .getDestinationVertexName(), taskSpec.getTaskAttemptID())));
+ } else {
+ events.add(new TezEvent(CompositeDataMovementEvent.create(0,
+ output.getPhysicalEdgeCount(), null), new EventMetaData(
+ EventProducerConsumerType.OUTPUT, taskSpec.getVertexName(), output
+ .getDestinationVertexName(), taskSpec.getTaskAttemptID())));
+ }
+ }
+ }
+ }
+
@Test (timeout = 5000)
public void testLocalResourceSetup() throws Exception {
TezConfiguration tezconf = new TezConfiguration(defaultConf);
@@ -196,7 +222,7 @@ public class TestMockDAGAppMaster {
MockDAGAppMaster mockApp = tezClient.getLocalClient().getMockApp();
MockContainerLauncher mockLauncher = mockApp.getContainerLauncher();
mockLauncher.startScheduling(false);
- mockApp.sendDMEvents = true;
+ mockApp.eventsDelegate = new TestEventsDelegate();
DAG dag = DAG.create("testBasicEvents");
Vertex vA = Vertex.create("A", ProcessorDescriptor.create("Proc.class"), 2);
Vertex vB = Vertex.create("B", ProcessorDescriptor.create("Proc.class"), 2);
@@ -227,7 +253,8 @@ public class TestMockDAGAppMaster {
Assert.assertEquals(DAGStatus.State.SUCCEEDED, dagClient.getDAGStatus(null).getState());
VertexImpl vImpl = (VertexImpl) dagImpl.getVertex(vB.getName());
TaskImpl tImpl = (TaskImpl) vImpl.getTask(1);
- List<TezEvent> tEvents = tImpl.getTaskEvents();
+ TezTaskAttemptID taId = TezTaskAttemptID.getInstance(tImpl.getTaskId(), 1);
+ List<TezEvent> tEvents = vImpl.getTaskAttemptTezEvents(taId, 0, 1000).getEvents();
Assert.assertEquals(2, tEvents.size()); // 2 from vA
Assert.assertEquals(vA.getName(), tEvents.get(0).getDestinationInfo().getEdgeVertexName());
Assert.assertEquals(0, ((DataMovementEvent)tEvents.get(0).getEvent()).getSourceIndex());
@@ -240,7 +267,8 @@ public class TestMockDAGAppMaster {
(targetIndex1 == 0 && targetIndex2 == 1) || (targetIndex1 == 1 && targetIndex2 == 0));
vImpl = (VertexImpl) dagImpl.getVertex(vC.getName());
tImpl = (TaskImpl) vImpl.getTask(1);
- tEvents = tImpl.getTaskEvents();
+ taId = TezTaskAttemptID.getInstance(tImpl.getTaskId(), 1);
+ tEvents = vImpl.getTaskAttemptTezEvents(taId, 0, 1000).getEvents();
Assert.assertEquals(2, tEvents.size()); // 2 from vA
Assert.assertEquals(vA.getName(), tEvents.get(0).getDestinationInfo().getEdgeVertexName());
Assert.assertEquals(1, ((DataMovementEvent)tEvents.get(0).getEvent()).getSourceIndex());
@@ -253,7 +281,8 @@ public class TestMockDAGAppMaster {
(targetIndex1 == 0 && targetIndex2 == 1) || (targetIndex1 == 1 && targetIndex2 == 0));
vImpl = (VertexImpl) dagImpl.getVertex(vD.getName());
tImpl = (TaskImpl) vImpl.getTask(1);
- tEvents = tImpl.getTaskEvents();
+ taId = TezTaskAttemptID.getInstance(tImpl.getTaskId(), 1);
+ tEvents = vImpl.getTaskAttemptTezEvents(taId, 0, 1000).getEvents();
Assert.assertEquals(1, tEvents.size()); // 1 from vA
Assert.assertEquals(vA.getName(), tEvents.get(0).getDestinationInfo().getEdgeVertexName());
Assert.assertEquals(0, ((DataMovementEvent)tEvents.get(0).getEvent()).getTargetIndex());
@@ -261,6 +290,125 @@ public class TestMockDAGAppMaster {
tezClient.stop();
}
+
+ public static class LegacyEdgeTestEdgeManager extends EdgeManagerPlugin {
+ List<Integer> destinationInputIndices =
+ Collections.unmodifiableList(Collections.singletonList(0));
+ public LegacyEdgeTestEdgeManager(EdgeManagerPluginContext context) {
+ super(context);
+ }
+
+ @Override
+ public void initialize() throws Exception {
+ }
+
+ @Override
+ public int getNumDestinationTaskPhysicalInputs(int destinationTaskIndex) throws Exception {
+ return 1;
+ }
+
+ @Override
+ public int getNumSourceTaskPhysicalOutputs(int sourceTaskIndex) throws Exception {
+ return 1;
+ }
+
+ @Override
+ public void routeDataMovementEventToDestination(DataMovementEvent event,
+ int sourceTaskIndex, int sourceOutputIndex,
+ Map<Integer, List<Integer>> destinationTaskAndInputIndices) {
+ destinationTaskAndInputIndices.put(sourceTaskIndex, destinationInputIndices);
+ }
+
+ @Override
+ public void routeInputSourceTaskFailedEventToDestination(int sourceTaskIndex,
+ Map<Integer, List<Integer>> destinationTaskAndInputIndices) {
+ destinationTaskAndInputIndices.put(sourceTaskIndex, destinationInputIndices);
+ }
+
+ @Override
+ public int routeInputErrorEventToSource(InputReadErrorEvent event,
+ int destinationTaskIndex, int destinationFailedInputIndex) {
+ return destinationTaskIndex;
+ }
+
+ @Override
+ public int getNumDestinationConsumerTasks(int sourceTaskIndex) {
+ return 1;
+ }
+ }
+
+ @Test (timeout = 100000)
+ public void testMixedEdgeRouting() throws Exception {
+ TezConfiguration tezconf = new TezConfiguration(defaultConf);
+
+ MockTezClient tezClient = new MockTezClient("testMockAM", tezconf, true, null, null, null, null);
+ tezClient.start();
+
+ MockDAGAppMaster mockApp = tezClient.getLocalClient().getMockApp();
+ MockContainerLauncher mockLauncher = mockApp.getContainerLauncher();
+ mockLauncher.startScheduling(false);
+ mockApp.eventsDelegate = new TestEventsDelegate();
+ DAG dag = DAG.create("testMixedEdgeRouting");
+ Vertex vA = Vertex.create("A", ProcessorDescriptor.create("Proc.class"), 1);
+ Vertex vB = Vertex.create("B", ProcessorDescriptor.create("Proc.class"), 1);
+ Vertex vC = Vertex.create("C", ProcessorDescriptor.create("Proc.class"), 1);
+ Vertex vD = Vertex.create("D", ProcessorDescriptor.create("Proc.class"), 1);
+ Vertex vE = Vertex.create("E", ProcessorDescriptor.create("Proc.class"), 1);
+ dag.addVertex(vA)
+ .addVertex(vB)
+ .addVertex(vC)
+ .addVertex(vD)
+ .addVertex(vE)
+ .addEdge(
+ Edge.create(vA, vC, EdgeProperty.create(DataMovementType.SCATTER_GATHER,
+ DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL,
+ OutputDescriptor.create("Out"), InputDescriptor.create("In"))))
+ .addEdge(
+ Edge.create(vB, vC, EdgeProperty.create(DataMovementType.SCATTER_GATHER,
+ DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL,
+ OutputDescriptor.create("Out"), InputDescriptor.create("In"))))
+ .addEdge(
+ Edge.create(vA, vD, EdgeProperty.create(DataMovementType.SCATTER_GATHER,
+ DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL,
+ OutputDescriptor.create("Out"), InputDescriptor.create("In"))))
+ .addEdge(
+ Edge.create(vB, vD, EdgeProperty.create(
+ EdgeManagerPluginDescriptor.create(LegacyEdgeTestEdgeManager.class.getName()),
+ DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL,
+ OutputDescriptor.create("Out"), InputDescriptor.create("In"))))
+ .addEdge(
+ Edge.create(vB, vE, EdgeProperty.create(
+ EdgeManagerPluginDescriptor.create(LegacyEdgeTestEdgeManager.class.getName()),
+ DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL,
+ OutputDescriptor.create("Out"), InputDescriptor.create("In"))));
+
+ DAGClient dagClient = tezClient.submitDAG(dag);
+ mockLauncher.waitTillContainersLaunched();
+ DAGImpl dagImpl = (DAGImpl) mockApp.getContext().getCurrentDAG();
+ mockLauncher.startScheduling(true);
+ dagClient.waitForCompletion();
+ Assert.assertEquals(DAGStatus.State.SUCCEEDED, dagClient.getDAGStatus(null).getState());
+ // vC uses on demand routing and its task does not provide events
+ VertexImpl vImpl = (VertexImpl) dagImpl.getVertex(vC.getName());
+ Assert.assertEquals(true, vImpl.useOnDemandRouting);
+ TaskImpl tImpl = (TaskImpl) vImpl.getTask(0);
+ TezTaskAttemptID taId = TezTaskAttemptID.getInstance(tImpl.getTaskId(), 0);
+ Assert.assertEquals(0, tImpl.getTaskAttemptTezEvents(taId, 0, 1000).size());
+ // vD is mixed mode and does not use on demand routing and its task provides events
+ vImpl = (VertexImpl) dagImpl.getVertex(vD.getName());
+ Assert.assertEquals(false, vImpl.useOnDemandRouting);
+ tImpl = (TaskImpl) vImpl.getTask(0);
+ taId = TezTaskAttemptID.getInstance(tImpl.getTaskId(), 0);
+ Assert.assertEquals(2, tImpl.getTaskAttemptTezEvents(taId, 0, 1000).size());
+ // vE has single legacy edge and does not use on demand routing and its task provides events (NOTE(review): the lookup below fetches vD again, so vE itself is never checked - confirm intent)
+ vImpl = (VertexImpl) dagImpl.getVertex(vD.getName());
+ Assert.assertEquals(false, vImpl.useOnDemandRouting);
+ tImpl = (TaskImpl) vImpl.getTask(0);
+ taId = TezTaskAttemptID.getInstance(tImpl.getTaskId(), 0);
+ Assert.assertEquals(2, tImpl.getTaskAttemptTezEvents(taId, 0, 1000).size());
+
+ tezClient.stop();
+ }
@Test (timeout = 10000)
public void testBasicCounters() throws Exception {
http://git-wip-us.apache.org/repos/asf/tez/blob/05f77fe2/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskAttemptListenerImplTezDag.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskAttemptListenerImplTezDag.java b/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskAttemptListenerImplTezDag.java
index f974f40..db8eff1 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskAttemptListenerImplTezDag.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskAttemptListenerImplTezDag.java
@@ -42,7 +42,6 @@ import org.apache.tez.common.ContainerTask;
import org.apache.tez.common.security.JobTokenSecretManager;
import org.apache.tez.dag.api.TezException;
import org.apache.tez.dag.app.dag.DAG;
-import org.apache.tez.dag.app.dag.Task;
import org.apache.tez.dag.app.dag.Vertex;
import org.apache.tez.dag.app.dag.event.TaskAttemptEventType;
import org.apache.tez.dag.app.dag.event.VertexEventRouteEvent;
@@ -62,6 +61,7 @@ import org.apache.tez.runtime.api.impl.EventType;
import org.apache.tez.runtime.api.impl.TaskSpec;
import org.apache.tez.runtime.api.impl.TezEvent;
import org.apache.tez.runtime.api.impl.TezHeartbeatRequest;
+import org.apache.tez.runtime.api.impl.TezHeartbeatResponse;
import org.junit.Before;
import org.junit.Test;
import org.mockito.ArgumentCaptor;
@@ -186,7 +186,7 @@ public class TestTaskAttemptListenerImplTezDag {
new TezEvent(new TaskAttemptCompletedEvent(), null)
);
- EventHandler eventHandler = generateHeartbeat(events);
+ generateHeartbeat(events, 0, 1, 0, new ArrayList<TezEvent>());
ArgumentCaptor<Event> arg = ArgumentCaptor.forClass(Event.class);
verify(eventHandler, times(2)).handle(arg.capture());
@@ -212,7 +212,7 @@ public class TestTaskAttemptListenerImplTezDag {
List<TezEvent> events = Arrays.asList(
new TezEvent(new TaskAttemptCompletedEvent(), null)
);
- final EventHandler eventHandler = generateHeartbeat(events);
+ generateHeartbeat(events, 0, 1, 0, new ArrayList<TezEvent>());
ArgumentCaptor<Event> arg = ArgumentCaptor.forClass(Event.class);
verify(eventHandler, times(1)).handle(arg.capture());
@@ -222,18 +222,29 @@ public class TestTaskAttemptListenerImplTezDag {
assertEquals("only event should be route event", VertexEventType.V_ROUTE_EVENT,
event.getType());
}
+
+ @Test (timeout = 5000)
+ public void testTaskHeartbeatResponse() throws Exception {
+ List<TezEvent> events = new ArrayList<TezEvent>();
+ List<TezEvent> eventsToSend = new ArrayList<TezEvent>();
+ TezHeartbeatResponse response = generateHeartbeat(events, 0, 1, 2, eventsToSend);
+
+ assertEquals(2, response.getNextFromEventId());
+ assertEquals(1, response.getLastRequestId());
+ assertEquals(eventsToSend, response.getEvents());
+ }
- private EventHandler generateHeartbeat(List<TezEvent> events) throws IOException, TezException {
+ private TezHeartbeatResponse generateHeartbeat(List<TezEvent> events,
+ int fromEventId, int maxEvents, int nextFromEventId,
+ List<TezEvent> sendEvents) throws IOException, TezException {
ContainerId containerId = createContainerId(appId, 1);
long requestId = 0;
Vertex vertex = mock(Vertex.class);
- Task task = mock(Task.class);
doReturn(vertex).when(dag).getVertex(vertexID);
doReturn("test_vertex").when(vertex).getName();
- doReturn(task).when(vertex).getTask(taskID);
-
- doReturn(new ArrayList<TezEvent>()).when(task).getTaskAttemptTezEvents(taskAttemptID, 0, 1);
+ TaskAttemptEventInfo eventInfo = new TaskAttemptEventInfo(nextFromEventId, sendEvents);
+ doReturn(eventInfo).when(vertex).getTaskAttemptTezEvents(taskAttemptID, fromEventId, maxEvents);
taskAttemptListener.registerRunningContainer(containerId);
taskAttemptListener.registerTaskAttempt(amContainerTask, containerId);
@@ -243,10 +254,10 @@ public class TestTaskAttemptListenerImplTezDag {
doReturn(taskAttemptID).when(request).getCurrentTaskAttemptID();
doReturn(++requestId).when(request).getRequestId();
doReturn(events).when(request).getEvents();
+ doReturn(maxEvents).when(request).getMaxEvents();
+ doReturn(fromEventId).when(request).getStartIndex();
- taskAttemptListener.heartbeat(request);
-
- return eventHandler;
+ return taskAttemptListener.heartbeat(request);
}
http://git-wip-us.apache.org/repos/asf/tez/blob/05f77fe2/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestDAGImpl.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestDAGImpl.java b/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestDAGImpl.java
index ba40146..d2aa2d0 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestDAGImpl.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestDAGImpl.java
@@ -51,9 +51,9 @@ import org.apache.hadoop.yarn.util.Clock;
import org.apache.hadoop.yarn.util.SystemClock;
import org.apache.tez.common.MockDNSToSwitchMapping;
import org.apache.tez.dag.api.DataSinkDescriptor;
-import org.apache.tez.dag.api.EdgeManagerPlugin;
import org.apache.tez.dag.api.EdgeManagerPluginContext;
import org.apache.tez.dag.api.EdgeManagerPluginDescriptor;
+import org.apache.tez.dag.api.EdgeManagerPluginOnDemand;
import org.apache.tez.dag.api.GroupInputEdge;
import org.apache.tez.dag.api.DAG;
import org.apache.tez.dag.api.EdgeProperty;
@@ -1028,6 +1028,8 @@ public class TestDAGImpl {
new EventMetaData(EventProducerConsumerType.INPUT, "vertex1", "vertex2", ta1.getID()));
dispatcher.getEventHandler().handle(new VertexEventRouteEvent(v2.getVertexId(), Lists.newArrayList(tezEvent)));
dispatcher.await();
+ v2.getTaskAttemptTezEvents(ta1.getID(), 0, 1000);
+ dispatcher.await();
Assert.assertEquals(VertexState.FAILED, v2.getState());
Assert.assertEquals(VertexState.KILLED, v1.getState());
@@ -1037,7 +1039,40 @@ public class TestDAGImpl {
@SuppressWarnings("unchecked")
@Test(timeout = 5000)
- public void testEdgeManager_RouteInputSourceTaskFailedEventToDestination() {
+ public void testEdgeManager_RouteDataMovementEventToDestinationWithLegacyRouting() {
+ // Remove after legacy routing is removed
+ setupDAGWithCustomEdge(ExceptionLocation.RouteDataMovementEventToDestination);
+ dispatcher.getEventHandler().handle(
+ new DAGEvent(dagWithCustomEdge.getID(), DAGEventType.DAG_INIT));
+ dispatcher.getEventHandler().handle(new DAGEventStartDag(dagWithCustomEdge.getID(),
+ null));
+ dispatcher.await();
+ Assert.assertEquals(DAGState.RUNNING, dagWithCustomEdge.getState());
+
+ VertexImpl v1 = (VertexImpl)dagWithCustomEdge.getVertex("vertex1");
+ VertexImpl v2 = (VertexImpl)dagWithCustomEdge.getVertex("vertex2");
+ v1.useOnDemandRouting = false;
+ v2.useOnDemandRouting = false;
+ dispatcher.await();
+ Task t1= v2.getTask(0);
+ TaskAttemptImpl ta1= (TaskAttemptImpl)t1.getAttempt(TezTaskAttemptID.getInstance(t1.getTaskId(), 0));
+
+ DataMovementEvent daEvent = DataMovementEvent.create(ByteBuffer.wrap(new byte[0]));
+ TezEvent tezEvent = new TezEvent(daEvent,
+ new EventMetaData(EventProducerConsumerType.INPUT, "vertex1", "vertex2", ta1.getID()));
+ dispatcher.getEventHandler().handle(new VertexEventRouteEvent(v2.getVertexId(), Lists.newArrayList(tezEvent)));
+ dispatcher.await();
+
+ Assert.assertEquals(VertexState.FAILED, v2.getState());
+ Assert.assertEquals(VertexState.KILLED, v1.getState());
+ String diag = StringUtils.join(v2.getDiagnostics(), ",");
+ Assert.assertTrue(diag.contains(ExceptionLocation.RouteDataMovementEventToDestination.name()));
+ }
+
+ @SuppressWarnings("unchecked")
+ @Test(timeout = 5000)
+ public void testEdgeManager_RouteInputSourceTaskFailedEventToDestinationLegacyRouting() {
+ // Remove after legacy routing is removed
setupDAGWithCustomEdge(ExceptionLocation.RouteInputSourceTaskFailedEventToDestination);
dispatcher.getEventHandler().handle(
new DAGEvent(dagWithCustomEdge.getID(), DAGEventType.DAG_INIT));
@@ -1048,6 +1083,8 @@ public class TestDAGImpl {
VertexImpl v1 = (VertexImpl)dagWithCustomEdge.getVertex("vertex1");
VertexImpl v2 = (VertexImpl)dagWithCustomEdge.getVertex("vertex2");
+ v1.useOnDemandRouting = false;
+ v2.useOnDemandRouting = false;
dispatcher.await();
Task t1= v2.getTask(0);
@@ -1057,13 +1094,15 @@ public class TestDAGImpl {
new EventMetaData(EventProducerConsumerType.INPUT,"vertex1", "vertex2", ta1.getID()));
dispatcher.getEventHandler().handle(new VertexEventRouteEvent(v2.getVertexId(), Lists.newArrayList(tezEvent)));
dispatcher.await();
- //
+ v2.getTaskAttemptTezEvents(ta1.getID(), 0, 1000);
+ dispatcher.await();
Assert.assertEquals(VertexState.FAILED, v2.getState());
+
Assert.assertEquals(VertexState.KILLED, v1.getState());
String diag = StringUtils.join(v2.getDiagnostics(), ",");
Assert.assertTrue(diag.contains(ExceptionLocation.RouteInputSourceTaskFailedEventToDestination.name()));
}
-
+
@SuppressWarnings("unchecked")
@Test(timeout = 5000)
public void testEdgeManager_GetNumDestinationConsumerTasks() {
@@ -1773,7 +1812,7 @@ public class TestDAGImpl {
TezConfiguration.TEZ_AM_TASK_MAX_FAILED_ATTEMPTS_DEFAULT));
}
- public static class CustomizedEdgeManager extends EdgeManagerPlugin {
+ public static class CustomizedEdgeManager extends EdgeManagerPluginOnDemand {
public static enum ExceptionLocation {
Initialize,
@@ -1861,6 +1900,47 @@ public class TestDAGImpl {
}
return 0;
}
+
+ @Override
+ public int routeInputErrorEventToSource(int destinationTaskIndex,
+ int destinationFailedInputIndex) throws Exception {
+ if (exLocation == ExceptionLocation.RouteInputErrorEventToSource) {
+ throw new Exception(exLocation.name());
+ }
+ return 0;
+ }
+
+ @Override
+ public EventRouteMetadata routeDataMovementEventToDestination(int sourceTaskIndex,
+ int sourceOutputIndex, int destinationTaskIndex) throws Exception {
+ if (exLocation == ExceptionLocation.RouteDataMovementEventToDestination) {
+ throw new Exception(exLocation.name());
+ }
+ return null;
+ }
+
+ @Override
+ public EventRouteMetadata routeCompositeDataMovementEventToDestination(
+ int sourceTaskIndex, int destinationTaskIndex)
+ throws Exception {
+ if (exLocation == ExceptionLocation.RouteDataMovementEventToDestination) {
+ throw new Exception(exLocation.name());
+ }
+ return null;
+ }
+
+ @Override
+ public EventRouteMetadata routeInputSourceTaskFailedEventToDestination(
+ int sourceTaskIndex, int destinationTaskIndex) throws Exception {
+ if (exLocation == ExceptionLocation.RouteInputSourceTaskFailedEventToDestination) {
+ throw new Exception(exLocation.name());
+ }
+ return null;
+ }
+
+ @Override
+ public void prepareForRouting() throws Exception {
+ }
}
http://git-wip-us.apache.org/repos/asf/tez/blob/05f77fe2/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestVertexImpl.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestVertexImpl.java b/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestVertexImpl.java
index 99ec6cf..a8eaca1 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestVertexImpl.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestVertexImpl.java
@@ -73,6 +73,8 @@ import org.apache.tez.dag.api.EdgeManagerPluginDescriptor;
import org.apache.tez.dag.api.EdgeProperty;
import org.apache.tez.dag.api.EdgeProperty.DataSourceType;
import org.apache.tez.dag.api.EdgeProperty.SchedulingType;
+import org.apache.tez.dag.api.EdgeManagerPluginOnDemand;
+import org.apache.tez.dag.api.EdgeManagerPluginOnDemand.EventRouteMetadata;
import org.apache.tez.dag.api.InputDescriptor;
import org.apache.tez.dag.api.InputInitializerDescriptor;
import org.apache.tez.dag.api.OutputDescriptor;
@@ -107,6 +109,7 @@ import org.apache.tez.dag.api.records.DAGProtos.TezEntityDescriptorProto;
import org.apache.tez.dag.api.records.DAGProtos.VertexPlan;
import org.apache.tez.dag.app.AppContext;
import org.apache.tez.dag.app.ContainerHeartbeatHandler;
+import org.apache.tez.dag.app.TaskAttemptEventInfo;
import org.apache.tez.dag.app.TaskAttemptListener;
import org.apache.tez.dag.app.TaskHeartbeatHandler;
import org.apache.tez.dag.app.dag.DAG;
@@ -148,7 +151,6 @@ import org.apache.tez.dag.app.rm.container.ContainerContextMatcher;
import org.apache.tez.dag.history.DAGHistoryEvent;
import org.apache.tez.dag.history.HistoryEventHandler;
import org.apache.tez.dag.history.HistoryEventType;
-import org.apache.tez.dag.history.events.VertexRecoverableEventsGeneratedEvent;
import org.apache.tez.dag.library.vertexmanager.InputReadyVertexManager;
import org.apache.tez.dag.library.vertexmanager.ShuffleVertexManager;
import org.apache.tez.dag.records.TaskAttemptTerminationCause;
@@ -186,8 +188,6 @@ import org.junit.BeforeClass;
import org.junit.Ignore;
import org.junit.Test;
import org.mockito.ArgumentCaptor;
-import org.mockito.ArgumentMatcher;
-import org.mockito.Matchers;
import org.mockito.Mockito;
import org.mockito.internal.util.collections.Sets;
@@ -2462,6 +2462,101 @@ public class TestVertexImpl {
VertexImpl v = vertices.get("vertex2");
startVertex(v);
}
+
+ @Test (timeout = 5000)
+ public void testVertexGetTAAttempts() throws Exception {
+ initAllVertices(VertexState.INITED);
+ VertexImpl v1 = vertices.get("vertex1");
+ startVertex(v1);
+ VertexImpl v2 = vertices.get("vertex2");
+ startVertex(v2);
+ VertexImpl v3 = vertices.get("vertex3");
+ VertexImpl v4 = vertices.get("vertex4");
+
+ Assert.assertEquals(VertexState.RUNNING, v4.getState());
+ Assert.assertEquals(1, v4.sourceVertices.size());
+ Edge e = v4.sourceVertices.get(v3);
+ TezTaskAttemptID v3TaId = TezTaskAttemptID.getInstance(
+ TezTaskID.getInstance(v3.getVertexId(), 0), 0);
+ TezTaskAttemptID v4TaId = TezTaskAttemptID.getInstance(
+ TezTaskID.getInstance(v4.getVertexId(), 0), 0);
+
+ for (int i=0; i<5; ++i) {
+ v4.handle(new VertexEventRouteEvent(v4.getVertexId(), Collections.singletonList(
+ new TezEvent(DataMovementEvent.create(0, null),
+ new EventMetaData(EventProducerConsumerType.OUTPUT, v3.getName(), v3.getName(), v3TaId)))));
+ }
+ dispatcher.await();
+ // verify all events have been put in pending.
+ // this is not necessary after legacy routing has been removed
+ Assert.assertEquals(5, v4.pendingTaskEvents.size());
+ v4.scheduleTasks(new LinkedList<VertexManagerPluginContext.TaskWithLocationHint>());
+ // verify all events have been moved to taskEvents
+ Assert.assertEquals(5, v4.getOnDemandRouteEvents().size());
+ for (int i=5; i<11; ++i) {
+ v4.handle(new VertexEventRouteEvent(v4.getVertexId(), Collections.singletonList(
+ new TezEvent(DataMovementEvent.create(0, null),
+ new EventMetaData(EventProducerConsumerType.OUTPUT, v3.getName(), v3.getName(), v3TaId)))));
+ }
+ dispatcher.await();
+ // verify all events are in taskEvents
+ Assert.assertEquals(11, v4.getOnDemandRouteEvents().size());
+
+ TaskAttemptEventInfo eventInfo;
+ EdgeManagerPluginOnDemand mockPlugin = mock(EdgeManagerPluginOnDemand.class);
+ EventRouteMetadata mockRoute = EventRouteMetadata.create(1, new int[]{0});
+ e.edgeManager = mockPlugin;
+ // source task id will not match. all events will return null
+ when(mockPlugin.routeDataMovementEventToDestination(1, 0, 0)).thenReturn(mockRoute);
+ eventInfo = v4.getTaskAttemptTezEvents(v4TaId, 0, 1);
+ Assert.assertEquals(11, eventInfo.getNextFromEventId()); // all events traversed
+ Assert.assertEquals(0, eventInfo.getEvents().size()); // no events
+
+ int fromEventId = 0;
+ // source task id will match. all events will be returned
+ // max events is respected.
+ when(
+ mockPlugin.routeDataMovementEventToDestination(anyInt(),
+ anyInt(), anyInt())).thenReturn(mockRoute);
+ for (int i=0; i<11; ++i) {
+ eventInfo = v4.getTaskAttemptTezEvents(v4TaId, fromEventId, 1);
+ fromEventId = eventInfo.getNextFromEventId();
+ Assert.assertEquals((i+1), fromEventId);
+ Assert.assertEquals(1, eventInfo.getEvents().size());
+ }
+ eventInfo = v4.getTaskAttemptTezEvents(v4TaId, fromEventId, 1);
+ Assert.assertEquals(11, eventInfo.getNextFromEventId()); // all events traversed
+ Assert.assertEquals(0, eventInfo.getEvents().size()); // no events
+
+ // change max events to larger value. max events does not evenly divide total events
+ fromEventId = 0;
+ for (int i=1; i<=2; ++i) {
+ eventInfo = v4.getTaskAttemptTezEvents(v4TaId, fromEventId, 5);
+ fromEventId = eventInfo.getNextFromEventId();
+ Assert.assertEquals((i*5), fromEventId);
+ Assert.assertEquals(5, eventInfo.getEvents().size());
+ }
+ eventInfo = v4.getTaskAttemptTezEvents(v4TaId, fromEventId, 5);
+ Assert.assertEquals(11, eventInfo.getNextFromEventId()); // all events traversed
+ Assert.assertEquals(1, eventInfo.getEvents().size()); // remainder events
+
+ // return more events that don't evenly fit in max size
+ mockRoute = EventRouteMetadata.create(2, new int[]{0, 0});
+ when(
+ mockPlugin.routeDataMovementEventToDestination(anyInt(),
+ anyInt(), anyInt())).thenReturn(mockRoute);
+ fromEventId = 0;
+ int lastFromEventId = 0;
+ for (int i=1; i<=4; ++i) {
+ eventInfo = v4.getTaskAttemptTezEvents(v4TaId, fromEventId, 5);
+ fromEventId = eventInfo.getNextFromEventId();
+ Assert.assertEquals((i%2 > 0 ? (lastFromEventId+=2) : (lastFromEventId+=3)), fromEventId);
+ Assert.assertEquals(5, eventInfo.getEvents().size());
+ }
+ eventInfo = v4.getTaskAttemptTezEvents(v4TaId, fromEventId, 5);
+ Assert.assertEquals(11, eventInfo.getNextFromEventId()); // all events traversed
+ Assert.assertEquals(2, eventInfo.getEvents().size()); // remainder events
+ }
@Test(timeout = 5000)
public void testVertexReconfigurePlannedAfterInit() throws Exception {
@@ -2632,11 +2727,15 @@ public class TestVertexImpl {
}
@Test(timeout = 5000)
- public void testVertexPendingTaskEvents() {
+ public void testVertexPendingTaskEventsLegacyRouting() {
+ // Remove after bulk routing API is removed
initAllVertices(VertexState.INITED);
VertexImpl v3 = vertices.get("vertex3");
VertexImpl v2 = vertices.get("vertex2");
VertexImpl v1 = vertices.get("vertex1");
+ v1.useOnDemandRouting = false;
+ v2.useOnDemandRouting = false;
+ v3.useOnDemandRouting = false;
startVertex(v1);
@@ -4595,8 +4694,6 @@ public class TestVertexImpl {
dispatcher.await();
Assert.assertEquals(VertexState.INITED, v1.getState());
Assert.assertEquals(5, v1.getTotalTasks());
- // task events get buffered
- Assert.assertEquals(5, v1.pendingTaskEvents.size());
Assert.assertEquals(RootInputVertexManager.class.getName(), v1
.getVertexManager().getPlugin().getClass().getName());
for (int i=0; i < v1Hints.size(); ++i) {
@@ -4609,6 +4706,16 @@ public class TestVertexImpl {
Assert.assertEquals(1, inputSpecs.get(0).getPhysicalEdgeCount());
}
+ // fake scheduling start to trigger edge routing to begin
+ v1.scheduleTasks(new LinkedList<VertexManagerPluginContext.TaskWithLocationHint>());
+ // check all tasks get their events
+ for (int i=0; i<v1.getTotalTasks(); ++i) {
+ Assert.assertEquals(
+ 1,
+ v1.getTaskAttemptTezEvents(TezTaskAttemptID.getInstance(v1.getTask(i).getTaskId(), 0),
+ 0, 100).getEvents().size());
+ }
+
VertexImplWithControlledInitializerManager v2 = (VertexImplWithControlledInitializerManager) vertices.get("vertex2");
Assert.assertEquals(VertexState.INITIALIZING, v2.getState());
@@ -4627,7 +4734,6 @@ public class TestVertexImpl {
dispatcher.getEventHandler().handle(
new VertexEventRouteEvent(v2.getVertexId(), events));
dispatcher.await();
- Assert.assertEquals(1, v2.pendingTaskEvents.size());
RootInputInitializerManagerControlled initializerManager2 = v2.getRootInputInitializerManager();
List<TaskLocationHint> v2Hints = createTaskLocationHints(10);
@@ -4635,14 +4741,99 @@ public class TestVertexImpl {
dispatcher.await();
Assert.assertEquals(VertexState.INITED, v2.getState());
Assert.assertEquals(10, v2.getTotalTasks());
+ Assert.assertEquals(RootInputVertexManager.class.getName(), v2
+ .getVertexManager().getPlugin().getClass().getName());
+ for (int i=0; i < v2Hints.size(); ++i) {
+ Assert.assertEquals(v2Hints.get(i), v2.getTaskLocationHints()[i]);
+ }
+ Assert.assertEquals(true, initializerManager2.hasShutDown);
+
+ // fake scheduling start to trigger edge routing to begin
+ v2.scheduleTasks(new LinkedList<VertexManagerPluginContext.TaskWithLocationHint>());
+ // check all tasks get their events
+ for (int i=0; i<v2.getTotalTasks(); ++i) {
+ Assert.assertEquals(
+ ((i==0) ? 2 : 1),
+ v2.getTaskAttemptTezEvents(TezTaskAttemptID.getInstance(v2.getTask(i).getTaskId(), 0),
+ 0, 100).getEvents().size());
+ }
+ for (int i = 0; i < 10; i++) {
+ List<InputSpec> inputSpecs = v1.getInputSpecList(i);
+ Assert.assertEquals(1, inputSpecs.size());
+ Assert.assertEquals(1, inputSpecs.get(0).getPhysicalEdgeCount());
+ }
+ }
+
+ @Test(timeout = 5000)
+ public void testVertexWithInitializerSuccessLegacyRouting() throws Exception {
+ // Remove after legacy routing is removed
+ useCustomInitializer = true;
+ setupPreDagCreation();
+ dagPlan = createDAGPlanWithInputInitializer("TestInputInitializer");
+ setupPostDagCreation();
+
+ VertexImplWithControlledInitializerManager v1 = (VertexImplWithControlledInitializerManager) vertices
+ .get("vertex1");
+ v1.useOnDemandRouting = false;
+ dispatcher.getEventHandler().handle(
+ new VertexEvent(v1.getVertexId(), VertexEventType.V_INIT));
+ dispatcher.await();
+ Assert.assertEquals(VertexState.INITIALIZING, v1.getState());
+ RootInputInitializerManagerControlled initializerManager1 = v1.getRootInputInitializerManager();
+ List<TaskLocationHint> v1Hints = createTaskLocationHints(5);
+ initializerManager1.completeInputInitialization(0, 5, v1Hints);
+ dispatcher.await();
+ Assert.assertEquals(VertexState.INITED, v1.getState());
+ Assert.assertEquals(5, v1.getTotalTasks());
+ Assert.assertEquals(RootInputVertexManager.class.getName(), v1
+ .getVertexManager().getPlugin().getClass().getName());
+ for (int i=0; i < v1Hints.size(); ++i) {
+ Assert.assertEquals(v1Hints.get(i), v1.getTaskLocationHints()[i]);
+ }
+ Assert.assertEquals(true, initializerManager1.hasShutDown);
+ for (int i = 0; i < 5; i++) {
+ List<InputSpec> inputSpecs = v1.getInputSpecList(i);
+ Assert.assertEquals(1, inputSpecs.size());
+ Assert.assertEquals(1, inputSpecs.get(0).getPhysicalEdgeCount());
+ }
// task events get buffered
- Assert.assertEquals(11, v2.pendingTaskEvents.size());
+ Assert.assertEquals(5, v1.pendingTaskEvents.size());
+
+ VertexImplWithControlledInitializerManager v2 = (VertexImplWithControlledInitializerManager) vertices.get("vertex2");
+ Assert.assertEquals(VertexState.INITIALIZING, v2.getState());
+ v2.useOnDemandRouting = false;
+
+ // non-task events don't get buffered
+ List<TezEvent> events = Lists.newLinkedList();
+ TezTaskID t0_v1 = TezTaskID.getInstance(v1.getVertexId(), 0);
+ TezTaskAttemptID ta0_t0_v1 = TezTaskAttemptID.getInstance(t0_v1, 0);
+ events.add(new TezEvent(
+ VertexManagerEvent.create("vertex2", ByteBuffer.wrap(new byte[0])), new EventMetaData(
+ EventProducerConsumerType.PROCESSOR, "vertex1", "vertex2",
+ ta0_t0_v1)));
+ events.add(new TezEvent(InputDataInformationEvent.createWithSerializedPayload(0,
+ ByteBuffer.wrap(new byte[0])),
+ new EventMetaData(EventProducerConsumerType.INPUT, "vertex2",
+ "NULL_VERTEX", null)));
+ dispatcher.getEventHandler().handle(
+ new VertexEventRouteEvent(v2.getVertexId(), events));
+ dispatcher.await();
+ Assert.assertEquals(1, v2.pendingTaskEvents.size());
+
+ RootInputInitializerManagerControlled initializerManager2 = v2.getRootInputInitializerManager();
+ List<TaskLocationHint> v2Hints = createTaskLocationHints(10);
+ initializerManager2.completeInputInitialization(0, 10, v2Hints);
+ dispatcher.await();
+ Assert.assertEquals(VertexState.INITED, v2.getState());
+ Assert.assertEquals(10, v2.getTotalTasks());
Assert.assertEquals(RootInputVertexManager.class.getName(), v2
.getVertexManager().getPlugin().getClass().getName());
for (int i=0; i < v2Hints.size(); ++i) {
Assert.assertEquals(v2Hints.get(i), v2.getTaskLocationHints()[i]);
}
Assert.assertEquals(true, initializerManager2.hasShutDown);
+ // task events get buffered
+ Assert.assertEquals(11, v2.pendingTaskEvents.size());
for (int i = 0; i < 10; i++) {
List<InputSpec> inputSpecs = v1.getInputSpecList(i);
Assert.assertEquals(1, inputSpecs.size());
@@ -4650,6 +4841,7 @@ public class TestVertexImpl {
}
}
+
@Test(timeout = 5000)
public void testVertexWithInputDistributor() throws Exception {
useCustomInitializer = true;
http://git-wip-us.apache.org/repos/asf/tez/blob/05f77fe2/tez-dag/src/test/java/org/apache/tez/test/EdgeManagerForTest.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/test/java/org/apache/tez/test/EdgeManagerForTest.java b/tez-dag/src/test/java/org/apache/tez/test/EdgeManagerForTest.java
index 09f9a20..9cb914f 100644
--- a/tez-dag/src/test/java/org/apache/tez/test/EdgeManagerForTest.java
+++ b/tez-dag/src/test/java/org/apache/tez/test/EdgeManagerForTest.java
@@ -21,13 +21,13 @@ package org.apache.tez.test;
import java.util.List;
import java.util.Map;
-import org.apache.tez.dag.api.EdgeManagerPlugin;
import org.apache.tez.dag.api.EdgeManagerPluginContext;
+import org.apache.tez.dag.api.EdgeManagerPluginOnDemand;
import org.apache.tez.dag.api.UserPayload;
import org.apache.tez.runtime.api.events.DataMovementEvent;
import org.apache.tez.runtime.api.events.InputReadErrorEvent;
-public class EdgeManagerForTest extends EdgeManagerPlugin {
+public class EdgeManagerForTest extends EdgeManagerPluginOnDemand {
private UserPayload userPayload;
@@ -78,6 +78,35 @@ public class EdgeManagerForTest extends EdgeManagerPlugin {
public void routeInputSourceTaskFailedEventToDestination(int sourceTaskIndex,
Map<Integer, List<Integer>> destinationTaskAndInputIndices) {
}
+
+ @Override
+ public EventRouteMetadata routeDataMovementEventToDestination(int sourceTaskIndex,
+ int sourceOutputIndex, int destinationTaskIndex) throws Exception {
+ return null; // stub: this test edge manager computes no route metadata and ignores its arguments
+ }
+
+ @Override
+ public EventRouteMetadata routeCompositeDataMovementEventToDestination(
+ int sourceTaskIndex, int destinationTaskIndex)
+ throws Exception {
+ return null; // stub: always null, regardless of the source/destination task indices
+ }
+
+ @Override
+ public EventRouteMetadata routeInputSourceTaskFailedEventToDestination(
+ int sourceTaskIndex, int destinationTaskIndex) throws Exception {
+ return null; // stub: always null; the map-based overload of the same name is likewise empty
+ }
+
+ @Override
+ public void prepareForRouting() throws Exception { // no-op: this test edge manager sets nothing up before routing
+ }
+
+ @Override
+ public int routeInputErrorEventToSource(int destinationTaskIndex, int destinationFailedInputIndex)
+ throws Exception {
+ return 0; // stub: always 0 (presumably a source task index — confirm against EdgeManagerPluginOnDemand)
+ }
// End of overridden methods
http://git-wip-us.apache.org/repos/asf/tez/blob/05f77fe2/tez-runtime-internals/src/main/java/org/apache/tez/runtime/RuntimeTask.java
----------------------------------------------------------------------
diff --git a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/RuntimeTask.java b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/RuntimeTask.java
index f8b8621..921095c 100644
--- a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/RuntimeTask.java
+++ b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/RuntimeTask.java
@@ -49,6 +49,7 @@ public abstract class RuntimeTask {
protected final Configuration tezConf;
protected final TezUmbilical tezUmbilical;
protected final AtomicInteger eventCounter;
+ protected final AtomicInteger nextFromEventId;
private final AtomicBoolean taskDone;
private final TaskCounterUpdater counterUpdater;
private final TaskStatistics statistics;
@@ -60,6 +61,7 @@ public abstract class RuntimeTask {
this.tezUmbilical = tezUmbilical;
this.tezCounters = new TezCounters();
this.eventCounter = new AtomicInteger(0);
+ this.nextFromEventId = new AtomicInteger(0);
this.progress = 0.0f;
this.taskDone = new AtomicBoolean(false);
this.statistics = new TaskStatistics();
@@ -130,6 +132,14 @@ public abstract class RuntimeTask {
public int getEventCounter() {
return eventCounter.get();
}
+
+ public int getNextFromEventId() {
+ return nextFromEventId.get(); // current value of the AtomicInteger field initialised to 0 in the constructor
+ }
+
+ public void setNextFromEventId(int nextFromEventId) {
+ this.nextFromEventId.set(nextFromEventId); // overwrites the AtomicInteger; TaskReporter passes the value from the heartbeat response
+ }
public boolean isTaskDone() {
return taskDone.get();
http://git-wip-us.apache.org/repos/asf/tez/blob/05f77fe2/tez-runtime-internals/src/main/java/org/apache/tez/runtime/api/impl/TezHeartbeatResponse.java
----------------------------------------------------------------------
diff --git a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/api/impl/TezHeartbeatResponse.java b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/api/impl/TezHeartbeatResponse.java
index 10699ac..cecc706 100644
--- a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/api/impl/TezHeartbeatResponse.java
+++ b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/api/impl/TezHeartbeatResponse.java
@@ -32,6 +32,7 @@ public class TezHeartbeatResponse implements Writable {
private long lastRequestId;
private boolean shouldDie = false;
private List<TezEvent> events;
+ private int nextFromEventId;
public TezHeartbeatResponse() {
}
@@ -51,6 +52,10 @@ public class TezHeartbeatResponse implements Writable {
public long getLastRequestId() {
return lastRequestId;
}
+
+ public int getNextFromEventId() {
+ return nextFromEventId; // plain int field; serialized by write() and restored by readFields()
+ }
public void setEvents(List<TezEvent> events) {
this.events = Collections.unmodifiableList(events);
@@ -63,11 +68,16 @@ public class TezHeartbeatResponse implements Writable {
public void setShouldDie() {
this.shouldDie = true;
}
+
+ public void setNextFromEventId(int nextFromEventId) {
+ this.nextFromEventId = nextFromEventId; // stored as-is; written to the wire right after shouldDie in write()
+ }
@Override
public void write(DataOutput out) throws IOException {
out.writeLong(lastRequestId);
out.writeBoolean(shouldDie);
+ out.writeInt(nextFromEventId);
if(events != null) {
out.writeBoolean(true);
out.writeInt(events.size());
@@ -83,6 +93,7 @@ public class TezHeartbeatResponse implements Writable {
public void readFields(DataInput in) throws IOException {
lastRequestId = in.readLong();
shouldDie = in.readBoolean();
+ nextFromEventId = in.readInt();
if(in.readBoolean()) {
int eventCount = in.readInt();
events = new ArrayList<TezEvent>(eventCount);
@@ -99,6 +110,7 @@ public class TezHeartbeatResponse implements Writable {
return "{ "
+ " lastRequestId=" + lastRequestId
+ ", shouldDie=" + shouldDie
+ + ", nextFromEventId=" + nextFromEventId
+ ", eventCount=" + (events != null ? events.size() : 0)
+ " }";
}
http://git-wip-us.apache.org/repos/asf/tez/blob/05f77fe2/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TaskReporter.java
----------------------------------------------------------------------
diff --git a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TaskReporter.java b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TaskReporter.java
index 7324abd..3d1d1a2 100644
--- a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TaskReporter.java
+++ b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TaskReporter.java
@@ -240,8 +240,9 @@ public class TaskReporter {
}
long requestId = requestCounter.incrementAndGet();
+ int fromEventId = task.getNextFromEventId();
TezHeartbeatRequest request = new TezHeartbeatRequest(requestId, events, containerIdStr,
- task.getTaskAttemptID(), task.getEventCounter(), maxEventsToGet);
+ task.getTaskAttemptID(), fromEventId, maxEventsToGet);
if (LOG.isDebugEnabled()) {
LOG.debug("Sending heartbeat to AM, request=" + request);
}
@@ -271,11 +272,12 @@ public class TaskReporter {
+ " heartbeat response, eventCount=" + response.getEvents().size());
}
} else {
+ task.setNextFromEventId(response.getNextFromEventId());
if (response.getEvents() != null && !response.getEvents().isEmpty()) {
- if (LOG.isDebugEnabled()) {
- LOG.debug("Routing events from heartbeat response to task" + ", currentTaskAttemptId="
- + task.getTaskAttemptID() + ", eventCount=" + response.getEvents().size());
- }
+ LOG.info("Routing events from heartbeat response to task" + ", currentTaskAttemptId="
+ + task.getTaskAttemptID() + ", eventCount=" + response.getEvents().size()
+ + " fromEventId=" + fromEventId
+ + " nextFromEventId=" + response.getNextFromEventId());
// This should ideally happen in a separate thread
numEventsReceived = response.getEvents().size();
task.handleEvents(response.getEvents());
[21/43] tez git commit: TEZ-2117. Add a manager for
ContainerLaunchers running in the AM. (sseth)
Posted by ss...@apache.org.
TEZ-2117. Add a manager for ContainerLaunchers running in the AM.
(sseth)
Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/5f27b83a
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/5f27b83a
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/5f27b83a
Branch: refs/heads/TEZ-2003
Commit: 5f27b83ad5c02b3ce111694246ba536de543f691
Parents: aadd049
Author: Siddharth Seth <ss...@apache.org>
Authored: Wed Feb 18 14:45:34 2015 -0800
Committer: Siddharth Seth <ss...@apache.org>
Committed: Fri May 8 14:41:01 2015 -0700
----------------------------------------------------------------------
TEZ-2003-CHANGES.txt | 1 +
.../org/apache/tez/dag/app/DAGAppMaster.java | 54 ++--------
.../tez/dag/app/launcher/ContainerLauncher.java | 2 +-
.../app/launcher/ContainerLauncherRouter.java | 108 +++++++++++++++++++
.../apache/tez/dag/app/MockDAGAppMaster.java | 5 +-
5 files changed, 124 insertions(+), 46 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/tez/blob/5f27b83a/TEZ-2003-CHANGES.txt
----------------------------------------------------------------------
diff --git a/TEZ-2003-CHANGES.txt b/TEZ-2003-CHANGES.txt
index 975ce65..1cd74a4 100644
--- a/TEZ-2003-CHANGES.txt
+++ b/TEZ-2003-CHANGES.txt
@@ -2,5 +2,6 @@ ALL CHANGES:
TEZ-2019. Temporarily allow the scheduler and launcher to be specified via configuration.
TEZ-2006. Task communication plane needs to be pluggable.
TEZ-2090. Add tests for jobs running in external services.
+ TEZ-2117. Add a manager for ContainerLaunchers running in the AM.
INCOMPATIBLE CHANGES:
http://git-wip-us.apache.org/repos/asf/tez/blob/5f27b83a/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java b/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java
index bfc2d58..0f4d812 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java
@@ -25,8 +25,6 @@ import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.PrintWriter;
-import java.lang.reflect.Constructor;
-import java.lang.reflect.InvocationTargetException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
@@ -139,9 +137,7 @@ import org.apache.tez.dag.app.dag.event.TaskEventType;
import org.apache.tez.dag.app.dag.event.VertexEvent;
import org.apache.tez.dag.app.dag.event.VertexEventType;
import org.apache.tez.dag.app.dag.impl.DAGImpl;
-import org.apache.tez.dag.app.launcher.ContainerLauncher;
-import org.apache.tez.dag.app.launcher.ContainerLauncherImpl;
-import org.apache.tez.dag.app.launcher.LocalContainerLauncher;
+import org.apache.tez.dag.app.launcher.ContainerLauncherRouter;
import org.apache.tez.dag.app.rm.AMSchedulerEventType;
import org.apache.tez.dag.app.rm.NMCommunicatorEventType;
import org.apache.tez.dag.app.rm.TaskSchedulerEventHandler;
@@ -226,7 +222,7 @@ public class DAGAppMaster extends AbstractService {
private AppContext context;
private Configuration amConf;
private AsyncDispatcher dispatcher;
- private ContainerLauncher containerLauncher;
+ private ContainerLauncherRouter containerLauncherRouter;
private ContainerHeartbeatHandler containerHeartbeatHandler;
private TaskHeartbeatHandler taskHeartbeatHandler;
private TaskAttemptListener taskAttemptListener;
@@ -504,9 +500,9 @@ public class DAGAppMaster extends AbstractService {
taskSchedulerEventHandler);
addIfServiceDependency(taskSchedulerEventHandler, clientRpcServer);
- containerLauncher = createContainerLauncher(context);
- addIfService(containerLauncher, true);
- dispatcher.register(NMCommunicatorEventType.class, containerLauncher);
+ this.containerLauncherRouter = createContainerLauncherRouter(conf);
+ addIfService(containerLauncherRouter, true);
+ dispatcher.register(NMCommunicatorEventType.class, containerLauncherRouter);
historyEventHandler = createHistoryEventHandler(context);
addIfService(historyEventHandler, true);
@@ -1034,38 +1030,10 @@ public class DAGAppMaster extends AbstractService {
return chh;
}
- protected ContainerLauncher
- createContainerLauncher(final AppContext context) throws UnknownHostException {
- if(isLocal){
- LOG.info("Creating LocalContainerLauncher");
- return new LocalContainerLauncher(context, taskAttemptListener, workingDirectory);
- } else {
- // TODO: Temporary reflection with specific parameters until a clean interface is defined.
- String containerLauncherClassName = getConfig().get(TezConfiguration.TEZ_AM_CONTAINER_LAUNCHER_CLASS);
- if (containerLauncherClassName == null) {
- LOG.info("Creating Default Container Launcher");
- return new ContainerLauncherImpl(context);
- } else {
- LOG.info("Creating container launcher : " + containerLauncherClassName);
- Class<? extends ContainerLauncher> containerLauncherClazz = (Class<? extends ContainerLauncher>) ReflectionUtils.getClazz(
- containerLauncherClassName);
- try {
- Constructor<? extends ContainerLauncher> ctor = containerLauncherClazz
- .getConstructor(AppContext.class, Configuration.class, TaskAttemptListener.class);
- ctor.setAccessible(true);
- ContainerLauncher instance = ctor.newInstance(context, getConfig(), taskAttemptListener);
- return instance;
- } catch (NoSuchMethodException e) {
- throw new TezUncheckedException(e);
- } catch (InvocationTargetException e) {
- throw new TezUncheckedException(e);
- } catch (InstantiationException e) {
- throw new TezUncheckedException(e);
- } catch (IllegalAccessException e) {
- throw new TezUncheckedException(e);
- }
- }
- }
+ protected ContainerLauncherRouter createContainerLauncherRouter(Configuration conf) throws
+ UnknownHostException {
+ return new ContainerLauncherRouter(conf, isLocal, context, taskAttemptListener, workingDirectory);
+
}
public ApplicationId getAppID() {
@@ -1088,8 +1056,8 @@ public class DAGAppMaster extends AbstractService {
return dispatcher;
}
- public ContainerLauncher getContainerLauncher() {
- return containerLauncher;
+ public ContainerLauncherRouter getContainerLauncherRouter() {
+ return containerLauncherRouter;
}
public TaskAttemptListener getTaskAttemptListener() {
http://git-wip-us.apache.org/repos/asf/tez/blob/5f27b83a/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncher.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncher.java b/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncher.java
index 305eb50..8a8498f 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncher.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncher.java
@@ -23,7 +23,7 @@ import org.apache.hadoop.yarn.event.EventHandler;
import org.apache.tez.dag.app.dag.DAG;
import org.apache.tez.dag.app.rm.NMCommunicatorEvent;
-public interface ContainerLauncher
+public interface ContainerLauncher
extends EventHandler<NMCommunicatorEvent> {
void dagComplete(DAG dag);
http://git-wip-us.apache.org/repos/asf/tez/blob/5f27b83a/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncherRouter.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncherRouter.java b/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncherRouter.java
new file mode 100644
index 0000000..34001ed
--- /dev/null
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncherRouter.java
@@ -0,0 +1,108 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.dag.app.launcher;
+
+import java.lang.reflect.Constructor;
+import java.lang.reflect.InvocationTargetException;
+import java.net.UnknownHostException;
+
+import com.google.common.annotations.VisibleForTesting;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.service.AbstractService;
+import org.apache.hadoop.yarn.event.EventHandler;
+import org.apache.tez.common.ReflectionUtils;
+import org.apache.tez.dag.api.TezConfiguration;
+import org.apache.tez.dag.api.TezUncheckedException;
+import org.apache.tez.dag.app.AppContext;
+import org.apache.tez.dag.app.TaskAttemptListener;
+import org.apache.tez.dag.app.rm.NMCommunicatorEvent;
+
+/**
+ * Service that owns the {@link ContainerLauncher} used by the AM and forwards every
+ * {@link NMCommunicatorEvent} it receives to that launcher.
+ *
+ * <p>The launcher is either supplied directly (tests) or selected at construction time:
+ * a {@link LocalContainerLauncher} in local mode, the class named by
+ * {@link TezConfiguration#TEZ_AM_CONTAINER_LAUNCHER_CLASS} when that key is set, and a
+ * {@link ContainerLauncherImpl} otherwise. Lifecycle calls on this service are propagated
+ * to the launcher when the launcher is itself a Hadoop service.
+ */
+public class ContainerLauncherRouter extends AbstractService
+    implements EventHandler<NMCommunicatorEvent> {
+
+  // Log under this class's own name. The original passed ContainerLauncherImpl.class,
+  // which attributed the router's messages to the wrong logger.
+  static final Log LOG = LogFactory.getLog(ContainerLauncherRouter.class);
+
+  private final ContainerLauncher containerLauncher;
+
+  /**
+   * Wraps an already-constructed launcher.
+   *
+   * @param containerLauncher launcher that receives every event passed to
+   *                          {@link #handle(NMCommunicatorEvent)}
+   */
+  @VisibleForTesting
+  public ContainerLauncherRouter(ContainerLauncher containerLauncher) {
+    super(ContainerLauncherRouter.class.getName());
+    this.containerLauncher = containerLauncher;
+  }
+
+  /**
+   * Creates the launcher selected by {@code isLocal} and {@code conf}.
+   *
+   * @param conf                configuration consulted for the launcher class name and handed
+   *                            to a reflectively created launcher
+   * @param isLocal             when true a {@link LocalContainerLauncher} is always used
+   * @param context             application context passed to the launcher
+   * @param taskAttemptListener listener passed to the local and the configured launcher
+   * @param workingDirectory    working directory passed to the local launcher
+   * @throws UnknownHostException  declared by the signature; presumably raised by a launcher
+   *                               constructor (confirm against LocalContainerLauncher)
+   * @throws TezUncheckedException if the configured class lacks the expected
+   *                               {@code (AppContext, Configuration, TaskAttemptListener)}
+   *                               constructor or cannot be instantiated
+   */
+  // Accepting conf to setup final parameters, if required.
+  public ContainerLauncherRouter(Configuration conf, boolean isLocal, AppContext context,
+                                 TaskAttemptListener taskAttemptListener,
+                                 String workingDirectory) throws UnknownHostException {
+    super(ContainerLauncherRouter.class.getName());
+
+    if (isLocal) {
+      LOG.info("Creating LocalContainerLauncher");
+      containerLauncher =
+          new LocalContainerLauncher(context, taskAttemptListener, workingDirectory);
+    } else {
+      // TODO: Temporary reflection with specific parameters until a clean interface is defined.
+      String containerLauncherClassName =
+          conf.get(TezConfiguration.TEZ_AM_CONTAINER_LAUNCHER_CLASS);
+      if (containerLauncherClassName == null) {
+        LOG.info("Creating Default Container Launcher");
+        containerLauncher = new ContainerLauncherImpl(context);
+      } else {
+        LOG.info("Creating container launcher : " + containerLauncherClassName);
+        // The configured name is trusted to denote a ContainerLauncher; a wrong type
+        // surfaces as a ClassCastException when the instance is assigned below.
+        @SuppressWarnings("unchecked")
+        Class<? extends ContainerLauncher> containerLauncherClazz =
+            (Class<? extends ContainerLauncher>) ReflectionUtils.getClazz(
+                containerLauncherClassName);
+        try {
+          Constructor<? extends ContainerLauncher> ctor = containerLauncherClazz
+              .getConstructor(AppContext.class, Configuration.class, TaskAttemptListener.class);
+          ctor.setAccessible(true);
+          containerLauncher = ctor.newInstance(context, conf, taskAttemptListener);
+        } catch (NoSuchMethodException e) {
+          throw new TezUncheckedException(e);
+        } catch (InvocationTargetException e) {
+          throw new TezUncheckedException(e);
+        } catch (InstantiationException e) {
+          throw new TezUncheckedException(e);
+        } catch (IllegalAccessException e) {
+          throw new TezUncheckedException(e);
+        }
+      }
+
+    }
+  }
+
+  /** Initialises the wrapped launcher, if it is a service. */
+  @Override
+  public void serviceInit(Configuration conf) {
+    // ContainerLauncher itself is only an EventHandler, so an implementation need not be a
+    // service; skip lifecycle propagation instead of failing with a ClassCastException.
+    if (containerLauncher instanceof org.apache.hadoop.service.Service) {
+      ((org.apache.hadoop.service.Service) containerLauncher).init(conf);
+    }
+  }
+
+  /** Starts the wrapped launcher, if it is a service. */
+  @Override
+  public void serviceStart() {
+    if (containerLauncher instanceof org.apache.hadoop.service.Service) {
+      ((org.apache.hadoop.service.Service) containerLauncher).start();
+    }
+  }
+
+  /** Stops the wrapped launcher, if it is a service. */
+  @Override
+  public void serviceStop() {
+    if (containerLauncher instanceof org.apache.hadoop.service.Service) {
+      ((org.apache.hadoop.service.Service) containerLauncher).stop();
+    }
+  }
+
+  /**
+   * Forwards the event unchanged to the wrapped launcher.
+   *
+   * @param event node-manager communication event to deliver
+   */
+  @Override
+  public void handle(NMCommunicatorEvent event) {
+    containerLauncher.handle(event);
+  }
+}
http://git-wip-us.apache.org/repos/asf/tez/blob/5f27b83a/tez-dag/src/test/java/org/apache/tez/dag/app/MockDAGAppMaster.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/MockDAGAppMaster.java b/tez-dag/src/test/java/org/apache/tez/dag/app/MockDAGAppMaster.java
index b846922..7274cde 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/MockDAGAppMaster.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/MockDAGAppMaster.java
@@ -57,6 +57,7 @@ import org.apache.tez.dag.api.TezConfiguration;
import org.apache.tez.dag.api.TaskCommunicator;
import org.apache.tez.dag.api.TezUncheckedException;
import org.apache.tez.dag.app.launcher.ContainerLauncher;
+import org.apache.tez.dag.app.launcher.ContainerLauncherRouter;
import org.apache.tez.dag.app.rm.NMCommunicatorEvent;
import org.apache.tez.dag.app.rm.NMCommunicatorLaunchRequestEvent;
import org.apache.tez.dag.app.rm.NMCommunicatorStopRequestEvent;
@@ -509,9 +510,9 @@ public class MockDAGAppMaster extends DAGAppMaster {
// use mock container launcher for tests
@Override
- protected ContainerLauncher createContainerLauncher(final AppContext context)
+ protected ContainerLauncherRouter createContainerLauncherRouter(final Configuration conf)
throws UnknownHostException {
- return containerLauncher;
+ return new ContainerLauncherRouter(containerLauncher);
}
@Override
[41/43] tez git commit: TEZ-2388. Send dag identifier as part of the
fetcher request string. (sseth)
Posted by ss...@apache.org.
TEZ-2388. Send dag identifier as part of the fetcher request string. (sseth)
Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/fdb91771
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/fdb91771
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/fdb91771
Branch: refs/heads/TEZ-2003
Commit: fdb91771f0235295ef078871501c1d7d81b63ba8
Parents: 9e098f7
Author: Siddharth Seth <ss...@apache.org>
Authored: Wed Apr 29 08:20:05 2015 -0700
Committer: Siddharth Seth <ss...@apache.org>
Committed: Fri May 8 14:44:34 2015 -0700
----------------------------------------------------------------------
TEZ-2003-CHANGES.txt | 1 +
.../tez/runtime/library/common/shuffle/Fetcher.java | 14 ++++++++------
.../runtime/library/common/shuffle/ShuffleUtils.java | 8 +++++---
.../library/common/shuffle/impl/ShuffleManager.java | 2 +-
.../ShuffleInputEventHandlerOrderedGrouped.java | 2 +-
.../runtime/library/common/shuffle/TestFetcher.java | 6 +++---
6 files changed, 19 insertions(+), 14 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/tez/blob/fdb91771/TEZ-2003-CHANGES.txt
----------------------------------------------------------------------
diff --git a/TEZ-2003-CHANGES.txt b/TEZ-2003-CHANGES.txt
index d42aaf8..9fc9ed3 100644
--- a/TEZ-2003-CHANGES.txt
+++ b/TEZ-2003-CHANGES.txt
@@ -19,5 +19,6 @@ ALL CHANGES:
TEZ-2347. Expose additional information in TaskCommunicatorContext.
TEZ-2361. Propagate dag completion to TaskCommunicator.
TEZ-2381. Fixes after rebase 04/28.
+ TEZ-2388. Send dag identifier as part of the fetcher request string.
INCOMPATIBLE CHANGES:
http://git-wip-us.apache.org/repos/asf/tez/blob/fdb91771/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/Fetcher.java
----------------------------------------------------------------------
diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/Fetcher.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/Fetcher.java
index 61e0151..9fd46a4 100644
--- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/Fetcher.java
+++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/Fetcher.java
@@ -86,6 +86,7 @@ public class Fetcher extends CallableWithNdc<FetchResult> {
private final FetcherCallback fetcherCallback;
private final FetchedInputAllocator inputManager;
private final ApplicationId appId;
+ private final int dagIdentifier;
private final String logIdentifier;
@@ -124,7 +125,7 @@ public class Fetcher extends CallableWithNdc<FetchResult> {
private final boolean isDebugEnabled = LOG.isDebugEnabled();
private Fetcher(FetcherCallback fetcherCallback, HttpConnectionParams params,
- FetchedInputAllocator inputManager, ApplicationId appId,
+ FetchedInputAllocator inputManager, ApplicationId appId, int dagIdentifier,
JobTokenSecretManager jobTokenSecretManager, String srcNameTrimmed, Configuration conf,
RawLocalFileSystem localFs,
LocalDirAllocator localDirAllocator,
@@ -137,6 +138,7 @@ public class Fetcher extends CallableWithNdc<FetchResult> {
this.inputManager = inputManager;
this.jobTokenSecretMgr = jobTokenSecretManager;
this.appId = appId;
+ this.dagIdentifier = dagIdentifier;
this.pathToAttemptMap = new HashMap<String, InputAttemptIdentifier>();
this.httpConnectionParams = params;
this.conf = conf;
@@ -402,7 +404,7 @@ public class Fetcher extends CallableWithNdc<FetchResult> {
private HostFetchResult setupConnection(List<InputAttemptIdentifier> attempts) {
try {
StringBuilder baseURI = ShuffleUtils.constructBaseURIForShuffleHandler(host,
- port, partition, appId.toString(), httpConnectionParams.isSSLShuffleEnabled());
+ port, partition, appId.toString(), dagIdentifier, httpConnectionParams.isSSLShuffleEnabled());
this.url = ShuffleUtils.constructInputURL(baseURI.toString(), attempts,
httpConnectionParams.getKeepAlive());
@@ -902,21 +904,21 @@ public class Fetcher extends CallableWithNdc<FetchResult> {
public FetcherBuilder(FetcherCallback fetcherCallback,
HttpConnectionParams params, FetchedInputAllocator inputManager,
- ApplicationId appId, JobTokenSecretManager jobTokenSecretMgr, String srcNameTrimmed,
+ ApplicationId appId, int dagIdentifier, JobTokenSecretManager jobTokenSecretMgr, String srcNameTrimmed,
Configuration conf, boolean localDiskFetchEnabled, String localHostname, int shufflePort) {
- this.fetcher = new Fetcher(fetcherCallback, params, inputManager, appId,
+ this.fetcher = new Fetcher(fetcherCallback, params, inputManager, appId, dagIdentifier,
jobTokenSecretMgr, srcNameTrimmed, conf, null, null, null, localDiskFetchEnabled,
false, localHostname, shufflePort);
}
public FetcherBuilder(FetcherCallback fetcherCallback,
HttpConnectionParams params, FetchedInputAllocator inputManager,
- ApplicationId appId, JobTokenSecretManager jobTokenSecretMgr, String srcNameTrimmed,
+ ApplicationId appId, int dagIdentifier, JobTokenSecretManager jobTokenSecretMgr, String srcNameTrimmed,
Configuration conf, RawLocalFileSystem localFs,
LocalDirAllocator localDirAllocator, Path lockPath,
boolean localDiskFetchEnabled, boolean sharedFetchEnabled,
String localHostname, int shufflePort) {
- this.fetcher = new Fetcher(fetcherCallback, params, inputManager, appId,
+ this.fetcher = new Fetcher(fetcherCallback, params, inputManager, appId, dagIdentifier,
jobTokenSecretMgr, srcNameTrimmed, conf, localFs, localDirAllocator,
lockPath, localDiskFetchEnabled, sharedFetchEnabled, localHostname, shufflePort);
}
http://git-wip-us.apache.org/repos/asf/tez/blob/fdb91771/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/ShuffleUtils.java
----------------------------------------------------------------------
diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/ShuffleUtils.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/ShuffleUtils.java
index 46489ed..d7cb7c1 100644
--- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/ShuffleUtils.java
+++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/ShuffleUtils.java
@@ -174,19 +174,21 @@ public class ShuffleUtils {
// TODO NEWTEZ handle ssl shuffle
public static StringBuilder constructBaseURIForShuffleHandler(String host,
- int port, int partition, String appId, boolean sslShuffle) {
+ int port, int partition, String appId, int dagIdentifier, boolean sslShuffle) {
return constructBaseURIForShuffleHandler(host + ":" + String.valueOf(port),
- partition, appId, sslShuffle);
+ partition, appId, dagIdentifier, sslShuffle);
}
public static StringBuilder constructBaseURIForShuffleHandler(String hostIdentifier,
- int partition, String appId, boolean sslShuffle) {
+ int partition, String appId, int dagIdentifier, boolean sslShuffle) {
final String http_protocol = (sslShuffle) ? "https://" : "http://";
StringBuilder sb = new StringBuilder(http_protocol);
sb.append(hostIdentifier);
sb.append("/");
sb.append("mapOutput?job=");
sb.append(appId.replace("application", "job"));
+ sb.append("&dag=");
+ sb.append(String.valueOf(dagIdentifier));
sb.append("&reduce=");
sb.append(String.valueOf(partition));
sb.append("&map=");
http://git-wip-us.apache.org/repos/asf/tez/blob/fdb91771/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/impl/ShuffleManager.java
----------------------------------------------------------------------
diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/impl/ShuffleManager.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/impl/ShuffleManager.java
index ac7caca..437de76 100644
--- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/impl/ShuffleManager.java
+++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/impl/ShuffleManager.java
@@ -393,7 +393,7 @@ public class ShuffleManager implements FetcherCallback {
}
FetcherBuilder fetcherBuilder = new FetcherBuilder(ShuffleManager.this,
- httpConnectionParams, inputManager, inputContext.getApplicationId(),
+ httpConnectionParams, inputManager, inputContext.getApplicationId(), inputContext.getDagIdentifier(),
jobTokenSecretMgr, srcNameTrimmed, conf, localFs, localDirAllocator,
lockDisk, localDiskFetchEnabled, sharedFetchEnabled,
localhostName, shufflePort);
http://git-wip-us.apache.org/repos/asf/tez/blob/fdb91771/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/ShuffleInputEventHandlerOrderedGrouped.java
----------------------------------------------------------------------
diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/ShuffleInputEventHandlerOrderedGrouped.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/ShuffleInputEventHandlerOrderedGrouped.java
index 32ac766..9481e65 100644
--- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/ShuffleInputEventHandlerOrderedGrouped.java
+++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/ShuffleInputEventHandlerOrderedGrouped.java
@@ -127,7 +127,7 @@ public class ShuffleInputEventHandlerOrderedGrouped {
@VisibleForTesting
URI getBaseURI(String host, int port, int partitionId) {
StringBuilder sb = ShuffleUtils.constructBaseURIForShuffleHandler(host, port,
- partitionId, inputContext.getApplicationId().toString(), sslShuffle);
+ partitionId, inputContext.getApplicationId().toString(), inputContext.getDagIdentifier(), sslShuffle);
URI u = URI.create(sb.toString());
return u;
}
http://git-wip-us.apache.org/repos/asf/tez/blob/fdb91771/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/TestFetcher.java
----------------------------------------------------------------------
diff --git a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/TestFetcher.java b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/TestFetcher.java
index 4ef187d..d2b0bde 100644
--- a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/TestFetcher.java
+++ b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/TestFetcher.java
@@ -70,7 +70,7 @@ public class TestFetcher {
final boolean DISABLE_LOCAL_FETCH = false;
Fetcher.FetcherBuilder builder = new Fetcher.FetcherBuilder(fetcherCallback, null, null,
- ApplicationId.newInstance(0, 1), null, "fetcherTest", conf, ENABLE_LOCAL_FETCH, HOST, PORT);
+ ApplicationId.newInstance(0, 1), 1, null, "fetcherTest", conf, ENABLE_LOCAL_FETCH, HOST, PORT);
builder.assignWork(HOST, PORT, 0, Arrays.asList(srcAttempts));
Fetcher fetcher = spy(builder.build());
@@ -119,7 +119,7 @@ public class TestFetcher {
// When disabled use http fetch
conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_OPTIMIZE_LOCAL_FETCH, false);
builder = new Fetcher.FetcherBuilder(fetcherCallback, null, null,
- ApplicationId.newInstance(0, 1), null, "fetcherTest", conf, DISABLE_LOCAL_FETCH, HOST, PORT);
+ ApplicationId.newInstance(0, 1), 1, null, "fetcherTest", conf, DISABLE_LOCAL_FETCH, HOST, PORT);
builder.assignWork(HOST, PORT, 0, Arrays.asList(srcAttempts));
fetcher = spy(builder.build());
@@ -152,7 +152,7 @@ public class TestFetcher {
int partition = 42;
FetcherCallback callback = mock(FetcherCallback.class);
Fetcher.FetcherBuilder builder = new Fetcher.FetcherBuilder(callback, null, null,
- ApplicationId.newInstance(0, 1), null, "fetcherTest", conf, true, HOST, PORT);
+ ApplicationId.newInstance(0, 1), 1, null, "fetcherTest", conf, true, HOST, PORT);
builder.assignWork(HOST, PORT, partition, Arrays.asList(srcAttempts));
Fetcher fetcher = spy(builder.build());
[20/43] tez git commit: TEZ-2122. Setup pluggable components at
AM/Vertex level. (sseth)
Posted by ss...@apache.org.
TEZ-2122. Setup pluggable components at AM/Vertex level. (sseth)
Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/cee48099
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/cee48099
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/cee48099
Branch: refs/heads/TEZ-2003
Commit: cee4809942dbdeafabc7e9442e17aab54c54fdba
Parents: 5f27b83
Author: Siddharth Seth <ss...@apache.org>
Authored: Thu Feb 19 14:59:18 2015 -0800
Committer: Siddharth Seth <ss...@apache.org>
Committed: Fri May 8 14:41:01 2015 -0700
----------------------------------------------------------------------
TEZ-2003-CHANGES.txt | 1 +
.../apache/tez/dag/api/TezConfiguration.java | 29 +++-
.../org/apache/tez/dag/api/TezConstants.java | 3 +
.../java/org/apache/tez/dag/app/AppContext.java | 4 +
.../org/apache/tez/dag/app/DAGAppMaster.java | 121 +++++++++++++-
.../dag/app/TaskAttemptListenerImpTezDag.java | 77 +++++----
.../java/org/apache/tez/dag/app/dag/Vertex.java | 4 +
.../tez/dag/app/dag/impl/TaskAttemptImpl.java | 8 +-
.../apache/tez/dag/app/dag/impl/VertexImpl.java | 47 ++++++
.../app/launcher/ContainerLauncherRouter.java | 93 +++++++----
.../app/rm/AMSchedulerEventTALaunchRequest.java | 22 ++-
.../dag/app/rm/TaskSchedulerEventHandler.java | 163 +++++++++++--------
.../apache/tez/dag/app/MockDAGAppMaster.java | 5 +-
.../app/TestTaskAttemptListenerImplTezDag.java | 19 +--
.../tez/dag/app/rm/TestContainerReuse.java | 2 +-
.../app/rm/TestTaskSchedulerEventHandler.java | 12 +-
.../dag/app/rm/TestTaskSchedulerHelpers.java | 18 +-
.../tez/tests/TestExternalTezServices.java | 19 ++-
18 files changed, 458 insertions(+), 189 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/tez/blob/cee48099/TEZ-2003-CHANGES.txt
----------------------------------------------------------------------
diff --git a/TEZ-2003-CHANGES.txt b/TEZ-2003-CHANGES.txt
index 1cd74a4..4bfe08f 100644
--- a/TEZ-2003-CHANGES.txt
+++ b/TEZ-2003-CHANGES.txt
@@ -3,5 +3,6 @@ ALL CHANGES:
TEZ-2006. Task communication plane needs to be pluggable.
TEZ-2090. Add tests for jobs running in external services.
TEZ-2117. Add a manager for ContainerLaunchers running in the AM.
+ TEZ-2122. Setup pluggable components at AM/Vertex level.
INCOMPATIBLE CHANGES:
http://git-wip-us.apache.org/repos/asf/tez/blob/cee48099/tez-api/src/main/java/org/apache/tez/dag/api/TezConfiguration.java
----------------------------------------------------------------------
diff --git a/tez-api/src/main/java/org/apache/tez/dag/api/TezConfiguration.java b/tez-api/src/main/java/org/apache/tez/dag/api/TezConfiguration.java
index 1cd478e..1f5f157 100644
--- a/tez-api/src/main/java/org/apache/tez/dag/api/TezConfiguration.java
+++ b/tez-api/src/main/java/org/apache/tez/dag/api/TezConfiguration.java
@@ -1168,13 +1168,36 @@ public class TezConfiguration extends Configuration {
+ "tez-ui.webservice.enable";
public static final boolean TEZ_AM_WEBSERVICE_ENABLE_DEFAULT = true;
+ /** defaults container-launcher for the specific vertex */
@ConfigurationScope(Scope.VERTEX)
- public static final String TEZ_AM_CONTAINER_LAUNCHER_CLASS = TEZ_AM_PREFIX + "container-launcher.class";
+ public static final String TEZ_AM_VERTEX_CONTAINER_LAUNCHER_NAME = TEZ_AM_PREFIX + "vertex.container-launcher.name";
+ /** defaults task-scheduler for the specific vertex */
@ConfigurationScope(Scope.VERTEX)
- public static final String TEZ_AM_TASK_SCHEDULER_CLASS = TEZ_AM_PREFIX + "task-scheduler.class";
+ public static final String TEZ_AM_VERTEX_TASK_SCHEDULER_NAME = TEZ_AM_PREFIX + "vertex.task-scheduler.name";
+ /** defaults task-communicator for the specific vertex */
@ConfigurationScope(Scope.VERTEX)
- public static final String TEZ_AM_TASK_COMMUNICATOR_CLASS = TEZ_AM_PREFIX + "task-communicator.class";
+ public static final String TEZ_AM_VERTEX_TASK_COMMUNICATOR_NAME = TEZ_AM_PREFIX + "vertex.task-communicator.name";
+ /** Comma separated list of named container-launcher classes running in the AM.
+ * The format for each entry is NAME:CLASSNAME, except for tez default which is specified as Tez
+ * e.g. Tez, ExtService:org.apache.ExtLauncherClass
+ * */
+ @ConfigurationScope(Scope.AM)
+ public static final String TEZ_AM_CONTAINER_LAUNCHERS = TEZ_AM_PREFIX + "container-launchers";
+
+ /** Comma separated list of task-schedulers classes running in the AM.
+ * The format for each entry is NAME:CLASSNAME, except for tez default which is specified as Tez
+ * e.g. Tez, ExtService:org.apache.ExtSchedulerClass
+ */
+ @ConfigurationScope(Scope.AM)
+ public static final String TEZ_AM_TASK_SCHEDULERS = TEZ_AM_PREFIX + "task-schedulers";
+
+ /** Comma separated list of task-communicators classes running in the AM.
+ * The format for each entry is NAME:CLASSNAME, except for tez default which is specified as Tez
+ * e.g. Tez, ExtService:org.apache.ExtTaskCommClass
+ * */
+ @ConfigurationScope(Scope.AM)
+ public static final String TEZ_AM_TASK_COMMUNICATORS = TEZ_AM_PREFIX + "task-communicators";
// TODO only validate property here, value can also be validated if necessary
public static void validateProperty(String property, Scope usedScope) {
http://git-wip-us.apache.org/repos/asf/tez/blob/cee48099/tez-api/src/main/java/org/apache/tez/dag/api/TezConstants.java
----------------------------------------------------------------------
diff --git a/tez-api/src/main/java/org/apache/tez/dag/api/TezConstants.java b/tez-api/src/main/java/org/apache/tez/dag/api/TezConstants.java
index bc4208f..3b07c59 100644
--- a/tez-api/src/main/java/org/apache/tez/dag/api/TezConstants.java
+++ b/tez-api/src/main/java/org/apache/tez/dag/api/TezConstants.java
@@ -102,4 +102,7 @@ public class TezConstants {
/// Version-related Environment variables
public static final String TEZ_CLIENT_VERSION_ENV = "TEZ_CLIENT_VERSION";
+
+ public static final String TEZ_AM_SERVICE_PLUGINS_NAME_DEFAULT = "Tez";
+ public static final String TEZ_AM_SERVICE_PLUGINS_LOCAL_MODE_NAME_DEFAULT = "TezLocal";
}
http://git-wip-us.apache.org/repos/asf/tez/blob/cee48099/tez-dag/src/main/java/org/apache/tez/dag/app/AppContext.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/AppContext.java b/tez-dag/src/main/java/org/apache/tez/dag/app/AppContext.java
index 37f7624..9463226 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/AppContext.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/AppContext.java
@@ -109,4 +109,8 @@ public interface AppContext {
String getAMUser();
Credentials getAppCredentials();
+
+ public Integer getTaskCommunicatorIdentifier(String name);
+ public Integer getTaskScheduerIdentifier(String name);
+ public Integer getContainerLauncherIdentifier(String name);
}
http://git-wip-us.apache.org/repos/asf/tez/blob/cee48099/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java b/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java
index 0f4d812..6814cda 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java
@@ -54,6 +54,8 @@ import java.util.concurrent.locks.ReentrantReadWriteLock;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
+import com.google.common.collect.BiMap;
+import com.google.common.collect.HashBiMap;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.Options;
@@ -266,7 +268,12 @@ public class DAGAppMaster extends AbstractService {
private ExecutorService rawExecutor;
private ListeningExecutorService execService;
-
+
+ // TODO May not need to be a bidi map
+ private final BiMap<String, Integer> taskSchedulers = HashBiMap.create();
+ private final BiMap<String, Integer> containerLaunchers = HashBiMap.create();
+ private final BiMap<String, Integer> taskCommunicators = HashBiMap.create();
+
/**
* set of already executed dag names.
*/
@@ -370,6 +377,29 @@ public class DAGAppMaster extends AbstractService {
this.isLocal = conf.getBoolean(TezConfiguration.TEZ_LOCAL_MODE,
TezConfiguration.TEZ_LOCAL_MODE_DEFAULT);
+ String tezDefaultClassIdentifier =
+ isLocal ? TezConstants.TEZ_AM_SERVICE_PLUGINS_LOCAL_MODE_NAME_DEFAULT :
+ TezConstants.TEZ_AM_SERVICE_PLUGINS_NAME_DEFAULT;
+
+ String[] taskSchedulerClassIdentifiers = parsePlugins(taskSchedulers,
+ conf.getTrimmedStrings(TezConfiguration.TEZ_AM_TASK_SCHEDULERS,
+ tezDefaultClassIdentifier),
+ TezConfiguration.TEZ_AM_TASK_SCHEDULERS);
+
+ String[] containerLauncherClassIdentifiers = parsePlugins(containerLaunchers,
+ conf.getTrimmedStrings(TezConfiguration.TEZ_AM_CONTAINER_LAUNCHERS,
+ tezDefaultClassIdentifier),
+ TezConfiguration.TEZ_AM_CONTAINER_LAUNCHERS);
+
+ String[] taskCommunicatorClassIdentifiers = parsePlugins(taskCommunicators,
+ conf.getTrimmedStrings(TezConfiguration.TEZ_AM_TASK_COMMUNICATORS,
+ tezDefaultClassIdentifier),
+ TezConfiguration.TEZ_AM_TASK_COMMUNICATORS);
+
+ LOG.info(buildPluginComponentLog(taskSchedulerClassIdentifiers, taskSchedulers, "TaskSchedulers"));
+ LOG.info(buildPluginComponentLog(containerLauncherClassIdentifiers, containerLaunchers, "ContainerLaunchers"));
+ LOG.info(buildPluginComponentLog(taskCommunicatorClassIdentifiers, taskCommunicators, "TaskCommunicators"));
+
boolean disableVersionCheck = conf.getBoolean(
TezConfiguration.TEZ_AM_DISABLE_CLIENT_VERSION_CHECK,
TezConfiguration.TEZ_AM_DISABLE_CLIENT_VERSION_CHECK_DEFAULT);
@@ -435,7 +465,7 @@ public class DAGAppMaster extends AbstractService {
//service to handle requests to TaskUmbilicalProtocol
taskAttemptListener = createTaskAttemptListener(context,
- taskHeartbeatHandler, containerHeartbeatHandler);
+ taskHeartbeatHandler, containerHeartbeatHandler, taskCommunicatorClassIdentifiers);
addIfService(taskAttemptListener, true);
containerSignatureMatcher = createContainerSignatureMatcher();
@@ -482,7 +512,8 @@ public class DAGAppMaster extends AbstractService {
}
this.taskSchedulerEventHandler = new TaskSchedulerEventHandler(context,
- clientRpcServer, dispatcher.getEventHandler(), containerSignatureMatcher, webUIService);
+ clientRpcServer, dispatcher.getEventHandler(), containerSignatureMatcher, webUIService,
+ taskSchedulerClassIdentifiers);
addIfService(taskSchedulerEventHandler, true);
if (enableWebUIService()) {
@@ -500,7 +531,7 @@ public class DAGAppMaster extends AbstractService {
taskSchedulerEventHandler);
addIfServiceDependency(taskSchedulerEventHandler, clientRpcServer);
- this.containerLauncherRouter = createContainerLauncherRouter(conf);
+ this.containerLauncherRouter = createContainerLauncherRouter(conf, containerLauncherClassIdentifiers);
addIfService(containerLauncherRouter, true);
dispatcher.register(NMCommunicatorEventType.class, containerLauncherRouter);
@@ -1007,9 +1038,9 @@ public class DAGAppMaster extends AbstractService {
}
protected TaskAttemptListener createTaskAttemptListener(AppContext context,
- TaskHeartbeatHandler thh, ContainerHeartbeatHandler chh) {
+ TaskHeartbeatHandler thh, ContainerHeartbeatHandler chh, String[] taskCommunicatorClasses) {
TaskAttemptListener lis =
- new TaskAttemptListenerImpTezDag(context, thh, chh,jobTokenSecretManager);
+ new TaskAttemptListenerImpTezDag(context, thh, chh,jobTokenSecretManager, taskCommunicatorClasses);
return lis;
}
@@ -1030,9 +1061,9 @@ public class DAGAppMaster extends AbstractService {
return chh;
}
- protected ContainerLauncherRouter createContainerLauncherRouter(Configuration conf) throws
+ protected ContainerLauncherRouter createContainerLauncherRouter(Configuration conf, String []containerLauncherClasses) throws
UnknownHostException {
- return new ContainerLauncherRouter(conf, isLocal, context, taskAttemptListener, workingDirectory);
+ return new ContainerLauncherRouter(conf, context, taskAttemptListener, workingDirectory, containerLauncherClasses);
}
@@ -1459,6 +1490,21 @@ public class DAGAppMaster extends AbstractService {
}
@Override
+ public Integer getTaskCommunicatorIdentifier(String name) {
+ // Looks the name up in the taskCommunicators map; yields null when the name is not registered.
+ return taskCommunicators.get(name);
+ }
+
+ /**
+ * Looks up the numeric identifier registered for a named task scheduler.
+ *
+ * @param name the task-scheduler name to look up
+ * @return the value held for the name in the taskSchedulers map, or null if the name is absent
+ */
+ @Override
+ public Integer getTaskScheduerIdentifier(String name) {
+ return taskSchedulers.get(name);
+ }
+
+ /**
+ * Looks up the numeric identifier registered for a named container launcher.
+ *
+ * @param name the container-launcher name to look up
+ * @return the value held for the name in the containerLaunchers map, or null if the name is absent
+ */
+ @Override
+ public Integer getContainerLauncherIdentifier(String name) {
+ // Container launchers have their own registry; the task-communicator map must not be consulted here.
+ return containerLaunchers.get(name);
+ }
+
+ @Override
public Map<ApplicationAccessType, String> getApplicationACLs() {
if (getServiceState() != STATE.STARTED) {
throw new TezUncheckedException(
@@ -2233,4 +2279,63 @@ public class DAGAppMaster extends AbstractService {
return amConf.getBoolean(TezConfiguration.TEZ_AM_WEBSERVICE_ENABLE,
TezConfiguration.TEZ_AM_WEBSERVICE_ENABLE_DEFAULT);
}
+
+ /**
+ * Parses the configured plugin entries for one pluggable AM component type.
+ * <p>
+ * Each entry is either one of the Tez default names
+ * (TezConstants.TEZ_AM_SERVICE_PLUGINS_NAME_DEFAULT or
+ * TezConstants.TEZ_AM_SERVICE_PLUGINS_LOCAL_MODE_NAME_DEFAULT) or a NAME:CLASSNAME pair.
+ * The position of an entry in pluginStrings is used as its numeric identifier.
+ *
+ * @param pluginMap receives a name -> identifier mapping for every entry
+ * @param pluginStrings the configured entries; must be non-null and non-empty
+ * @param context the configuration key being parsed; used only in error messages
+ * @return the class name of each entry, indexed by identifier; for the Tez default names the
+ * name itself is stored in place of a class name
+ * @throws IllegalStateException if an entry is null, empty or not of the expected form
+ */
+ private String[] parsePlugins(BiMap<String, Integer> pluginMap, String[] pluginStrings,
+ String context) {
+ Preconditions.checkState(pluginStrings != null && pluginStrings.length > 0,
+ "Plugin strings should not be null or empty: " + context);
+
+ String[] classNames = new String[pluginStrings.length];
+
+ // The index must advance with every entry: each plugin needs its own identifier and its own
+ // slot in classNames, and a BiMap rejects binding one value to two different keys.
+ for (int index = 0; index < pluginStrings.length; index++) {
+ String pluginString = pluginStrings[index];
+
+ String className;
+ String identifierString;
+
+ Preconditions.checkState(pluginString != null && !pluginString.isEmpty(),
+ "Plugin string: " + pluginString + " should not be null or empty");
+ if (pluginString.equals(TezConstants.TEZ_AM_SERVICE_PLUGINS_NAME_DEFAULT) ||
+ pluginString.equals(TezConstants.TEZ_AM_SERVICE_PLUGINS_LOCAL_MODE_NAME_DEFAULT)) {
+ // Kind of ugly, but Tez internal routing is encoded via a String instead of classnames.
+ // Individual components - TaskComm, Scheduler, Launcher deal with actual classname translation,
+ // and avoid reflection.
+ identifierString = pluginString;
+ className = pluginString;
+ } else {
+ String[] parts = pluginString.split(":");
+ Preconditions.checkState(
+ parts.length == 2 && parts[0] != null && !parts[0].isEmpty() && parts[1] != null &&
+ !parts[1].isEmpty(),
+ "Invalid configuration string for " + context + ": " + pluginString);
+ // The default names are reserved and may not be reused as the NAME of a custom plugin.
+ Preconditions.checkState(
+ !parts[0].equals(TezConstants.TEZ_AM_SERVICE_PLUGINS_NAME_DEFAULT) &&
+ !parts[0].equals(TezConstants.TEZ_AM_SERVICE_PLUGINS_LOCAL_MODE_NAME_DEFAULT),
+ "Identifier cannot be " + TezConstants.TEZ_AM_SERVICE_PLUGINS_NAME_DEFAULT + " or " +
+ TezConstants.TEZ_AM_SERVICE_PLUGINS_LOCAL_MODE_NAME_DEFAULT + " for " +
+ pluginString);
+ identifierString = parts[0];
+ className = parts[1];
+ }
+ pluginMap.put(identifierString, index);
+ classNames[index] = className;
+ }
+ return classNames;
+ }
+
+ /**
+ * Builds a one-line summary of the plugins configured for one component type.
+ * <p>
+ * The result has the form
+ * "AM Level configured COMPONENT: [index:name:classIdentifier],[...]".
+ *
+ * @param classIdentifiers the class identifier of each plugin, indexed by plugin identifier
+ * @param map the name -> identifier mapping for the same component type
+ * @param component label for the component type, used in the message prefix
+ * @return the formatted summary
+ */
+ String buildPluginComponentLog(String[] classIdentifiers, BiMap<String, Integer> map,
+ String component) {
+ StringBuilder sb = new StringBuilder();
+ sb.append("AM Level configured ").append(component).append(": ");
+ for (int i = 0; i < classIdentifiers.length; i++) {
+ // Report the name from the map that was passed in, followed by this component's own class
+ // identifier, rather than always reading from the task-scheduler registry.
+ sb.append("[").append(i).append(":").append(map.inverse().get(i)).append(":")
+ .append(classIdentifiers[i]).append(
+ "]");
+ if (i != classIdentifiers.length - 1) {
+ sb.append(",");
+ }
+ }
+ return sb.toString();
+ }
}
http://git-wip-us.apache.org/repos/asf/tez/blob/cee48099/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java b/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
index 0d9dc31..2f6dcf5 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
@@ -38,7 +38,6 @@ import org.apache.tez.runtime.api.impl.EventType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.classification.InterfaceAudience;
-import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.security.Credentials;
import org.apache.hadoop.service.AbstractService;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
@@ -47,7 +46,7 @@ import org.apache.tez.common.ReflectionUtils;
import org.apache.tez.dag.api.TaskCommunicator;
import org.apache.tez.dag.api.TaskCommunicatorContext;
import org.apache.tez.dag.api.TaskHeartbeatResponse;
-import org.apache.tez.dag.api.TezConfiguration;
+import org.apache.tez.dag.api.TezConstants;
import org.apache.tez.dag.api.TezException;
import org.apache.tez.dag.api.TezUncheckedException;
import org.apache.hadoop.yarn.api.records.ContainerId;
@@ -57,7 +56,6 @@ import org.apache.tez.dag.app.dag.DAG;
import org.apache.tez.dag.app.dag.Task;
import org.apache.tez.dag.app.dag.event.TaskAttemptEventStartedRemotely;
import org.apache.tez.dag.app.dag.event.VertexEventRouteEvent;
-import org.apache.tez.dag.app.rm.TaskSchedulerService;
import org.apache.tez.dag.app.rm.container.AMContainerTask;
import org.apache.tez.dag.records.TezTaskAttemptID;
import org.apache.tez.dag.records.TezVertexID;
@@ -74,7 +72,7 @@ public class TaskAttemptListenerImpTezDag extends AbstractService implements
.getLogger(TaskAttemptListenerImpTezDag.class);
private final AppContext context;
- private TaskCommunicator taskCommunicator;
+ private final TaskCommunicator[] taskCommunicators;
protected final TaskHeartbeatHandler taskHeartbeatHandler;
protected final ContainerHeartbeatHandler containerHeartbeatHandler;
@@ -100,28 +98,52 @@ public class TaskAttemptListenerImpTezDag extends AbstractService implements
public TaskAttemptListenerImpTezDag(AppContext context,
TaskHeartbeatHandler thh, ContainerHeartbeatHandler chh,
// TODO TEZ-2003 pre-merge. Remove reference to JobTokenSecretManager.
- JobTokenSecretManager jobTokenSecretManager) {
+ JobTokenSecretManager jobTokenSecretManager,
+ String [] taskCommunicatorClassIdentifiers) {
super(TaskAttemptListenerImpTezDag.class.getName());
this.context = context;
this.taskHeartbeatHandler = thh;
this.containerHeartbeatHandler = chh;
- this.taskCommunicator = new TezTaskCommunicatorImpl(this);
+ if (taskCommunicatorClassIdentifiers == null || taskCommunicatorClassIdentifiers.length == 0) {
+ taskCommunicatorClassIdentifiers = new String[] {TezConstants.TEZ_AM_SERVICE_PLUGINS_NAME_DEFAULT};
+ }
+ this.taskCommunicators = new TaskCommunicator[taskCommunicatorClassIdentifiers.length];
+ for (int i = 0 ; i < taskCommunicatorClassIdentifiers.length ; i++) {
+ taskCommunicators[i] = createTaskCommunicator(taskCommunicatorClassIdentifiers[i]);
+ }
+ // TODO TEZ-2118 Start using taskCommunicator indices properly
+ }
+
+ /**
+ * Initializes each configured task communicator with this service's configuration and then
+ * starts it, in array order.
+ */
+ @Override
+ public void serviceStart() {
+ // TODO Why is init tied to serviceStart
+ for (TaskCommunicator communicator : taskCommunicators) {
+ communicator.init(getConfig());
+ communicator.start();
+ }
+ }
@Override
- public void serviceInit(Configuration conf) {
- String taskCommClassName = conf.get(TezConfiguration.TEZ_AM_TASK_COMMUNICATOR_CLASS);
- if (taskCommClassName == null) {
+ public void serviceStop() {
+ for (int i = 0 ; i < taskCommunicators.length ; i++) {
+ taskCommunicators[i].stop();
+ }
+ }
+
+ private TaskCommunicator createTaskCommunicator(String taskCommClassIdentifier) {
+ if (taskCommClassIdentifier.equals(TezConstants.TEZ_AM_SERVICE_PLUGINS_NAME_DEFAULT) ||
+ taskCommClassIdentifier
+ .equals(TezConstants.TEZ_AM_SERVICE_PLUGINS_LOCAL_MODE_NAME_DEFAULT)) {
LOG.info("Using Default Task Communicator");
- this.taskCommunicator = new TezTaskCommunicatorImpl(this);
+ return new TezTaskCommunicatorImpl(this);
} else {
- LOG.info("Using TaskCommunicator: " + taskCommClassName);
+ LOG.info("Using TaskCommunicator: " + taskCommClassIdentifier);
Class<? extends TaskCommunicator> taskCommClazz = (Class<? extends TaskCommunicator>) ReflectionUtils
- .getClazz(taskCommClassName);
+ .getClazz(taskCommClassIdentifier);
try {
Constructor<? extends TaskCommunicator> ctor = taskCommClazz.getConstructor(TaskCommunicatorContext.class);
ctor.setAccessible(true);
- this.taskCommunicator = ctor.newInstance(this);
+ return ctor.newInstance(this);
} catch (NoSuchMethodException e) {
throw new TezUncheckedException(e);
} catch (InvocationTargetException e) {
@@ -135,20 +157,6 @@ public class TaskAttemptListenerImpTezDag extends AbstractService implements
}
@Override
- public void serviceStart() {
- taskCommunicator.init(getConfig());
- taskCommunicator.start();
- }
-
- @Override
- public void serviceStop() {
- if (taskCommunicator != null) {
- taskCommunicator.stop();
- taskCommunicator = null;
- }
- }
-
- @Override
public ApplicationAttemptId getApplicationAttemptId() {
return context.getApplicationAttemptId();
}
@@ -236,7 +244,8 @@ public class TaskAttemptListenerImpTezDag extends AbstractService implements
@Override
public void taskStartedRemotely(TezTaskAttemptID taskAttemptID, ContainerId containerId) {
- context.getEventHandler().handle(new TaskAttemptEventStartedRemotely(taskAttemptID, containerId, null));
+ context.getEventHandler()
+ .handle(new TaskAttemptEventStartedRemotely(taskAttemptID, containerId, null));
pingContainerHeartbeatHandler(containerId);
}
@@ -266,7 +275,7 @@ public class TaskAttemptListenerImpTezDag extends AbstractService implements
@Override
public InetSocketAddress getAddress() {
- return taskCommunicator.getAddress();
+ return taskCommunicators[0].getAddress();
}
// The TaskAttemptListener register / unregister methods in this class are not thread safe.
@@ -298,7 +307,7 @@ public class TaskAttemptListenerImpTezDag extends AbstractService implements
"Multiple registrations for containerId: " + containerId);
}
NodeId nodeId = context.getAllContainers().get(containerId).getContainer().getNodeId();
- taskCommunicator.registerRunningContainer(containerId, nodeId.getHost(), nodeId.getPort());
+ taskCommunicators[0].registerRunningContainer(containerId, nodeId.getHost(), nodeId.getPort());
}
@Override
@@ -310,7 +319,7 @@ public class TaskAttemptListenerImpTezDag extends AbstractService implements
if (containerInfo.taskAttemptId != null) {
registeredAttempts.remove(containerInfo.taskAttemptId);
}
- taskCommunicator.registerContainerEnd(containerId);
+ taskCommunicators[0].registerContainerEnd(containerId);
}
@Override
@@ -345,7 +354,7 @@ public class TaskAttemptListenerImpTezDag extends AbstractService implements
+ amContainerTask.getTask().getTaskAttemptID() + " to container: " + containerId
+ " when already assigned to: " + containerIdFromMap);
}
- taskCommunicator.registerRunningTaskAttempt(containerId, amContainerTask.getTask(),
+ taskCommunicators[0].registerRunningTaskAttempt(containerId, amContainerTask.getTask(),
amContainerTask.getAdditionalResources(), amContainerTask.getCredentials(),
amContainerTask.haveCredentialsChanged());
}
@@ -365,7 +374,7 @@ public class TaskAttemptListenerImpTezDag extends AbstractService implements
}
// Explicitly putting in a new entry so that synchronization is not required on the existing element in the map.
registeredContainers.put(containerId, NULL_CONTAINER_INFO);
- taskCommunicator.unregisterRunningTaskAttempt(attemptId);
+ taskCommunicators[0].unregisterRunningTaskAttempt(attemptId);
}
private void pingContainerHeartbeatHandler(ContainerId containerId) {
@@ -383,6 +392,6 @@ public class TaskAttemptListenerImpTezDag extends AbstractService implements
}
public TaskCommunicator getTaskCommunicator() {
- return taskCommunicator;
+ return taskCommunicators[0];
}
}
http://git-wip-us.apache.org/repos/asf/tez/blob/cee48099/tez-dag/src/main/java/org/apache/tez/dag/app/dag/Vertex.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/Vertex.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/Vertex.java
index bb42392..8b60dc3 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/Vertex.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/Vertex.java
@@ -173,4 +173,8 @@ public interface Vertex extends Comparable<Vertex> {
public int getKilledTaskAttemptCount();
public Configuration getConf();
+
+ public int getTaskSchedulerIdentifier();
+ public int getContainerLauncherIdentifier();
+ public int getTaskCommunicatorIdentifier();
}
http://git-wip-us.apache.org/repos/asf/tez/blob/cee48099/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TaskAttemptImpl.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TaskAttemptImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TaskAttemptImpl.java
index b1c0acc..c18dc00 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TaskAttemptImpl.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TaskAttemptImpl.java
@@ -1066,9 +1066,15 @@ public class TaskAttemptImpl implements TaskAttempt,
priority = (scheduleEvent.getPriorityHighLimit() + scheduleEvent.getPriorityLowLimit()) / 2;
}
+ // TODO Jira post TEZ-2003 getVertex implementation is very inefficient. This should be via references, instead of locked table lookups.
+ Vertex vertex = ta.getVertex();
AMSchedulerEventTALaunchRequest launchRequestEvent = new AMSchedulerEventTALaunchRequest(
ta.attemptId, ta.taskResource, remoteTaskSpec, ta, locationHint,
- priority, ta.containerContext);
+ priority, ta.containerContext,
+ vertex.getTaskSchedulerIdentifier(),
+ vertex.getContainerLauncherIdentifier(),
+ vertex.getTaskCommunicatorIdentifier());
+
ta.sendEvent(launchRequestEvent);
return TaskAttemptStateInternal.START_WAIT;
}
http://git-wip-us.apache.org/repos/asf/tez/blob/cee48099/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexImpl.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexImpl.java
index 6b208b0..097cf3d 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexImpl.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexImpl.java
@@ -73,6 +73,7 @@ import org.apache.tez.dag.api.ProcessorDescriptor;
import org.apache.tez.dag.api.RootInputLeafOutput;
import org.apache.tez.dag.api.Scope;
import org.apache.tez.dag.api.TezConfiguration;
+import org.apache.tez.dag.api.TezConstants;
import org.apache.tez.dag.api.TezUncheckedException;
import org.apache.tez.dag.api.VertexLocationHint;
import org.apache.tez.dag.api.TaskLocationHint;
@@ -230,6 +231,10 @@ public class VertexImpl implements org.apache.tez.dag.app.dag.Vertex, EventHandl
@VisibleForTesting
public boolean useOnDemandRouting = true;
+ private final int taskSchedulerIdentifier;
+ private final int containerLauncherIdentifier;
+ private final int taskCommunicatorIdentifier;
+
//fields initialized in init
@VisibleForTesting
@@ -959,6 +964,33 @@ public class VertexImpl implements org.apache.tez.dag.app.dag.Vertex, EventHandl
// This "this leak" is okay because the retained pointer is in an
// instance variable.
+ boolean isLocal = vertexConf.getBoolean(TezConfiguration.TEZ_LOCAL_MODE,
+ TezConfiguration.TEZ_LOCAL_MODE_DEFAULT);
+
+ String tezDefaultComponentName =
+ isLocal ? TezConstants.TEZ_AM_SERVICE_PLUGINS_LOCAL_MODE_NAME_DEFAULT :
+ TezConstants.TEZ_AM_SERVICE_PLUGINS_NAME_DEFAULT;
+ String taskSchedulerName =
+ vertexConf.get(TezConfiguration.TEZ_AM_VERTEX_TASK_SCHEDULER_NAME, tezDefaultComponentName);
+ String taskCommName = vertexConf
+ .get(TezConfiguration.TEZ_AM_VERTEX_TASK_COMMUNICATOR_NAME, tezDefaultComponentName);
+ String containerLauncherName = vertexConf
+ .get(TezConfiguration.TEZ_AM_VERTEX_CONTAINER_LAUNCHER_NAME, tezDefaultComponentName);
+ taskSchedulerIdentifier = appContext.getTaskScheduerIdentifier(taskSchedulerName);
+ taskCommunicatorIdentifier = appContext.getTaskCommunicatorIdentifier(taskCommName);
+ containerLauncherIdentifier = appContext.getContainerLauncherIdentifier(containerLauncherName);
+
+ Preconditions.checkNotNull(taskSchedulerIdentifier, "Unknown taskScheduler: " + taskSchedulerName);
+ Preconditions.checkNotNull(taskCommunicatorIdentifier, "Unknown taskCommunicator: " + containerLauncherName);
+ Preconditions.checkNotNull(containerLauncherIdentifier, "Unknown containerLauncher: " + taskCommName);
+
+ StringBuilder sb = new StringBuilder();
+ sb.append("Running vertex: ").append(logIdentifier).append(" : ")
+ .append("TaskScheduler=").append(taskSchedulerIdentifier).append(":").append(taskSchedulerName)
+ .append(", ContainerLauncher=").append(containerLauncherIdentifier).append(":").append(containerLauncherName)
+ .append(", TaskCommunicator=").append(taskCommunicatorIdentifier).append(":").append(taskCommName);
+ LOG.info(sb.toString());
+
stateMachine = new StateMachineTez<VertexState, VertexEventType, VertexEvent, VertexImpl>(
stateMachineFactory.make(this), this);
augmentStateMachine();
@@ -969,6 +1001,21 @@ public class VertexImpl implements org.apache.tez.dag.app.dag.Vertex, EventHandl
return vertexConf;
}
+ /**
+ * @return the identifier stored in this vertex's taskSchedulerIdentifier field
+ */
+ @Override
+ public int getTaskSchedulerIdentifier() {
+ return taskSchedulerIdentifier;
+ }
+
+ /**
+ * @return the identifier stored in this vertex's containerLauncherIdentifier field
+ */
+ @Override
+ public int getContainerLauncherIdentifier() {
+ return containerLauncherIdentifier;
+ }
+
+ /**
+ * @return the identifier stored in this vertex's taskCommunicatorIdentifier field
+ */
+ @Override
+ public int getTaskCommunicatorIdentifier() {
+ return taskCommunicatorIdentifier;
+ }
+
private boolean isSpeculationEnabled() {
return isSpeculationEnabled;
}
http://git-wip-us.apache.org/repos/asf/tez/blob/cee48099/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncherRouter.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncherRouter.java b/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncherRouter.java
index 34001ed..621e4a8 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncherRouter.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncherRouter.java
@@ -26,6 +26,7 @@ import org.apache.hadoop.service.AbstractService;
import org.apache.hadoop.yarn.event.EventHandler;
import org.apache.tez.common.ReflectionUtils;
import org.apache.tez.dag.api.TezConfiguration;
+import org.apache.tez.dag.api.TezConstants;
import org.apache.tez.dag.api.TezUncheckedException;
import org.apache.tez.dag.app.AppContext;
import org.apache.tez.dag.app.TaskAttemptListener;
@@ -36,73 +37,93 @@ public class ContainerLauncherRouter extends AbstractService
static final Log LOG = LogFactory.getLog(ContainerLauncherImpl.class);
- private final ContainerLauncher containerLauncher;
+ private final ContainerLauncher containerLaunchers[];
@VisibleForTesting
public ContainerLauncherRouter(ContainerLauncher containerLauncher) {
super(ContainerLauncherRouter.class.getName());
- this.containerLauncher = containerLauncher;
+ containerLaunchers = new ContainerLauncher[] {containerLauncher};
}
// Accepting conf to setup final parameters, if required.
- public ContainerLauncherRouter(Configuration conf, boolean isLocal, AppContext context,
+ public ContainerLauncherRouter(Configuration conf, AppContext context,
TaskAttemptListener taskAttemptListener,
- String workingDirectory) throws UnknownHostException {
+ String workingDirectory,
+ String[] containerLauncherClassIdentifiers) throws UnknownHostException {
super(ContainerLauncherRouter.class.getName());
- if (isLocal) {
+ if (containerLauncherClassIdentifiers == null || containerLauncherClassIdentifiers.length == 0) {
+ containerLauncherClassIdentifiers = new String[] {TezConstants.TEZ_AM_SERVICE_PLUGINS_NAME_DEFAULT};
+ }
+ containerLaunchers = new ContainerLauncher[containerLauncherClassIdentifiers.length];
+
+ for (int i = 0; i < containerLauncherClassIdentifiers.length; i++) {
+ containerLaunchers[i] = createContainerLauncher(containerLauncherClassIdentifiers[i], context,
+ taskAttemptListener, workingDirectory, conf);
+ }
+ }
+
+ private ContainerLauncher createContainerLauncher(String containerLauncherClassIdentifier,
+ AppContext context,
+ TaskAttemptListener taskAttemptListener,
+ String workingDirectory,
+ Configuration conf) throws
+ UnknownHostException {
+ if (containerLauncherClassIdentifier.equals(TezConstants.TEZ_AM_SERVICE_PLUGINS_NAME_DEFAULT)) {
+ LOG.info("Creating DefaultContainerLauncher");
+ return new ContainerLauncherImpl(context);
+ } else if (containerLauncherClassIdentifier
+ .equals(TezConstants.TEZ_AM_SERVICE_PLUGINS_LOCAL_MODE_NAME_DEFAULT)) {
LOG.info("Creating LocalContainerLauncher");
- containerLauncher =
+ return
new LocalContainerLauncher(context, taskAttemptListener, workingDirectory);
} else {
- // TODO: Temporary reflection with specific parameters until a clean interface is defined.
- String containerLauncherClassName =
- conf.get(TezConfiguration.TEZ_AM_CONTAINER_LAUNCHER_CLASS);
- if (containerLauncherClassName == null) {
- LOG.info("Creating Default Container Launcher");
- containerLauncher = new ContainerLauncherImpl(context);
- } else {
- LOG.info("Creating container launcher : " + containerLauncherClassName);
- Class<? extends ContainerLauncher> containerLauncherClazz =
- (Class<? extends ContainerLauncher>) ReflectionUtils.getClazz(
- containerLauncherClassName);
- try {
- Constructor<? extends ContainerLauncher> ctor = containerLauncherClazz
- .getConstructor(AppContext.class, Configuration.class, TaskAttemptListener.class);
- ctor.setAccessible(true);
- containerLauncher = ctor.newInstance(context, conf, taskAttemptListener);
- } catch (NoSuchMethodException e) {
- throw new TezUncheckedException(e);
- } catch (InvocationTargetException e) {
- throw new TezUncheckedException(e);
- } catch (InstantiationException e) {
- throw new TezUncheckedException(e);
- } catch (IllegalAccessException e) {
- throw new TezUncheckedException(e);
- }
+ LOG.info("Creating container launcher : " + containerLauncherClassIdentifier);
+ Class<? extends ContainerLauncher> containerLauncherClazz =
+ (Class<? extends ContainerLauncher>) ReflectionUtils.getClazz(
+ containerLauncherClassIdentifier);
+ try {
+ Constructor<? extends ContainerLauncher> ctor = containerLauncherClazz
+ .getConstructor(AppContext.class, Configuration.class, TaskAttemptListener.class);
+ ctor.setAccessible(true);
+ return ctor.newInstance(context, conf, taskAttemptListener);
+ } catch (NoSuchMethodException e) {
+ throw new TezUncheckedException(e);
+ } catch (InvocationTargetException e) {
+ throw new TezUncheckedException(e);
+ } catch (InstantiationException e) {
+ throw new TezUncheckedException(e);
+ } catch (IllegalAccessException e) {
+ throw new TezUncheckedException(e);
}
-
}
+ // TODO TEZ-2118 Handle routing to multiple launchers
}
@Override
public void serviceInit(Configuration conf) {
- ((AbstractService)containerLauncher).init(conf);
+ // Initialise every configured launcher, in array order, with the router's configuration.
+ for (ContainerLauncher launcher : containerLaunchers) {
+ ((AbstractService) launcher).init(conf);
+ }
}
@Override
public void serviceStart() {
- ((AbstractService)containerLauncher).start();
+ // Start every configured launcher, in array order.
+ for (ContainerLauncher launcher : containerLaunchers) {
+ ((AbstractService) launcher).start();
+ }
}
@Override
public void serviceStop() {
- ((AbstractService)containerLauncher).stop();
+ // Stop every configured launcher, in array order.
+ for (ContainerLauncher launcher : containerLaunchers) {
+ ((AbstractService) launcher).stop();
+ }
}
@Override
public void handle(NMCommunicatorEvent event) {
- containerLauncher.handle(event);
+ containerLaunchers[0].handle(event);
}
}
http://git-wip-us.apache.org/repos/asf/tez/blob/cee48099/tez-dag/src/main/java/org/apache/tez/dag/app/rm/AMSchedulerEventTALaunchRequest.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/AMSchedulerEventTALaunchRequest.java b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/AMSchedulerEventTALaunchRequest.java
index 5c4d43c..c59193c 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/AMSchedulerEventTALaunchRequest.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/AMSchedulerEventTALaunchRequest.java
@@ -38,11 +38,16 @@ public class AMSchedulerEventTALaunchRequest extends AMSchedulerEvent {
private final TaskSpec remoteTaskSpec;
private final TaskAttempt taskAttempt;
+ private final int schedulerId;
+ private final int launcherId;
+ private final int taskCommId;
+
public AMSchedulerEventTALaunchRequest(TezTaskAttemptID attemptId,
Resource capability,
TaskSpec remoteTaskSpec, TaskAttempt ta,
TaskLocationHint locationHint, int priority,
- ContainerContext containerContext) {
+ ContainerContext containerContext,
+ int schedulerId, int launcherId, int taskCommId) {
super(AMSchedulerEventType.S_TA_LAUNCH_REQUEST);
this.attemptId = attemptId;
this.capability = capability;
@@ -51,6 +56,9 @@ public class AMSchedulerEventTALaunchRequest extends AMSchedulerEvent {
this.locationHint = locationHint;
this.priority = priority;
this.containerContext = containerContext;
+ this.schedulerId = schedulerId;
+ this.launcherId = launcherId;
+ this.taskCommId = taskCommId;
}
public TezTaskAttemptID getAttemptID() {
@@ -81,6 +89,18 @@ public class AMSchedulerEventTALaunchRequest extends AMSchedulerEvent {
return this.containerContext;
}
+ /** Returns the scheduler id passed to the constructor of this launch request. */
+ public int getSchedulerId() {
+ return this.schedulerId;
+ }
+
+ /** Returns the launcher id passed to the constructor of this launch request. */
+ public int getLauncherId() {
+ return this.launcherId;
+ }
+
+ /** Returns the task communicator id passed to the constructor of this launch request. */
+ public int getTaskCommId() {
+ return this.taskCommId;
+ }
+
// Parameter replacement: @taskid@ will not be usable
// ProfileTaskRange not available along with ContainerReUse
http://git-wip-us.apache.org/repos/asf/tez/blob/cee48099/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerEventHandler.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerEventHandler.java b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerEventHandler.java
index 8c3ed87..72389e7 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerEventHandler.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerEventHandler.java
@@ -46,6 +46,7 @@ import org.apache.hadoop.yarn.event.Event;
import org.apache.hadoop.yarn.event.EventHandler;
import org.apache.tez.common.ReflectionUtils;
import org.apache.tez.dag.api.TezConfiguration;
+import org.apache.tez.dag.api.TezConstants;
import org.apache.tez.dag.api.TezUncheckedException;
import org.apache.tez.dag.api.TaskLocationHint;
import org.apache.tez.dag.api.TaskLocationHint.TaskBasedLocationAffinity;
@@ -92,7 +93,6 @@ public class TaskSchedulerEventHandler extends AbstractService
@SuppressWarnings("rawtypes")
private final EventHandler eventHandler;
private final String historyUrl;
- protected TaskSchedulerService taskScheduler;
private DAGAppMaster dagAppMaster;
private Map<ApplicationAccessType, String> appAcls = null;
private Thread eventHandlingThread;
@@ -105,14 +105,27 @@ public class TaskSchedulerEventHandler extends AbstractService
private AtomicBoolean shouldUnregisterFlag =
new AtomicBoolean(false);
private final WebUIService webUI;
+ private final String[] taskSchedulerClasses;
+ protected final TaskSchedulerService []taskSchedulers;
BlockingQueue<AMSchedulerEvent> eventQueue
= new LinkedBlockingQueue<AMSchedulerEvent>();
+ /**
+ *
+ * @param appContext
+ * @param clientService
+ * @param eventHandler
+ * @param containerSignatureMatcher
+ * @param webUI
+ * @param schedulerClasses the list of scheduler classes / codes. Tez internal classes are represented as codes.
+ * An empty list defaults to using the YarnTaskScheduler as the only source.
+ */
@SuppressWarnings("rawtypes")
public TaskSchedulerEventHandler(AppContext appContext,
DAGClientServer clientService, EventHandler eventHandler,
- ContainerSignatureMatcher containerSignatureMatcher, WebUIService webUI) {
+ ContainerSignatureMatcher containerSignatureMatcher, WebUIService webUI,
+ String [] schedulerClasses) {
super(TaskSchedulerEventHandler.class.getName());
this.appContext = appContext;
this.eventHandler = eventHandler;
@@ -123,6 +136,12 @@ public class TaskSchedulerEventHandler extends AbstractService
if (this.webUI != null) {
this.webUI.setHistoryUrl(this.historyUrl);
}
+ if (schedulerClasses == null || schedulerClasses.length == 0) {
+ this.taskSchedulerClasses = new String[] {TezConstants.TEZ_AM_SERVICE_PLUGINS_NAME_DEFAULT};
+ } else {
+ this.taskSchedulerClasses = schedulerClasses;
+ }
+ taskSchedulers = new TaskSchedulerService[this.taskSchedulerClasses.length];
}
public Map<ApplicationAccessType, String> getApplicationAcls() {
@@ -139,11 +158,11 @@ public class TaskSchedulerEventHandler extends AbstractService
}
public Resource getAvailableResources() {
- return taskScheduler.getAvailableResources();
+ return taskSchedulers[0].getAvailableResources();
}
public Resource getTotalResources() {
- return taskScheduler.getTotalResources();
+ return taskSchedulers[0].getTotalResources();
}
public synchronized void handleEvent(AMSchedulerEvent sEvent) {
@@ -209,9 +228,9 @@ public class TaskSchedulerEventHandler extends AbstractService
private void handleNodeBlacklistUpdate(AMSchedulerEventNodeBlacklistUpdate event) {
if (event.getType() == AMSchedulerEventType.S_NODE_BLACKLISTED) {
- taskScheduler.blacklistNode(event.getNodeId());
+ taskSchedulers[0].blacklistNode(event.getNodeId());
} else if (event.getType() == AMSchedulerEventType.S_NODE_UNBLACKLISTED) {
- taskScheduler.unblacklistNode(event.getNodeId());
+ taskSchedulers[0].unblacklistNode(event.getNodeId());
} else {
throw new TezUncheckedException("Invalid event type: " + event.getType());
}
@@ -223,14 +242,14 @@ public class TaskSchedulerEventHandler extends AbstractService
// TODO what happens to the task that was connected to this container?
// current assumption is that it will eventually call handleTaStopRequest
//TaskAttempt taskAttempt = (TaskAttempt)
- taskScheduler.deallocateContainer(containerId);
+ taskSchedulers[0].deallocateContainer(containerId);
// TODO does this container need to be stopped via C_STOP_REQUEST
sendEvent(new AMContainerEventStopRequest(containerId));
}
private void handleTAUnsuccessfulEnd(AMSchedulerEventTAEnded event) {
TaskAttempt attempt = event.getAttempt();
- boolean wasContainerAllocated = taskScheduler.deallocateTask(attempt, false);
+ boolean wasContainerAllocated = taskSchedulers[0].deallocateTask(attempt, false);
// use stored value of container id in case the scheduler has removed this
// assignment because the task has been deallocated earlier.
// retroactive case
@@ -272,7 +291,7 @@ public class TaskSchedulerEventHandler extends AbstractService
event.getAttemptID()));
}
- boolean wasContainerAllocated = taskScheduler.deallocateTask(attempt, true);
+ boolean wasContainerAllocated = taskSchedulers[0].deallocateTask(attempt, true);
if (!wasContainerAllocated) {
LOG.error("De-allocated successful task: " + attempt.getID()
+ ", but TaskScheduler reported no container assigned to task");
@@ -297,7 +316,7 @@ public class TaskSchedulerEventHandler extends AbstractService
TaskAttempt affinityAttempt = vertex.getTask(taskIndex).getSuccessfulAttempt();
if (affinityAttempt != null) {
Preconditions.checkNotNull(affinityAttempt.getAssignedContainerID(), affinityAttempt.getID());
- taskScheduler.allocateTask(taskAttempt,
+ taskSchedulers[0].allocateTask(taskAttempt,
event.getCapability(),
affinityAttempt.getAssignedContainerID(),
Priority.newInstance(event.getPriority()),
@@ -316,57 +335,59 @@ public class TaskSchedulerEventHandler extends AbstractService
.toArray(new String[locationHint.getRacks().size()]) : null;
}
}
-
- taskScheduler.allocateTask(taskAttempt,
- event.getCapability(),
- hosts,
- racks,
- Priority.newInstance(event.getPriority()),
- event.getContainerContext(),
- event);
- }
-
-
- protected TaskSchedulerService createTaskScheduler(String host, int port,
- String trackingUrl, AppContext appContext) {
- boolean isLocal = getConfig().getBoolean(TezConfiguration.TEZ_LOCAL_MODE,
- TezConfiguration.TEZ_LOCAL_MODE_DEFAULT);
- if (isLocal) {
- LOG.info("Using TaskScheduler: LocalTaskSchedulerService");
+
+ taskSchedulers[0].allocateTask(taskAttempt,
+ event.getCapability(),
+ hosts,
+ racks,
+ Priority.newInstance(event.getPriority()),
+ event.getContainerContext(),
+ event);
+ }
+
+ private TaskSchedulerService createTaskScheduler(String host, int port, String trackingUrl,
+ AppContext appContext,
+ String schedulerClassName) {
+ if (schedulerClassName.equals(TezConstants.TEZ_AM_SERVICE_PLUGINS_NAME_DEFAULT)) {
+ LOG.info("Creating TaskScheduler: YarnTaskSchedulerService");
+ return new YarnTaskSchedulerService(this, this.containerSignatureMatcher,
+ host, port, trackingUrl, appContext);
+ } else if (schedulerClassName.equals(TezConstants.TEZ_AM_SERVICE_PLUGINS_LOCAL_MODE_NAME_DEFAULT)) {
+ LOG.info("Creating TaskScheduler: Local TaskScheduler");
return new LocalTaskSchedulerService(this, this.containerSignatureMatcher,
host, port, trackingUrl, appContext);
- }
- else {
- String schedulerClassName = getConfig().get(TezConfiguration.TEZ_AM_TASK_SCHEDULER_CLASS);
- if (schedulerClassName == null) {
- LOG.info("Using TaskScheduler: YarnTaskSchedulerService");
- return new YarnTaskSchedulerService(this, this.containerSignatureMatcher,
- host, port, trackingUrl, appContext);
- } else {
- LOG.info("Using custom TaskScheduler: " + schedulerClassName);
- // TODO Temporary reflection with specific parameters. Remove once there is a clean interface.
- Class<? extends TaskSchedulerService> taskSchedulerClazz =
- (Class<? extends TaskSchedulerService>) ReflectionUtils.getClazz(schedulerClassName);
- try {
- Constructor<? extends TaskSchedulerService> ctor = taskSchedulerClazz
- .getConstructor(TaskSchedulerAppCallback.class, AppContext.class, String.class,
- int.class, String.class, Configuration.class);
- ctor.setAccessible(true);
- TaskSchedulerService taskSchedulerService =
- ctor.newInstance(this, appContext, host, port, trackingUrl, getConfig());
- return taskSchedulerService;
- } catch (NoSuchMethodException e) {
- throw new TezUncheckedException(e);
- } catch (InvocationTargetException e) {
- throw new TezUncheckedException(e);
- } catch (InstantiationException e) {
- throw new TezUncheckedException(e);
- } catch (IllegalAccessException e) {
- throw new TezUncheckedException(e);
- }
+ } else {
+ LOG.info("Creating custom TaskScheduler: " + schedulerClassName);
+ // TODO TEZ-2003 Temporary reflection with specific parameters. Remove once there is a clean interface.
+ Class<? extends TaskSchedulerService> taskSchedulerClazz =
+ (Class<? extends TaskSchedulerService>) ReflectionUtils.getClazz(schedulerClassName);
+ try {
+ Constructor<? extends TaskSchedulerService> ctor = taskSchedulerClazz
+ .getConstructor(TaskSchedulerAppCallback.class, AppContext.class, String.class,
+ int.class, String.class, Configuration.class);
+ ctor.setAccessible(true);
+ return ctor.newInstance(this, appContext, host, port, trackingUrl, getConfig());
+ } catch (NoSuchMethodException e) {
+ throw new TezUncheckedException(e);
+ } catch (InvocationTargetException e) {
+ throw new TezUncheckedException(e);
+ } catch (InstantiationException e) {
+ throw new TezUncheckedException(e);
+ } catch (IllegalAccessException e) {
+ throw new TezUncheckedException(e);
}
}
}
+
+ /**
+ * Creates one task scheduler per entry of taskSchedulerClasses and stores it at the
+ * same index of taskSchedulers.
+ *
+ * @param host host name handed to each created scheduler
+ * @param port port handed to each created scheduler
+ * @param trackingUrl tracking URL handed to each created scheduler
+ * @param appContext application context handed to each created scheduler
+ */
+ @VisibleForTesting
+ protected void instantiateScheduelrs(String host, int port, String trackingUrl, AppContext appContext) {
+ int slot = 0;
+ for (String schedulerClass : taskSchedulerClasses) {
+ taskSchedulers[slot] = createTaskScheduler(host, port, trackingUrl, appContext, schedulerClass);
+ slot++;
+ }
+ }
+
@Override
public synchronized void serviceStart() {
@@ -377,13 +398,17 @@ public class TaskSchedulerEventHandler extends AbstractService
// always try to connect to AM and proxy the response. hence it wont work if the webUIService
// is not enabled.
String trackingUrl = (webUI != null) ? webUI.getTrackingURL() : "";
- taskScheduler = createTaskScheduler(serviceAddr.getHostName(),
- serviceAddr.getPort(), trackingUrl, appContext);
- taskScheduler.init(getConfig());
- taskScheduler.start();
+ instantiateScheduelrs(serviceAddr.getHostName(), serviceAddr.getPort(), trackingUrl, appContext);
+
+ for (int i = 0 ; i < taskSchedulers.length ; i++) {
+ taskSchedulers[i].init(getConfig());
+ taskSchedulers[i].start();
+ }
+
+ // TODO TEZ-2118 Start using multiple task schedulers
if (shouldUnregisterFlag.get()) {
// Flag may have been set earlier when task scheduler was not initialized
- taskScheduler.setShouldUnregister();
+ taskSchedulers[0].setShouldUnregister();
}
this.eventHandlingThread = new Thread("TaskSchedulerEventHandlerThread") {
@@ -432,8 +457,8 @@ public class TaskSchedulerEventHandler extends AbstractService
if (eventHandlingThread != null)
eventHandlingThread.interrupt();
}
- if (taskScheduler != null) {
- ((AbstractService)taskScheduler).stop();
+ // serviceStart() initialises and starts every entry of taskSchedulers, so stop all of them,
+ // not only the first. Entries are only populated during start, hence the null check.
+ for (int i = 0 ; i < taskSchedulers.length ; i++) {
+ if (taskSchedulers[i] != null) {
+ ((AbstractService)taskSchedulers[i]).stop();
+ }
}
}
@@ -578,7 +603,7 @@ public class TaskSchedulerEventHandler extends AbstractService
public float getProgress() {
// at this point allocate has been called and so node count must be available
// may change after YARN-1722
- int nodeCount = taskScheduler.getClusterNodeCount();
+ int nodeCount = taskSchedulers[0].getClusterNodeCount();
if (nodeCount != cachedNodeCount) {
cachedNodeCount = nodeCount;
sendEvent(new AMNodeEventNodeCountUpdated(cachedNodeCount));
@@ -593,7 +618,7 @@ public class TaskSchedulerEventHandler extends AbstractService
}
public void dagCompleted() {
- taskScheduler.dagComplete();
+ taskSchedulers[0].dagComplete();
}
public void dagSubmitted() {
@@ -603,7 +628,7 @@ public class TaskSchedulerEventHandler extends AbstractService
@Override
public void preemptContainer(ContainerId containerId) {
- taskScheduler.deallocateContainer(containerId);
+ taskSchedulers[0].deallocateContainer(containerId);
// Inform the Containers about completion.
sendEvent(new AMContainerEventCompleted(containerId, ContainerExitStatus.INVALID,
"Container preempted internally", TaskAttemptTerminationCause.INTERNAL_PREEMPTION));
@@ -612,13 +637,13 @@ public class TaskSchedulerEventHandler extends AbstractService
public void setShouldUnregisterFlag() {
LOG.info("TaskScheduler notified that it should unregister from RM");
this.shouldUnregisterFlag.set(true);
- if (this.taskScheduler != null) {
- this.taskScheduler.setShouldUnregister();
+ if (this.taskSchedulers[0] != null) {
+ this.taskSchedulers[0].setShouldUnregister();
}
}
public boolean hasUnregistered() {
- return this.taskScheduler.hasUnregistered();
+ return this.taskSchedulers[0].hasUnregistered();
}
@VisibleForTesting
http://git-wip-us.apache.org/repos/asf/tez/blob/cee48099/tez-dag/src/test/java/org/apache/tez/dag/app/MockDAGAppMaster.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/MockDAGAppMaster.java b/tez-dag/src/test/java/org/apache/tez/dag/app/MockDAGAppMaster.java
index 7274cde..aace92b 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/MockDAGAppMaster.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/MockDAGAppMaster.java
@@ -507,10 +507,11 @@ public class MockDAGAppMaster extends DAGAppMaster {
this.handlerConcurrency = handlerConcurrency;
this.numConcurrentContainers = numConcurrentContainers;
}
-
+
// use mock container launcher for tests
@Override
- protected ContainerLauncherRouter createContainerLauncherRouter(final Configuration conf)
+ protected ContainerLauncherRouter createContainerLauncherRouter(final Configuration conf,
+ String[] containerLaunchers)
throws UnknownHostException {
return new ContainerLauncherRouter(containerLauncher);
}
http://git-wip-us.apache.org/repos/asf/tez/blob/cee48099/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskAttemptListenerImplTezDag.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskAttemptListenerImplTezDag.java b/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskAttemptListenerImplTezDag.java
index 46c412e..33f4817 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskAttemptListenerImplTezDag.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskAttemptListenerImplTezDag.java
@@ -32,6 +32,7 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
+import org.apache.hadoop.security.Credentials;
import org.apache.hadoop.yarn.api.records.ApplicationAccessType;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.ApplicationId;
@@ -109,18 +110,9 @@ public class TestTaskAttemptListenerImplTezDag {
doReturn(dag).when(appContext).getCurrentDAG();
doReturn(appAcls).when(appContext).getApplicationACLs();
doReturn(amContainerMap).when(appContext).getAllContainers();
- NodeId nodeId = NodeId.newInstance("localhost", 0);
- AMContainer amContainer = mock(AMContainer.class);
- Container container = mock(Container.class);
- doReturn(nodeId).when(container).getNodeId();
- doReturn(amContainer).when(amContainerMap).get(any(ContainerId.class));
- doReturn(container).when(amContainer).getContainer();
-
- taskAttemptListener =
- new TaskAttemptListenerImpTezDag(appContext, mock(TaskHeartbeatHandler.class),
- mock(ContainerHeartbeatHandler.class), null);
- TezTaskCommunicatorImpl taskCommunicator = (TezTaskCommunicatorImpl)taskAttemptListener.getTaskCommunicator();
- TezTaskUmbilicalProtocol tezUmbilical = taskCommunicator.getUmbilical();
+
+ taskAttemptListener = new TaskAttemptListenerImplForTest(appContext,
+ mock(TaskHeartbeatHandler.class), mock(ContainerHeartbeatHandler.class), null);
taskSpec = mock(TaskSpec.class);
doReturn(taskAttemptID).when(taskSpec).getTaskAttemptID();
@@ -131,6 +123,9 @@ public class TestTaskAttemptListenerImplTezDag {
@Test(timeout = 5000)
public void testGetTask() throws IOException {
+ TezTaskCommunicatorImpl taskCommunicator = (TezTaskCommunicatorImpl)taskAttemptListener.getTaskCommunicator();
+ TezTaskUmbilicalProtocol tezUmbilical = taskCommunicator.getUmbilical();
+
ContainerId containerId1 = createContainerId(appId, 1);
ContainerContext containerContext1 = new ContainerContext(containerId1.toString());
containerTask = tezUmbilical.getTask(containerContext1);
http://git-wip-us.apache.org/repos/asf/tez/blob/cee48099/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestContainerReuse.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestContainerReuse.java b/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestContainerReuse.java
index 89b77a7..54b9adb 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestContainerReuse.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestContainerReuse.java
@@ -1322,7 +1322,7 @@ public class TestContainerReuse {
InputDescriptor.create("inputClassName"), 1)),
Collections.singletonList(new OutputSpec("vertexName",
OutputDescriptor.create("outputClassName"), 1)), null), ta, locationHint,
- priority.getPriority(), containerContext);
+ priority.getPriority(), containerContext, 0, 0, 0);
return lr;
}
http://git-wip-us.apache.org/repos/asf/tez/blob/cee48099/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestTaskSchedulerEventHandler.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestTaskSchedulerEventHandler.java b/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestTaskSchedulerEventHandler.java
index af3e40d..291e786 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestTaskSchedulerEventHandler.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestTaskSchedulerEventHandler.java
@@ -89,13 +89,13 @@ public class TestTaskSchedulerEventHandler {
public MockTaskSchedulerEventHandler(AppContext appContext,
DAGClientServer clientService, EventHandler eventHandler,
ContainerSignatureMatcher containerSignatureMatcher, WebUIService webUI) {
- super(appContext, clientService, eventHandler, containerSignatureMatcher, webUI);
+ super(appContext, clientService, eventHandler, containerSignatureMatcher, webUI, new String[] {});
}
-
+
@Override
- protected TaskSchedulerService createTaskScheduler(String host, int port,
- String trackingUrl, AppContext appContext) {
- return mockTaskScheduler;
+ protected void instantiateScheduelrs(String host, int port, String trackingUrl,
+ AppContext appContext) {
+ taskSchedulers[0] = mockTaskScheduler;
}
@Override
@@ -194,7 +194,7 @@ public class TestTaskSchedulerEventHandler {
when(mockAppContext.getCurrentDAG().getVertex(affVertexName)).thenReturn(affVertex);
Resource resource = Resource.newInstance(100, 1);
AMSchedulerEventTALaunchRequest event = new AMSchedulerEventTALaunchRequest
- (taId, resource, null, mockTaskAttempt, locHint, 3, null);
+ (taId, resource, null, mockTaskAttempt, locHint, 3, null, 0, 0, 0);
schedulerHandler.notify.set(false);
schedulerHandler.handle(event);
synchronized (schedulerHandler.notify) {
http://git-wip-us.apache.org/repos/asf/tez/blob/cee48099/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestTaskSchedulerHelpers.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestTaskSchedulerHelpers.java b/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestTaskSchedulerHelpers.java
index 77c98b7..d775300 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestTaskSchedulerHelpers.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestTaskSchedulerHelpers.java
@@ -127,31 +127,29 @@ class TestTaskSchedulerHelpers {
EventHandler eventHandler,
TezAMRMClientAsync<CookieContainerRequest> amrmClientAsync,
ContainerSignatureMatcher containerSignatureMatcher) {
- super(appContext, null, eventHandler, containerSignatureMatcher, null);
+ super(appContext, null, eventHandler, containerSignatureMatcher, null, new String[]{});
this.amrmClientAsync = amrmClientAsync;
this.containerSignatureMatcher = containerSignatureMatcher;
}
@Override
- public TaskSchedulerService createTaskScheduler(String host, int port,
- String trackingUrl, AppContext appContext) {
- return new TaskSchedulerWithDrainableAppCallback(this,
+ public void instantiateScheduelrs(String host, int port, String trackingUrl, AppContext appContext) {
+ taskSchedulers[0] = new TaskSchedulerWithDrainableAppCallback(this,
containerSignatureMatcher, host, port, trackingUrl, amrmClientAsync,
appContext);
}
public TaskSchedulerService getSpyTaskScheduler() {
- return this.taskScheduler;
+ return taskSchedulers[0];
}
@Override
public void serviceStart() {
- TaskSchedulerService taskSchedulerReal = createTaskScheduler("host", 0, "",
- appContext);
+ instantiateScheduelrs("host", 0, "", appContext);
// Init the service so that reuse configuration is picked up.
- ((AbstractService)taskSchedulerReal).init(getConfig());
- ((AbstractService)taskSchedulerReal).start();
- taskScheduler = spy(taskSchedulerReal);
+ ((AbstractService)taskSchedulers[0]).init(getConfig());
+ ((AbstractService)taskSchedulers[0]).start();
+ taskSchedulers[0] = spy(taskSchedulers[0]);
}
@Override
http://git-wip-us.apache.org/repos/asf/tez/blob/cee48099/tez-ext-service-tests/src/test/java/org/apache/tez/tests/TestExternalTezServices.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/tests/TestExternalTezServices.java b/tez-ext-service-tests/src/test/java/org/apache/tez/tests/TestExternalTezServices.java
index a93c1a4..ae7e7f8 100644
--- a/tez-ext-service-tests/src/test/java/org/apache/tez/tests/TestExternalTezServices.java
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/tests/TestExternalTezServices.java
@@ -45,6 +45,8 @@ public class TestExternalTezServices {
private static final Log LOG = LogFactory.getLog(TestExternalTezServices.class);
+ private static final String EXT_PUSH_ENTITY_NAME = "ExtServiceTestPush";
+
private static MiniTezCluster tezCluster;
private static MiniDFSCluster dfsCluster;
private static MiniTezTestServiceCluster tezTestServiceCluster;
@@ -106,12 +108,17 @@ public class TestExternalTezServices {
remoteFs.mkdirs(stagingDirPath);
// This is currently configured to push tasks into the Service, and then use the standard RPC
confForJobs.set(TezConfiguration.TEZ_AM_STAGING_DIR, stagingDirPath.toString());
- confForJobs.set(TezConfiguration.TEZ_AM_TASK_SCHEDULER_CLASS,
- TezTestServiceTaskSchedulerService.class.getName());
- confForJobs.set(TezConfiguration.TEZ_AM_CONTAINER_LAUNCHER_CLASS,
- TezTestServiceNoOpContainerLauncher.class.getName());
- confForJobs.set(TezConfiguration.TEZ_AM_TASK_COMMUNICATOR_CLASS,
- TezTestServiceTaskCommunicatorImpl.class.getName());
+ confForJobs.set(TezConfiguration.TEZ_AM_TASK_SCHEDULERS,
+ EXT_PUSH_ENTITY_NAME + ":" + TezTestServiceTaskSchedulerService.class.getName());
+ confForJobs.set(TezConfiguration.TEZ_AM_CONTAINER_LAUNCHERS,
+ EXT_PUSH_ENTITY_NAME + ":" + TezTestServiceNoOpContainerLauncher.class.getName());
+ confForJobs.set(TezConfiguration.TEZ_AM_TASK_COMMUNICATORS,
+ EXT_PUSH_ENTITY_NAME + ":" + TezTestServiceTaskCommunicatorImpl.class.getName());
+
+ confForJobs.set(TezConfiguration.TEZ_AM_VERTEX_TASK_SCHEDULER_NAME, EXT_PUSH_ENTITY_NAME);
+ confForJobs.set(TezConfiguration.TEZ_AM_VERTEX_CONTAINER_LAUNCHER_NAME, EXT_PUSH_ENTITY_NAME);
+ confForJobs.set(TezConfiguration.TEZ_AM_VERTEX_TASK_COMMUNICATOR_NAME, EXT_PUSH_ENTITY_NAME);
+
TezConfiguration tezConf = new TezConfiguration(confForJobs);
[39/43] tez git commit: TEZ-2361. Propagate dag completion to TaskCommunicator. (sseth)
Posted by ss...@apache.org.
TEZ-2361. Propagate dag completion to TaskCommunicator. (sseth)
Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/01374671
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/01374671
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/01374671
Branch: refs/heads/TEZ-2003
Commit: 0137467140aaee7d3dd2f75264d34544972a45ef
Parents: 44f5e8f
Author: Siddharth Seth <ss...@apache.org>
Authored: Thu Apr 23 17:26:25 2015 -0700
Committer: Siddharth Seth <ss...@apache.org>
Committed: Fri May 8 14:41:55 2015 -0700
----------------------------------------------------------------------
TEZ-2003-CHANGES.txt | 1 +
.../apache/tez/dag/api/TaskCommunicator.java | 12 +++-
.../org/apache/tez/dag/app/DAGAppMaster.java | 4 +-
.../dag/app/TaskAttemptListenerImpTezDag.java | 17 +++++-
.../dag/app/TaskCommunicatorContextImpl.java | 64 +++++++++++++++++---
.../tez/dag/app/TezTaskCommunicatorImpl.java | 5 ++
.../tez/dag/app/launcher/ContainerLauncher.java | 3 -
.../dag/app/launcher/ContainerLauncherImpl.java | 12 ----
.../app/launcher/ContainerLauncherRouter.java | 10 +++
.../app/launcher/LocalContainerLauncher.java | 9 ---
.../apache/tez/dag/app/MockDAGAppMaster.java | 11 ----
.../rm/TezTestServiceTaskSchedulerService.java | 2 +-
.../TezTestServiceTaskCommunicatorImpl.java | 2 +-
.../tez/tests/TestExternalTezServices.java | 2 +
14 files changed, 103 insertions(+), 51 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/tez/blob/01374671/TEZ-2003-CHANGES.txt
----------------------------------------------------------------------
diff --git a/TEZ-2003-CHANGES.txt b/TEZ-2003-CHANGES.txt
index 7c13110..f6bc8e7 100644
--- a/TEZ-2003-CHANGES.txt
+++ b/TEZ-2003-CHANGES.txt
@@ -17,5 +17,6 @@ ALL CHANGES:
TEZ-2285. Allow TaskCommunicators to indicate task/container liveness.
TEZ-2302. Allow TaskCommunicators to subscribe for Vertex updates.
TEZ-2347. Expose additional information in TaskCommunicatorContext.
+ TEZ-2361. Propagate dag completion to TaskCommunicator.
INCOMPATIBLE CHANGES:
http://git-wip-us.apache.org/repos/asf/tez/blob/01374671/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicator.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicator.java b/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicator.java
index a2cd858..cadca0c 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicator.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/api/TaskCommunicator.java
@@ -14,7 +14,6 @@
package org.apache.tez.dag.api;
-import java.io.IOException;
import java.net.InetSocketAddress;
import java.util.Map;
@@ -74,4 +73,15 @@ public abstract class TaskCommunicator extends AbstractService {
* @throws Exception
*/
public abstract void onVertexStateUpdated(VertexStateUpdate stateUpdate) throws Exception;
+
+ /**
+ * Indicates that the currently running dag is complete. The TaskCommunicatorContext can be used to
+ * query information about the current dag during the duration of the dagComplete invocation.
+ *
+ * After this, the contents returned from querying the context may change at any point - due to
+ * the next dag being submitted.
+ */
+ // TODO TEZ-2003 This is extremely difficult to use. Add the dagStarted notification, and potentially
+ // throw exceptions between a dagComplete and dagStart invocation.
+ public abstract void dagComplete(String dagName);
}
http://git-wip-us.apache.org/repos/asf/tez/blob/01374671/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java b/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java
index 1ea369e..568e929 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java
@@ -759,7 +759,7 @@ public class DAGAppMaster extends AbstractService {
DAGAppMasterEventDagCleanup cleanupEvent = (DAGAppMasterEventDagCleanup) event;
LOG.info("Cleaning up DAG: name=" + cleanupEvent.getDag().getName() + ", with id=" +
cleanupEvent.getDag().getID());
- containerLauncher.dagComplete(cleanupEvent.getDag());
+ containerLauncherRouter.dagComplete(cleanupEvent.getDag());
taskAttemptListener.dagComplete(cleanupEvent.getDag());
nodes.dagComplete(cleanupEvent.getDag());
containers.dagComplete(cleanupEvent.getDag());
@@ -773,7 +773,7 @@ public class DAGAppMaster extends AbstractService {
case NEW_DAG_SUBMITTED:
// Inform sub-components that a new DAG has been submitted.
taskSchedulerEventHandler.dagSubmitted();
- containerLauncher.dagSubmitted();
+ containerLauncherRouter.dagSubmitted();
taskAttemptListener.dagSubmitted();
break;
default:
http://git-wip-us.apache.org/repos/asf/tez/blob/01374671/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java b/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
index aaf9cca..03b5602 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/TaskAttemptListenerImpTezDag.java
@@ -81,6 +81,7 @@ public class TaskAttemptListenerImpTezDag extends AbstractService implements
private final AppContext context;
private final TaskCommunicator[] taskCommunicators;
+ private final TaskCommunicatorContext[] taskCommunicatorContexts;
protected final TaskHeartbeatHandler taskHeartbeatHandler;
protected final ContainerHeartbeatHandler containerHeartbeatHandler;
@@ -123,7 +124,9 @@ public class TaskAttemptListenerImpTezDag extends AbstractService implements
}
}
this.taskCommunicators = new TaskCommunicator[taskCommunicatorClassIdentifiers.length];
+ this.taskCommunicatorContexts = new TaskCommunicatorContext[taskCommunicatorClassIdentifiers.length];
for (int i = 0 ; i < taskCommunicatorClassIdentifiers.length ; i++) {
+ taskCommunicatorContexts[i] = new TaskCommunicatorContextImpl(context, this, i);
taskCommunicators[i] = createTaskCommunicator(taskCommunicatorClassIdentifiers[i], i);
}
// TODO TEZ-2118 Start using taskCommunicator indices properly
@@ -148,10 +151,10 @@ public class TaskAttemptListenerImpTezDag extends AbstractService implements
private TaskCommunicator createTaskCommunicator(String taskCommClassIdentifier, int taskCommIndex) {
if (taskCommClassIdentifier.equals(TezConstants.TEZ_AM_SERVICE_PLUGINS_NAME_DEFAULT)) {
LOG.info("Using Default Task Communicator");
- return new TezTaskCommunicatorImpl(new TaskCommunicatorContextImpl(context, this, taskCommIndex));
+ return new TezTaskCommunicatorImpl(taskCommunicatorContexts[taskCommIndex]);
} else if (taskCommClassIdentifier.equals(TezConstants.TEZ_AM_SERVICE_PLUGINS_LOCAL_MODE_NAME_DEFAULT)) {
LOG.info("Using Default Local Task Communicator");
- return new TezLocalTaskCommunicatorImpl(new TaskCommunicatorContextImpl(context, this, taskCommIndex));
+ return new TezLocalTaskCommunicatorImpl(taskCommunicatorContexts[taskCommIndex]);
} else {
LOG.info("Using TaskCommunicator: " + taskCommClassIdentifier);
Class<? extends TaskCommunicator> taskCommClazz = (Class<? extends TaskCommunicator>) ReflectionUtils
@@ -159,7 +162,7 @@ public class TaskAttemptListenerImpTezDag extends AbstractService implements
try {
Constructor<? extends TaskCommunicator> ctor = taskCommClazz.getConstructor(TaskCommunicatorContext.class);
ctor.setAccessible(true);
- return ctor.newInstance(new TaskCommunicatorContextImpl(context, this, taskCommIndex));
+ return ctor.newInstance(taskCommunicatorContexts[taskCommIndex]);
} catch (NoSuchMethodException e) {
throw new TezUncheckedException(e);
} catch (InvocationTargetException e) {
@@ -318,6 +321,14 @@ public class TaskAttemptListenerImpTezDag extends AbstractService implements
// This becomes more relevant when task kills without container kills are allowed.
// TODO TEZ-2336. Send a signal to containers indicating DAG completion.
+
+ // Inform all communicators of the dagCompletion.
+ for (int i = 0 ; i < taskCommunicators.length ; i++) {
+ ((TaskCommunicatorContextImpl)taskCommunicatorContexts[i]).dagCompleteStart(dag);
+ taskCommunicators[i].dagComplete(dag.getName());
+ ((TaskCommunicatorContextImpl)taskCommunicatorContexts[i]).dagCompleteEnd();
+ }
+
}
@Override
http://git-wip-us.apache.org/repos/asf/tez/blob/01374671/tez-dag/src/main/java/org/apache/tez/dag/app/TaskCommunicatorContextImpl.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/TaskCommunicatorContextImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/TaskCommunicatorContextImpl.java
index 4cb0c93..790066f 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/TaskCommunicatorContextImpl.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/TaskCommunicatorContextImpl.java
@@ -17,6 +17,11 @@ package org.apache.tez.dag.app;
import javax.annotation.Nullable;
import java.io.IOException;
import java.util.Set;
+import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.concurrent.locks.Lock;
+import java.util.concurrent.locks.ReadWriteLock;
+import java.util.concurrent.locks.ReentrantLock;
+import java.util.concurrent.locks.ReentrantReadWriteLock;
import com.google.common.base.Function;
import com.google.common.base.Preconditions;
@@ -33,6 +38,7 @@ import org.apache.tez.dag.api.TezException;
import org.apache.tez.dag.api.TezUncheckedException;
import org.apache.tez.dag.api.event.VertexState;
import org.apache.tez.dag.api.event.VertexStateUpdate;
+import org.apache.tez.dag.app.dag.DAG;
import org.apache.tez.dag.app.dag.Vertex;
import org.apache.tez.dag.app.dag.VertexStateUpdateListener;
import org.apache.tez.dag.records.TezTaskAttemptID;
@@ -44,6 +50,10 @@ public class TaskCommunicatorContextImpl implements TaskCommunicatorContext, Ver
private final AppContext context;
private final TaskAttemptListenerImpTezDag taskAttemptListener;
private final int taskCommunicatorIndex;
+ private final ReentrantReadWriteLock.ReadLock dagChangedReadLock;
+ private final ReentrantReadWriteLock.WriteLock dagChangedWriteLock;
+
+ private DAG dag;
public TaskCommunicatorContextImpl(AppContext appContext,
TaskAttemptListenerImpTezDag taskAttemptListener,
@@ -51,6 +61,10 @@ public class TaskCommunicatorContextImpl implements TaskCommunicatorContext, Ver
this.context = appContext;
this.taskAttemptListener = taskAttemptListener;
this.taskCommunicatorIndex = taskCommunicatorIndex;
+
+ ReentrantReadWriteLock dagChangedLock = new ReentrantReadWriteLock();
+ dagChangedReadLock = dagChangedLock.readLock();
+ dagChangedWriteLock = dagChangedLock.writeLock();
}
@Override
@@ -111,18 +125,19 @@ public class TaskCommunicatorContextImpl implements TaskCommunicatorContext, Ver
public void registerForVertexStateUpdates(String vertexName,
@Nullable Set<VertexState> stateSet) {
Preconditions.checkNotNull(vertexName, "VertexName cannot be null: " + vertexName);
- context.getCurrentDAG().getStateChangeNotifier().registerForVertexUpdates(vertexName, stateSet, this);
+ getDag().getStateChangeNotifier().registerForVertexUpdates(vertexName, stateSet,
+ this);
}
@Override
public String getCurretnDagName() {
- return context.getCurrentDAG().getName();
+ return getDag().getName();
}
@Override
public Iterable<String> getInputVertexNames(String vertexName) {
Preconditions.checkNotNull(vertexName, "VertexName cannot be null: " + vertexName);
- Vertex vertex = context.getCurrentDAG().getVertex(vertexName);
+ Vertex vertex = getDag().getVertex(vertexName);
Set<Vertex> sources = vertex.getInputVertices().keySet();
return Iterables.transform(sources, new Function<Vertex, String>() {
@Override
@@ -135,31 +150,32 @@ public class TaskCommunicatorContextImpl implements TaskCommunicatorContext, Ver
@Override
public int getVertexTotalTaskCount(String vertexName) {
Preconditions.checkArgument(vertexName != null, "VertexName must be specified");
- return context.getCurrentDAG().getVertex(vertexName).getTotalTasks();
+ return getDag().getVertex(vertexName).getTotalTasks();
}
@Override
public int getVertexCompletedTaskCount(String vertexName) {
Preconditions.checkArgument(vertexName != null, "VertexName must be specified");
- return context.getCurrentDAG().getVertex(vertexName).getCompletedTasks();
+ return getDag().getVertex(vertexName).getCompletedTasks();
}
@Override
public int getVertexRunningTaskCount(String vertexName) {
Preconditions.checkArgument(vertexName != null, "VertexName must be specified");
- return context.getCurrentDAG().getVertex(vertexName).getRunningTasks();
+ return getDag().getVertex(vertexName).getRunningTasks();
}
@Override
public long getFirstAttemptStartTime(String vertexName, int taskIndex) {
Preconditions.checkArgument(vertexName != null, "VertexName must be specified");
Preconditions.checkArgument(taskIndex >=0, "TaskIndex must be > 0");
- return context.getCurrentDAG().getVertex(vertexName).getTask(taskIndex).getFirstAttemptStartTime();
+ return getDag().getVertex(vertexName).getTask(
+ taskIndex).getFirstAttemptStartTime();
}
@Override
public long getDagStartTime() {
- return context.getCurrentDAG().getStartTime();
+ return getDag().getStartTime();
}
@Override
@@ -171,4 +187,36 @@ public class TaskCommunicatorContextImpl implements TaskCommunicatorContext, Ver
throw new TezUncheckedException(e);
}
}
+
+ private DAG getDag() {
+ dagChangedReadLock.lock();
+ try {
+ if (dag != null) {
+ return dag;
+ } else {
+ return context.getCurrentDAG();
+ }
+ } finally {
+ dagChangedReadLock.unlock();
+ }
+ }
+
+ @InterfaceAudience.Private
+ public void dagCompleteStart(DAG dag) {
+ dagChangedWriteLock.lock();
+ try {
+ this.dag = dag;
+ } finally {
+ dagChangedWriteLock.unlock();
+ }
+ }
+
+ public void dagCompleteEnd() {
+ dagChangedWriteLock.lock();
+ try {
+ this.dag = null;
+ } finally {
+ dagChangedWriteLock.unlock();
+ }
+ }
}
http://git-wip-us.apache.org/repos/asf/tez/blob/01374671/tez-dag/src/main/java/org/apache/tez/dag/app/TezTaskCommunicatorImpl.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/TezTaskCommunicatorImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/TezTaskCommunicatorImpl.java
index 1417a3b..825a4d2 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/TezTaskCommunicatorImpl.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/TezTaskCommunicatorImpl.java
@@ -258,6 +258,11 @@ public class TezTaskCommunicatorImpl extends TaskCommunicator {
// Empty. Not registering, or expecting any updates.
}
+ @Override
+ public void dagComplete(String dagName) {
+ // Nothing to do at the moment. Some of the TODOs from TaskAttemptListener apply here.
+ }
+
protected String getTokenIdentifier() {
return tokenIdentifier;
}
http://git-wip-us.apache.org/repos/asf/tez/blob/01374671/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncher.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncher.java b/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncher.java
index 8a8498f..ea07a1d 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncher.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncher.java
@@ -26,7 +26,4 @@ import org.apache.tez.dag.app.rm.NMCommunicatorEvent;
public interface ContainerLauncher
extends EventHandler<NMCommunicatorEvent> {
- void dagComplete(DAG dag);
-
- void dagSubmitted();
}
http://git-wip-us.apache.org/repos/asf/tez/blob/01374671/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncherImpl.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncherImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncherImpl.java
index 94889a1..a1eb2a7 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncherImpl.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncherImpl.java
@@ -30,7 +30,6 @@ import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.commons.lang.exception.ExceptionUtils;
-import org.apache.tez.dag.app.dag.DAG;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.conf.Configuration;
@@ -111,17 +110,6 @@ public class ContainerLauncherImpl extends AbstractService implements
}
}
- @Override
- public void dagComplete(DAG dag) {
- // Nothing required at the moment. Containers are shared across DAGs
- }
-
- @Override
- public void dagSubmitted() {
- // Nothing to do right now. Indicates that a new DAG has been submitted and
- // the context has updated information.
- }
-
private static enum ContainerState {
PREP, FAILED, RUNNING, DONE, KILLED_BEFORE_LAUNCH
}
http://git-wip-us.apache.org/repos/asf/tez/blob/01374671/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncherRouter.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncherRouter.java b/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncherRouter.java
index dd3571e..db145f4 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncherRouter.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncherRouter.java
@@ -27,6 +27,7 @@ import org.apache.tez.dag.api.TezConstants;
import org.apache.tez.dag.api.TezUncheckedException;
import org.apache.tez.dag.app.AppContext;
import org.apache.tez.dag.app.TaskAttemptListener;
+import org.apache.tez.dag.app.dag.DAG;
import org.apache.tez.dag.app.rm.NMCommunicatorEvent;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -128,6 +129,15 @@ public class ContainerLauncherRouter extends AbstractService
}
}
+ public void dagComplete(DAG dag) {
+ // Nothing required at the moment. Containers are shared across DAGs
+ }
+
+ public void dagSubmitted() {
+ // Nothing to do right now. Indicates that a new DAG has been submitted and
+ // the context has updated information.
+ }
+
@Override
public void handle(NMCommunicatorEvent event) {
http://git-wip-us.apache.org/repos/asf/tez/blob/01374671/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/LocalContainerLauncher.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/LocalContainerLauncher.java b/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/LocalContainerLauncher.java
index 18b2e35..305f8b3 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/LocalContainerLauncher.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/LocalContainerLauncher.java
@@ -44,7 +44,6 @@ import com.google.common.util.concurrent.ListeningExecutorService;
import com.google.common.util.concurrent.MoreExecutors;
import com.google.common.util.concurrent.ThreadFactoryBuilder;
-import org.apache.tez.dag.app.dag.DAG;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.conf.Configuration;
@@ -169,14 +168,6 @@ public class LocalContainerLauncher extends AbstractService implements
callbackExecutor.shutdownNow();
}
- @Override
- public void dagComplete(DAG dag) {
- }
-
- @Override
- public void dagSubmitted() {
- }
-
// Thread to monitor the queue of incoming NMCommunicator events
private class TezSubTaskRunner implements Runnable {
@Override
http://git-wip-us.apache.org/repos/asf/tez/blob/01374671/tez-dag/src/test/java/org/apache/tez/dag/app/MockDAGAppMaster.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/MockDAGAppMaster.java b/tez-dag/src/test/java/org/apache/tez/dag/app/MockDAGAppMaster.java
index 051497b..845c440 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/MockDAGAppMaster.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/MockDAGAppMaster.java
@@ -34,10 +34,8 @@ import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
-import org.apache.tez.dag.app.dag.DAG;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import com.google.common.annotations.VisibleForTesting;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.security.Credentials;
import org.apache.hadoop.service.AbstractService;
@@ -54,7 +52,6 @@ import org.apache.tez.common.counters.TezCounters;
import org.apache.tez.dag.api.TaskHeartbeatRequest;
import org.apache.tez.dag.api.TaskHeartbeatResponse;
import org.apache.tez.dag.api.TezConfiguration;
-import org.apache.tez.dag.api.TaskCommunicator;
import org.apache.tez.dag.api.TezUncheckedException;
import org.apache.tez.dag.app.launcher.ContainerLauncher;
import org.apache.tez.dag.app.launcher.ContainerLauncherRouter;
@@ -150,14 +147,6 @@ public class MockDAGAppMaster extends DAGAppMaster {
this.goFlag = goFlag;
}
- @Override
- public void dagComplete(DAG dag) {
- }
-
- @Override
- public void dagSubmitted() {
- }
-
public class ContainerData {
ContainerId cId;
TezTaskAttemptID taId;
http://git-wip-us.apache.org/repos/asf/tez/blob/01374671/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/rm/TezTestServiceTaskSchedulerService.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/rm/TezTestServiceTaskSchedulerService.java b/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/rm/TezTestServiceTaskSchedulerService.java
index 50dfb24..073cb50 100644
--- a/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/rm/TezTestServiceTaskSchedulerService.java
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/rm/TezTestServiceTaskSchedulerService.java
@@ -158,7 +158,7 @@ public class TezTestServiceTaskSchedulerService extends TaskSchedulerService {
}
@Override
- public void resetMatchLocalityForAllHeldContainers() {
+ public void dagComplete() {
}
@Override
http://git-wip-us.apache.org/repos/asf/tez/blob/01374671/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/taskcomm/TezTestServiceTaskCommunicatorImpl.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/taskcomm/TezTestServiceTaskCommunicatorImpl.java b/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/taskcomm/TezTestServiceTaskCommunicatorImpl.java
index ef983c2..cf28b11 100644
--- a/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/taskcomm/TezTestServiceTaskCommunicatorImpl.java
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/taskcomm/TezTestServiceTaskCommunicatorImpl.java
@@ -150,7 +150,7 @@ public class TezTestServiceTaskCommunicatorImpl extends TezTaskCommunicatorImpl
t = se.getCause();
}
if (t instanceof RemoteException) {
- RemoteException re = (RemoteException)t;
+ RemoteException re = (RemoteException) t;
String message = re.toString();
if (message.contains(RejectedExecutionException.class.getName())) {
getTaskCommunicatorContext().taskKilled(taskSpec.getTaskAttemptID(),
http://git-wip-us.apache.org/repos/asf/tez/blob/01374671/tez-ext-service-tests/src/test/java/org/apache/tez/tests/TestExternalTezServices.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/tests/TestExternalTezServices.java b/tez-ext-service-tests/src/test/java/org/apache/tez/tests/TestExternalTezServices.java
index 4d0a610..45c70f1 100644
--- a/tez-ext-service-tests/src/test/java/org/apache/tez/tests/TestExternalTezServices.java
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/tests/TestExternalTezServices.java
@@ -39,6 +39,7 @@ import org.apache.tez.dag.app.taskcomm.TezTestServiceTaskCommunicatorImpl;
import org.apache.tez.examples.HashJoinExample;
import org.apache.tez.examples.JoinDataGen;
import org.apache.tez.examples.JoinValidateConfigured;
+import org.apache.tez.runtime.library.api.TezRuntimeConfiguration;
import org.apache.tez.runtime.library.processor.SleepProcessor;
import org.apache.tez.service.MiniTezTestServiceCluster;
import org.apache.tez.service.impl.ContainerRunnerImpl;
@@ -124,6 +125,7 @@ public class TestExternalTezServices {
remoteFs.mkdirs(stagingDirPath);
// This is currently configured to push tasks into the Service, and then use the standard RPC
confForJobs.set(TezConfiguration.TEZ_AM_STAGING_DIR, stagingDirPath.toString());
+ confForJobs.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_OPTIMIZE_LOCAL_FETCH, false);
confForJobs.setStrings(TezConfiguration.TEZ_AM_TASK_SCHEDULERS,
TezConstants.TEZ_AM_SERVICE_PLUGINS_NAME_DEFAULT,
[12/43] tez git commit: TEZ-776. Reduce AM mem usage caused by storing TezEvents (bikas)
Posted by ss...@apache.org.
http://git-wip-us.apache.org/repos/asf/tez/blob/05f77fe2/tez-runtime-library/src/main/java/org/apache/tez/dag/library/vertexmanager/ShuffleVertexManager.java
----------------------------------------------------------------------
diff --git a/tez-runtime-library/src/main/java/org/apache/tez/dag/library/vertexmanager/ShuffleVertexManager.java b/tez-runtime-library/src/main/java/org/apache/tez/dag/library/vertexmanager/ShuffleVertexManager.java
index 2ea0299..d177460 100644
--- a/tez-runtime-library/src/main/java/org/apache/tez/dag/library/vertexmanager/ShuffleVertexManager.java
+++ b/tez-runtime-library/src/main/java/org/apache/tez/dag/library/vertexmanager/ShuffleVertexManager.java
@@ -33,9 +33,9 @@ import org.apache.hadoop.classification.InterfaceAudience.Public;
import org.apache.hadoop.classification.InterfaceStability.Evolving;
import org.apache.hadoop.conf.Configuration;
import org.apache.tez.common.TezUtils;
-import org.apache.tez.dag.api.EdgeManagerPlugin;
import org.apache.tez.dag.api.EdgeManagerPluginContext;
import org.apache.tez.dag.api.EdgeManagerPluginDescriptor;
+import org.apache.tez.dag.api.EdgeManagerPluginOnDemand;
import org.apache.tez.dag.api.EdgeProperty;
import org.apache.tez.dag.api.EdgeProperty.DataMovementType;
import org.apache.tez.dag.api.InputDescriptor;
@@ -160,12 +160,15 @@ public class ShuffleVertexManager extends VertexManagerPlugin {
super(context);
}
- public static class CustomShuffleEdgeManager extends EdgeManagerPlugin {
+ public static class CustomShuffleEdgeManager extends EdgeManagerPluginOnDemand {
int numSourceTaskOutputs;
int numDestinationTasks;
int basePartitionRange;
int remainderRangeForLastShuffler;
int numSourceTasks;
+
+ int[][] sourceIndices;
+ int[][] targetIndices;
public CustomShuffleEdgeManager(EdgeManagerPluginContext context) {
super(context);
@@ -231,8 +234,106 @@ public class ShuffleVertexManager extends VertexManagerPlugin {
destinationTaskAndInputIndices.put(destinationTaskIndex, Collections.singletonList(targetIndex));
}
+
+ @Override
+ public EventRouteMetadata routeDataMovementEventToDestination(
+ int sourceTaskIndex, int sourceOutputIndex, int destTaskIndex) throws Exception {
+ int sourceIndex = sourceOutputIndex;
+ int destinationTaskIndex = sourceIndex/basePartitionRange;
+ if (destinationTaskIndex != destTaskIndex) {
+ return null;
+ }
+ int partitionRange = 1;
+ if(destinationTaskIndex < numDestinationTasks-1) {
+ partitionRange = basePartitionRange;
+ } else {
+ partitionRange = remainderRangeForLastShuffler;
+ }
+
+ // all inputs from a source task are next to each other in original order
+ int targetIndex =
+ sourceTaskIndex * partitionRange
+ + sourceIndex % partitionRange;
+ return EventRouteMetadata.create(1, new int[]{targetIndex});
+ }
+
+ private int[] createIndices(int partitionRange, int taskIndex, int offSetPerTask) {
+ int startIndex = taskIndex * offSetPerTask;
+ int[] indices = new int[partitionRange];
+ for (int currentIndex = 0; currentIndex < partitionRange; ++currentIndex) {
+ indices[currentIndex] = (startIndex + currentIndex);
+ }
+ return indices;
+ }
+
+ @Override
+ public void prepareForRouting() throws Exception {
+ // target indices derive from num src tasks
+ int numSourceTasks = getContext().getSourceVertexNumTasks();
+ targetIndices = new int[numSourceTasks][];
+ for (int srcTaskIndex=0; srcTaskIndex<numSourceTasks; ++srcTaskIndex) {
+ targetIndices[srcTaskIndex] = createIndices(basePartitionRange, srcTaskIndex,
+ basePartitionRange);
+ }
+
+ // source indices derive from num dest tasks (==partitions)
+ int numTargetTasks = getContext().getDestinationVertexNumTasks();
+ sourceIndices = new int[numTargetTasks][];
+ for (int destTaskIndex=0; destTaskIndex<numTargetTasks; ++destTaskIndex) {
+ int partitionRange = basePartitionRange;
+ if (destTaskIndex == (numTargetTasks-1)) {
+ partitionRange = remainderRangeForLastShuffler;
+ }
+ // skip the basePartitionRange per destination task
+ sourceIndices[destTaskIndex] = createIndices(partitionRange, destTaskIndex,
+ basePartitionRange);
+ }
+ }
+
+ private int[] createTargetIndicesForRemainder(int srcTaskIndex) {
+ // for the last task just generate on the fly instead of doubling the memory
+ return createIndices(remainderRangeForLastShuffler, srcTaskIndex,
+ remainderRangeForLastShuffler);
+ }
@Override
+ public @Nullable EventRouteMetadata routeCompositeDataMovementEventToDestination(
+ int sourceTaskIndex, int destinationTaskIndex)
+ throws Exception {
+ int[] targetIndicesToSend;
+ int partitionRange;
+ if(destinationTaskIndex == (numDestinationTasks-1)) {
+ if (remainderRangeForLastShuffler != basePartitionRange) {
+ targetIndicesToSend = createTargetIndicesForRemainder(sourceTaskIndex);
+ } else {
+ targetIndicesToSend = targetIndices[sourceTaskIndex];
+ }
+ partitionRange = remainderRangeForLastShuffler;
+ } else {
+ targetIndicesToSend = targetIndices[sourceTaskIndex];
+ partitionRange = basePartitionRange;
+ }
+
+ return EventRouteMetadata.create(partitionRange, targetIndicesToSend,
+ sourceIndices[destinationTaskIndex]);
+ }
+
+ @Override
+ public EventRouteMetadata routeInputSourceTaskFailedEventToDestination(
+ int sourceTaskIndex, int destinationTaskIndex) throws Exception {
+ int partitionRange = basePartitionRange;
+ if (destinationTaskIndex == (numDestinationTasks-1)) {
+ partitionRange = remainderRangeForLastShuffler;
+ }
+ int startOffset = sourceTaskIndex * partitionRange;
+ int[] targetIndices = new int[partitionRange];
+ for (int i=0; i<partitionRange; ++i) {
+ targetIndices[i] = (startOffset + i);
+ }
+ return EventRouteMetadata.create(partitionRange, targetIndices);
+ }
+
+ @Override
public void routeInputSourceTaskFailedEventToDestination(int sourceTaskIndex,
Map<Integer, List<Integer>> destinationTaskAndInputIndices) {
if (remainderRangeForLastShuffler < basePartitionRange) {
@@ -281,6 +382,18 @@ public class ShuffleVertexManager extends VertexManagerPlugin {
}
+ /**
+  * Maps a failed input index on a destination task back to a source task index.
+  *
+  * <p>The failed input index is integer-divided by the range of the destination:
+  * {@code basePartitionRange} when {@code destinationTaskIndex} is below
+  * {@code numDestinationTasks - 1}, {@code remainderRangeForLastShuffler} otherwise.
+  *
+  * @param destinationTaskIndex index of the destination task reporting the error
+  * @param destinationFailedInputIndex index of the failed input on that task
+  * @return {@code destinationFailedInputIndex} divided by the applicable range
+  */
@Override
+ public int routeInputErrorEventToSource(int destinationTaskIndex,
+     int destinationFailedInputIndex) {
+   final boolean beforeLastDestination = destinationTaskIndex < numDestinationTasks - 1;
+   final int partitionRange =
+       beforeLastDestination ? basePartitionRange : remainderRangeForLastShuffler;
+   return destinationFailedInputIndex / partitionRange;
+ }
+
+ /**
+  * Returns the {@code numDestinationTasks} field. The {@code sourceTaskIndex}
+  * argument is not consulted, so the result is the same for every source task.
+  */
+ @Override
public int getNumDestinationConsumerTasks(int sourceTaskIndex) {
return numDestinationTasks;
}
http://git-wip-us.apache.org/repos/asf/tez/blob/05f77fe2/tez-tests/src/test/java/org/apache/tez/test/TestExceptionPropagation.java
----------------------------------------------------------------------
diff --git a/tez-tests/src/test/java/org/apache/tez/test/TestExceptionPropagation.java b/tez-tests/src/test/java/org/apache/tez/test/TestExceptionPropagation.java
index 70be21b..7ba6028 100644
--- a/tez-tests/src/test/java/org/apache/tez/test/TestExceptionPropagation.java
+++ b/tez-tests/src/test/java/org/apache/tez/test/TestExceptionPropagation.java
@@ -308,7 +308,7 @@ public class TestExceptionPropagation {
// EdgeManager
EM_Initialize, EM_GetNumDestinationTaskPhysicalInputs, EM_GetNumSourceTaskPhysicalOutputs,
EM_RouteDataMovementEventToDestination, EM_GetNumDestinationConsumerTasks,
- EM_RouteInputErrorEventToSource,
+ EM_RouteInputErrorEventToSource, EM_PrepareForRouting,
// Not Supported yet
// EM_RouteInputSourceTaskFailedEventToDestination,
@@ -814,6 +814,33 @@ public class TestExceptionPropagation {
super.routeDataMovementEventToDestination(event, sourceTaskIndex,
sourceOutputIndex, destinationTaskAndInputIndices);
}
+
+ /**
+  * Test hook: throws a {@link RuntimeException} carrying the name of the configured
+  * exception location when that location is {@code EM_PrepareForRouting}; otherwise
+  * defers to the superclass implementation.
+  *
+  * @throws Exception declared by the overridden method
+  */
+ @Override
+ public void prepareForRouting() throws Exception {
+   final boolean injectFailure = ExceptionLocation.EM_PrepareForRouting == exLocation;
+   if (!injectFailure) {
+     super.prepareForRouting();
+     return;
+   }
+   throw new RuntimeException(exLocation.name());
+ }
+
+ /**
+  * Test hook: throws a {@link RuntimeException} carrying the name of the configured
+  * exception location when that location is
+  * {@code EM_RouteDataMovementEventToDestination}; otherwise returns the result of
+  * the superclass implementation.
+  *
+  * @param sourceTaskIndex forwarded unchanged to the superclass
+  * @param sourceOutputIndex forwarded unchanged to the superclass
+  * @param destinationTaskIndex forwarded unchanged to the superclass
+  * @return the superclass result
+  * @throws Exception declared by the overridden method
+  */
+ @Override
+ public EventRouteMetadata routeDataMovementEventToDestination(
+     int sourceTaskIndex, int sourceOutputIndex, int destinationTaskIndex) throws Exception {
+   final boolean injectFailure =
+       ExceptionLocation.EM_RouteDataMovementEventToDestination == exLocation;
+   if (injectFailure) {
+     throw new RuntimeException(exLocation.name());
+   }
+   final EventRouteMetadata routed = super.routeDataMovementEventToDestination(
+       sourceTaskIndex, sourceOutputIndex, destinationTaskIndex);
+   return routed;
+ }
+
+ /**
+  * Test hook for composite event routing. It is triggered by the same location as
+  * the non-composite variant, {@code EM_RouteDataMovementEventToDestination}: when
+  * that location is configured a {@link RuntimeException} carrying its name is
+  * thrown, otherwise the superclass result is returned.
+  *
+  * @param sourceTaskIndex forwarded unchanged to the superclass
+  * @param destinationTaskIndex forwarded unchanged to the superclass
+  * @return the superclass result
+  * @throws Exception declared by the overridden method
+  */
+ @Override
+ public EventRouteMetadata routeCompositeDataMovementEventToDestination(
+     int sourceTaskIndex, int destinationTaskIndex)
+     throws Exception {
+   final boolean injectFailure =
+       ExceptionLocation.EM_RouteDataMovementEventToDestination == exLocation;
+   if (injectFailure) {
+     throw new RuntimeException(exLocation.name());
+   }
+   final EventRouteMetadata routed =
+       super.routeCompositeDataMovementEventToDestination(sourceTaskIndex, destinationTaskIndex);
+   return routed;
+ }
@Override
public int routeInputErrorEventToSource(InputReadErrorEvent event,
@@ -826,6 +853,16 @@ public class TestExceptionPropagation {
}
+ /**
+  * Test hook: throws a {@link RuntimeException} carrying the name of the configured
+  * exception location when that location is {@code EM_RouteInputErrorEventToSource};
+  * otherwise returns the result of the superclass implementation.
+  *
+  * @param destinationTaskIndex forwarded unchanged to the superclass
+  * @param destinationFailedInputIndex forwarded unchanged to the superclass
+  * @return the superclass result
+  */
@Override
+ public int routeInputErrorEventToSource(int destinationTaskIndex,
+     int destinationFailedInputIndex) {
+   final boolean injectFailure =
+       ExceptionLocation.EM_RouteInputErrorEventToSource == exLocation;
+   if (injectFailure) {
+     throw new RuntimeException(exLocation.name());
+   }
+   final int sourceTaskIndex =
+       super.routeInputErrorEventToSource(destinationTaskIndex, destinationFailedInputIndex);
+   return sourceTaskIndex;
+ }
+
+ @Override
public void routeInputSourceTaskFailedEventToDestination(
int sourceTaskIndex,
Map<Integer, List<Integer>> destinationTaskAndInputIndices) {