You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@reef.apache.org by ju...@apache.org on 2016/11/23 20:20:07 UTC
reef git commit: [REEF-1625] Fix TestFailMapperEvaluatorsOnDispose failures in AppVeyor
Repository: reef
Updated Branches:
refs/heads/master 729128ef9 -> 35b48e1ea
[REEF-1625] Fix TestFailMapperEvaluatorsOnDispose failures in AppVeyor
This change reduces the number of failing evaluators in the test and
fine-tunes the checks to account for possible retries.
See the JIRA issue for an explanation of the time-sensitivity of this test.
JIRA:
[REEF-1625](https://issues.apache.org/jira/browse/REEF-1625)
Pull request:
This closes #1189
Project: http://git-wip-us.apache.org/repos/asf/reef/repo
Commit: http://git-wip-us.apache.org/repos/asf/reef/commit/35b48e1e
Tree: http://git-wip-us.apache.org/repos/asf/reef/tree/35b48e1e
Diff: http://git-wip-us.apache.org/repos/asf/reef/diff/35b48e1e
Branch: refs/heads/master
Commit: 35b48e1eab365ff609a49ee97b0143805d7b61f3
Parents: 729128e
Author: Mariia Mykhailova <ma...@apache.org>
Authored: Tue Nov 22 17:28:25 2016 -0800
Committer: Julia Wang <jw...@yahoo.com>
Committed: Wed Nov 23 12:18:46 2016 -0800
----------------------------------------------------------------------
.../IMRU/TestFailMapperEvaluatorsOnDispose.cs | 18 +++++++++++-------
1 file changed, 11 insertions(+), 7 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/reef/blob/35b48e1e/lang/cs/Org.Apache.REEF.Tests/Functional/IMRU/TestFailMapperEvaluatorsOnDispose.cs
----------------------------------------------------------------------
diff --git a/lang/cs/Org.Apache.REEF.Tests/Functional/IMRU/TestFailMapperEvaluatorsOnDispose.cs b/lang/cs/Org.Apache.REEF.Tests/Functional/IMRU/TestFailMapperEvaluatorsOnDispose.cs
index 017580e..5a09739 100644
--- a/lang/cs/Org.Apache.REEF.Tests/Functional/IMRU/TestFailMapperEvaluatorsOnDispose.cs
+++ b/lang/cs/Org.Apache.REEF.Tests/Functional/IMRU/TestFailMapperEvaluatorsOnDispose.cs
@@ -54,18 +54,23 @@ namespace Org.Apache.REEF.Tests.Functional.IMRU
NumberOfRetry,
testFolder);
string[] lines = ReadLogFile(DriverStdout, "driver", testFolder, 360);
- var completedTaskCount = GetMessageCount(lines, "Received ICompletedTask");
var failedEvaluatorCount = GetMessageCount(lines, FailedEvaluatorMessage);
var failedTaskCount = GetMessageCount(lines, FailedTaskMessage);
var jobSuccess = GetMessageCount(lines, IMRUDriver<int[], int[], int[], int[]>.DoneActionPrefix);
- // In first retry, all tasks are completed and then there are 2 failed evaluators.
- // No failed tasks.
- Assert.Equal(2, failedEvaluatorCount);
+ // In this test one of evaluators fails at task dispose stage. Depending on the timing of the failure,
+ // if it happens after all tasks completed, the job succeeds immediately,
+ // but if it happens before that, this counts as failure and job restarts.
+ // Number of tries done can be detected as number of recoveries done + 1
+ var triesDone = GetMessageCount(lines, "Start recovery") + 1;
+
+ // There should be no failed tasks.
+ // Number of failed evaluators = number of tries done
+ // Can't say anything about the number of completed tasks (depends on timing)
+ Assert.Equal(triesDone, failedEvaluatorCount);
Assert.Equal(0, failedTaskCount);
- Assert.Equal(numTasks, completedTaskCount);
- // eventually job succeeds
+ // but eventually job must succeed
Assert.Equal(1, jobSuccess);
CleanUp(testFolder);
}
@@ -79,7 +84,6 @@ namespace Org.Apache.REEF.Tests.Functional.IMRU
return TangFactory.GetTang().NewConfigurationBuilder(c)
.BindSetEntry<PipelinedBroadcastAndReduceWithFaultTolerant.TaskIdsToFail, string>(GenericType<PipelinedBroadcastAndReduceWithFaultTolerant.TaskIdsToFail>.Class, "IMRUMap-RandomInputPartition-2-")
- .BindSetEntry<PipelinedBroadcastAndReduceWithFaultTolerant.TaskIdsToFail, string>(GenericType<PipelinedBroadcastAndReduceWithFaultTolerant.TaskIdsToFail>.Class, "IMRUMap-RandomInputPartition-3-")
.BindIntNamedParam<PipelinedBroadcastAndReduceWithFaultTolerant.FailureType>(PipelinedBroadcastAndReduceWithFaultTolerant.FailureType.EvaluatorFailureDuringTaskDispose.ToString())
.BindNamedParameter(typeof(MaxRetryNumberInRecovery), NumberOfRetry.ToString())
.BindNamedParameter(typeof(PipelinedBroadcastAndReduceWithFaultTolerant.TotalNumberOfForcedFailures), NumberOfRetry.ToString())