You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@reef.apache.org by ju...@apache.org on 2016/11/23 20:20:07 UTC

reef git commit: [REEF-1625] Fix TestFailMapperEvaluatorsOnDispose failures in AppVeyor

Repository: reef
Updated Branches:
  refs/heads/master 729128ef9 -> 35b48e1ea


[REEF-1625] Fix TestFailMapperEvaluatorsOnDispose failures in AppVeyor

This change reduces the number of failing evaluators in the test and
fine-tunes the checks to account for possible retries.
See the JIRA issue for an explanation of the time-sensitivity of this test.

JIRA:
  [REEF-1625](https://issues.apache.org/jira/browse/REEF-1625)

Pull request:
  This closes #1189


Project: http://git-wip-us.apache.org/repos/asf/reef/repo
Commit: http://git-wip-us.apache.org/repos/asf/reef/commit/35b48e1e
Tree: http://git-wip-us.apache.org/repos/asf/reef/tree/35b48e1e
Diff: http://git-wip-us.apache.org/repos/asf/reef/diff/35b48e1e

Branch: refs/heads/master
Commit: 35b48e1eab365ff609a49ee97b0143805d7b61f3
Parents: 729128e
Author: Mariia Mykhailova <ma...@apache.org>
Authored: Tue Nov 22 17:28:25 2016 -0800
Committer: Julia Wang <jw...@yahoo.com>
Committed: Wed Nov 23 12:18:46 2016 -0800

----------------------------------------------------------------------
 .../IMRU/TestFailMapperEvaluatorsOnDispose.cs     | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/reef/blob/35b48e1e/lang/cs/Org.Apache.REEF.Tests/Functional/IMRU/TestFailMapperEvaluatorsOnDispose.cs
----------------------------------------------------------------------
diff --git a/lang/cs/Org.Apache.REEF.Tests/Functional/IMRU/TestFailMapperEvaluatorsOnDispose.cs b/lang/cs/Org.Apache.REEF.Tests/Functional/IMRU/TestFailMapperEvaluatorsOnDispose.cs
index 017580e..5a09739 100644
--- a/lang/cs/Org.Apache.REEF.Tests/Functional/IMRU/TestFailMapperEvaluatorsOnDispose.cs
+++ b/lang/cs/Org.Apache.REEF.Tests/Functional/IMRU/TestFailMapperEvaluatorsOnDispose.cs
@@ -54,18 +54,23 @@ namespace Org.Apache.REEF.Tests.Functional.IMRU
                 NumberOfRetry,
                 testFolder);
             string[] lines = ReadLogFile(DriverStdout, "driver", testFolder, 360);
-            var completedTaskCount = GetMessageCount(lines, "Received ICompletedTask");
             var failedEvaluatorCount = GetMessageCount(lines, FailedEvaluatorMessage);
             var failedTaskCount = GetMessageCount(lines, FailedTaskMessage);
             var jobSuccess = GetMessageCount(lines, IMRUDriver<int[], int[], int[], int[]>.DoneActionPrefix);
 
-            // In first retry, all tasks are completed and then there are 2 failed evaluators. 
-            // No failed tasks.
-            Assert.Equal(2, failedEvaluatorCount);
+            // In this test one of evaluators fails at task dispose stage. Depending on the timing of the failure,
+            // if it happens after all tasks completed, the job succeeds immediately,
+            // but if it happens before that, this counts as failure and job restarts.
+            // Number of tries done can be detected as number of recoveries done + 1
+            var triesDone = GetMessageCount(lines, "Start recovery") + 1;
+
+            // There should be no failed tasks.
+            // Number of failed evaluators = number of tries done
+            // Can't say anything about the number of completed tasks (depends on timing)
+            Assert.Equal(triesDone, failedEvaluatorCount);
             Assert.Equal(0, failedTaskCount);
-            Assert.Equal(numTasks, completedTaskCount);
 
-            // eventually job succeeds
+            // but eventually job must succeed
             Assert.Equal(1, jobSuccess);
             CleanUp(testFolder);
         }
@@ -79,7 +84,6 @@ namespace Org.Apache.REEF.Tests.Functional.IMRU
 
             return TangFactory.GetTang().NewConfigurationBuilder(c)
                 .BindSetEntry<PipelinedBroadcastAndReduceWithFaultTolerant.TaskIdsToFail, string>(GenericType<PipelinedBroadcastAndReduceWithFaultTolerant.TaskIdsToFail>.Class, "IMRUMap-RandomInputPartition-2-")
-                .BindSetEntry<PipelinedBroadcastAndReduceWithFaultTolerant.TaskIdsToFail, string>(GenericType<PipelinedBroadcastAndReduceWithFaultTolerant.TaskIdsToFail>.Class, "IMRUMap-RandomInputPartition-3-")
                 .BindIntNamedParam<PipelinedBroadcastAndReduceWithFaultTolerant.FailureType>(PipelinedBroadcastAndReduceWithFaultTolerant.FailureType.EvaluatorFailureDuringTaskDispose.ToString())
                 .BindNamedParameter(typeof(MaxRetryNumberInRecovery), NumberOfRetry.ToString())
                 .BindNamedParameter(typeof(PipelinedBroadcastAndReduceWithFaultTolerant.TotalNumberOfForcedFailures), NumberOfRetry.ToString())