You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@reef.apache.org by we...@apache.org on 2015/05/30 17:57:58 UTC

incubator-reef git commit: [REEF-308] Ignore missing evaluator state transitions in the driver

Repository: incubator-reef
Updated Branches:
  refs/heads/master 4b2190b13 -> 8c4456415


[REEF-308] Ignore missing evaluator state transitions in the driver

This does not solve the underlying issue, but the job driver no longer fails
when a task sends an out-of-order status message, provided that the reported
status is forward-progressing.

JIRA:
  [REEF-308](https://issues.apache.org/jira/browse/REEF-308)

Pull Request:
  This closes #198

*Note*: The commit below has the wrong summary; it should have been:

[REEF-168] Make EvaluatorRequestor injectable in REEF.NET


Project: http://git-wip-us.apache.org/repos/asf/incubator-reef/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-reef/commit/8c445641
Tree: http://git-wip-us.apache.org/repos/asf/incubator-reef/tree/8c445641
Diff: http://git-wip-us.apache.org/repos/asf/incubator-reef/diff/8c445641

Branch: refs/heads/master
Commit: 8c44564158c288fc9069a6579fbb6f52091bf5a9
Parents: 4b2190b
Author: afchung <af...@gmail.com>
Authored: Fri May 29 14:19:12 2015 -0700
Committer: Markus Weimer <we...@apache.org>
Committed: Sat May 30 08:52:18 2015 -0700

----------------------------------------------------------------------
 .../common/driver/evaluator/EvaluatorManager.java  | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-reef/blob/8c445641/lang/java/reef-common/src/main/java/org/apache/reef/runtime/common/driver/evaluator/EvaluatorManager.java
----------------------------------------------------------------------
diff --git a/lang/java/reef-common/src/main/java/org/apache/reef/runtime/common/driver/evaluator/EvaluatorManager.java b/lang/java/reef-common/src/main/java/org/apache/reef/runtime/common/driver/evaluator/EvaluatorManager.java
index da533af..5bbc089 100644
--- a/lang/java/reef-common/src/main/java/org/apache/reef/runtime/common/driver/evaluator/EvaluatorManager.java
+++ b/lang/java/reef-common/src/main/java/org/apache/reef/runtime/common/driver/evaluator/EvaluatorManager.java
@@ -255,7 +255,7 @@ public final class EvaluatorManager implements Identifiable, AutoCloseable {
   public void onEvaluatorException(final EvaluatorException exception) {
     synchronized (this.evaluatorDescriptor) {
       if (this.stateManager.isDoneOrFailedOrKilled()) {
-        LOG.log(Level.FINE, "Ignoring an exception receivedfor Evaluator {0} which is already in state {1}.",
+        LOG.log(Level.FINE, "Ignoring an exception received for Evaluator {0} which is already in state {1}.",
             new Object[]{this.getId(), this.stateManager});
         return;
       }
@@ -460,9 +460,19 @@ public final class EvaluatorManager implements Identifiable, AutoCloseable {
     if (!(this.task.isPresent() && this.task.get().getId().equals(taskStatusProto.getTaskId()))) {
       if (taskStatusProto.getState() == ReefServiceProtos.State.INIT ||
           taskStatusProto.getState() == ReefServiceProtos.State.FAILED ||
+          taskStatusProto.getState() == ReefServiceProtos.State.RUNNING ||
           taskStatusProto.getRecovery() // for task from recovered evaluators
           ) {
 
+        // [REEF-308] exposes a bug where the .NET evaluator does not send its states in the right order
+        // [REEF-289] is a related item which may fix the issue
+        if (taskStatusProto.getState() == ReefServiceProtos.State.RUNNING) {
+          LOG.log(Level.WARNING,
+                  "Received a message of state " + ReefServiceProtos.State.RUNNING +
+                  " for Task " + taskStatusProto.getTaskId() +
+                  " before receiving its " + ReefServiceProtos.State.INIT + " state");
+        }
+
         // FAILED is a legal first state of a Task as it could have failed during construction.
         this.task = Optional.of(
             new TaskRepresenter(taskStatusProto.getTaskId(),
@@ -471,8 +481,9 @@ public final class EvaluatorManager implements Identifiable, AutoCloseable {
                 this,
                 this.exceptionCodec));
       } else {
-        throw new RuntimeException("Received an message of state " + taskStatusProto.getState() +
-            ", not INIT or FAILED for Task " + taskStatusProto.getTaskId() + " which we haven't heard from before.");
+        throw new RuntimeException("Received a message of state " + taskStatusProto.getState() +
+            ", not INIT, RUNNING, or FAILED for Task " + taskStatusProto.getTaskId() +
+            " which we haven't heard from before.");
       }
     }
     this.task.get().onTaskStatusMessage(taskStatusProto);