You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by bu...@apache.org on 2019/10/28 22:24:36 UTC

svn commit: r1869089 - in /uima/uima-ducc/trunk/uima-ducc-web/src/main: java/org/apache/uima/ducc/ws/handlers/experiments/ java/org/apache/uima/ducc/ws/xd/ webapp/root/ webapp/root/js/

Author: burn
Date: Mon Oct 28 22:24:35 2019
New Revision: 1869089

URL: http://svn.apache.org/viewvc?rev=1869089&view=rev
Log:
UIMA-6130 Don't save the restart state in the file; update the details page with consistent data when restarting

Modified:
    uima/uima-ducc/trunk/uima-ducc-web/src/main/java/org/apache/uima/ducc/ws/handlers/experiments/HandlerExperimentsServlets.java
    uima/uima-ducc/trunk/uima-ducc-web/src/main/java/org/apache/uima/ducc/ws/xd/Experiment.java
    uima/uima-ducc/trunk/uima-ducc-web/src/main/java/org/apache/uima/ducc/ws/xd/ExperimentsRegistryManager.java
    uima/uima-ducc/trunk/uima-ducc-web/src/main/java/org/apache/uima/ducc/ws/xd/ExperimentsRegistryUtilities.java
    uima/uima-ducc/trunk/uima-ducc-web/src/main/java/org/apache/uima/ducc/ws/xd/IExperiment.java
    uima/uima-ducc/trunk/uima-ducc-web/src/main/java/org/apache/uima/ducc/ws/xd/Jed.java
    uima/uima-ducc/trunk/uima-ducc-web/src/main/webapp/root/experiment.details.jsp
    uima/uima-ducc/trunk/uima-ducc-web/src/main/webapp/root/js/ducc.local.js

Modified: uima/uima-ducc/trunk/uima-ducc-web/src/main/java/org/apache/uima/ducc/ws/handlers/experiments/HandlerExperimentsServlets.java
URL: http://svn.apache.org/viewvc/uima/uima-ducc/trunk/uima-ducc-web/src/main/java/org/apache/uima/ducc/ws/handlers/experiments/HandlerExperimentsServlets.java?rev=1869089&r1=1869088&r2=1869089&view=diff
==============================================================================
--- uima/uima-ducc/trunk/uima-ducc-web/src/main/java/org/apache/uima/ducc/ws/handlers/experiments/HandlerExperimentsServlets.java (original)
+++ uima/uima-ducc/trunk/uima-ducc-web/src/main/java/org/apache/uima/ducc/ws/handlers/experiments/HandlerExperimentsServlets.java Mon Oct 28 22:24:35 2019
@@ -144,6 +144,7 @@ public class HandlerExperimentsServlets
         health = "red";
         break;
       case Running:
+      case Restarting:
         health = "green";
         break;
       default:
@@ -238,7 +239,7 @@ public class HandlerExperimentsServlets
     return handled;
   }
 
-  // Note - could put the 2 boolean flags in the experiment ??
+  // TODO - Might be nice to mark the top-level tasks as "Restarting"
   private String decorateState(IExperiment experiment, Task task, boolean isRestartable, boolean isCanceled) {
     String mName = "decorateState";
     String state = "";
@@ -454,7 +455,7 @@ public class HandlerExperimentsServlets
         // Find the latest duccId to display for a task ... omit if not started or has been reset for a rerun
         for (Task task : tasks) {
           long latestDuccId = 0;
-          if (task.type != null) {
+          if (task.status != null && !task.rerun) {
             Jed.Type jedType = Jed.Type.getEnum(task.type);
             if (jedType == Jed.Type.Ducc_Job || jedType == Jed.Type.Java) {
               long[] duccIds = task.duccId;
@@ -481,6 +482,18 @@ public class HandlerExperimentsServlets
 
     fmt.send(response);
     
+    // Restart the experiment AFTER the page has been generated
+    // Don't trigger on the restart state of the experiment as we don't want to launch on every page refresh
+    // If restart fails clear the restarting state
+    boolean restart = request.getParameter("restart") != null;
+    if (restart) {
+      boolean ok = ExperimentsRegistryUtilities.launchJed(experiment);
+      if (!ok) {
+        WsLog.warn(cName, mName, "Failed to relaunch JED - reset state");
+        experiment.updateStatus(null);
+      }
+    }
+    
     handled = true;
 
     WsLog.exit(cName, mName);
@@ -525,8 +538,14 @@ public class HandlerExperimentsServlets
       if (experiment.getJedDuccId() != null &&
         HandlersHelper.getAuthorizationStatus(request, experiment.getUser()) == AuthorizationStatus.LoggedInOwner) {
         restart = request.getParameter("restart") != null;
+        if (restart) {          // Update the state of the tasks for the restart ... DuccId not known yet
+          experiment.updateStatus("?");
+        }
         Status status = experiment.getStatus();
-        if (restart || status == Jed.Status.Restarting) {
+        // TODO - If still restarting should check if the restartJedId AP is actually running
+        // If not the launch must have failed for some reason, and we should reset the restart status.
+        // If the restart worked JED should have updated the state file and the Experiment object replaced.
+        if (status == Jed.Status.Restarting) {
           button = "<button style='background-color:Beige;font-size:16px' "
                   + "disabled"
                   + " title='experiment is restarting'>Restarting...</button>";
@@ -554,10 +573,6 @@ public class HandlerExperimentsServlets
 
     response.getWriter().println(sb);
     
-    if (restart) {
-      ExperimentsRegistryUtilities.launchJed(experiment);
-    }
-
     WsLog.exit(cName, mName);
     return true;
   }

Modified: uima/uima-ducc/trunk/uima-ducc-web/src/main/java/org/apache/uima/ducc/ws/xd/Experiment.java
URL: http://svn.apache.org/viewvc/uima/uima-ducc/trunk/uima-ducc-web/src/main/java/org/apache/uima/ducc/ws/xd/Experiment.java?rev=1869089&r1=1869088&r2=1869089&view=diff
==============================================================================
--- uima/uima-ducc/trunk/uima-ducc-web/src/main/java/org/apache/uima/ducc/ws/xd/Experiment.java (original)
+++ uima/uima-ducc/trunk/uima-ducc-web/src/main/java/org/apache/uima/ducc/ws/xd/Experiment.java Mon Oct 28 22:24:35 2019
@@ -49,7 +49,9 @@ public class Experiment implements IExpe
 
   private DuccId jedDuccId;
 
-  
+  private String restartJedId = null;  // ID of re-launched JED
+
+    
   public Experiment(String user, String directory, long fileDate, ArrayList<Task> tasks, IDuccWork work) {
     this.user = user;
     this.directory = directory;
@@ -108,16 +110,19 @@ public class Experiment implements IExpe
     return retVal;
   }
 
-  // TODO - the experiment status could be determined in the constructor
+  // TODO - the experiment status could be determined in the constructor but the restarting state can change
+  // The restarting state is NOT saved in the Experiment.state file
   @Override
   public Jed.Status getStatus() {
     Jed.Status retVal = Jed.Status.Unknown;
+    if (restartJedId != null) {
+      return Jed.Status.Restarting;
+    }
     if (tasks != null) {
       boolean canceled = false;
       boolean failed = false;
       boolean running = false;
       boolean done = false;
-      boolean restarting = false;
       for (Task task : tasks) {
         if (task.parentId == 0 && task.status != null) {
           Jed.Status status = Jed.Status.getEnum(task.status);
@@ -128,9 +133,6 @@ public class Experiment implements IExpe
             case Running:
               running = true;
               break;
-            case Restarting:
-              restarting = true;
-              break;
             case Failed:
             case DependencyFailed:
               failed = true;
@@ -149,8 +151,6 @@ public class Experiment implements IExpe
       // But if JED appears to have been killed while running change state to Unknown
       if (running) {
         retVal = isStale() ? Jed.Status.Unknown : Jed.Status.Running;
-      } else if (restarting) {
-        retVal = Jed.Status.Restarting;
       } else if (failed) {
         retVal = Jed.Status.Failed;
       } else if (canceled) {
@@ -165,26 +165,26 @@ public class Experiment implements IExpe
   }
 
   /*
-   * Set status of the top-level task(s) to "Restarting",
-   * clear status of all rerun tasks selected to be rerun,
-   + then rewrite the Experiment.state file
+   * Set/clear the restarting status of the experiment
+   * When restarting is initially requested the ID is not available,
+   * it is provided when launchJed succeeds.
    */
   @Override
-  public boolean updateStateFile(String umask) {
-    if (tasks == null) {
-      return true;
-    }
-    for (Task task : tasks) {
-      if (task.parentId == 0) {
-        task.status = "Restarting";
-      } else if (task.rerun) {
-        // Indicate that task has not yet started
-        task.status = null;
-        task.startTime = null;
-        task.runTime = 0;
+  public void updateStatus(String restartJedId) {
+    String mName = "updateStatus";
+    WsLog.info(logger, mName, "Restart JED ID = " + restartJedId);
+    this.restartJedId  = restartJedId;
+    if (restartJedId != null) {
+      for (Task task : tasks) {
+        if (task.rerun) {
+          task.status = null;
+          if (Jed.Type.isLeaf(task.type)) { // Times are not accumulated for primitive tasks
+            task.startTime = null;
+            task.runTime = 0;
+          }
+        }
       }
     }
-    return writeStateFile(umask);
   }
   
   /*
@@ -192,7 +192,8 @@ public class Experiment implements IExpe
    * as the user copy it to the output directory,
    * delete the temp file.
    */
-  private boolean writeStateFile(String umask) {
+  @Override
+  public boolean writeStateFile(String umask) {
     File tempFile = null;
     Gson gson = new GsonBuilder().excludeFieldsWithoutExposeAnnotation().setPrettyPrinting().create();
     try {
@@ -209,13 +210,18 @@ public class Experiment implements IExpe
     File stateFile = new File(directory, "Experiment.state");
     HashMap<String, String> environment = new HashMap<String, String>();
     environment.put("DUCC_UMASK", umask);
-    String sysout = DuccAsUser.execute(user, environment, "/bin/cp", tempFile.getAbsolutePath(), stateFile.getAbsolutePath());
-    if (sysout.length() == 0) {
+    // Synchronize with the check for a newer state file in ExperimentsRegistryManager to ensure that
+    // the file does not look newer than the in-memory Experiment
+    synchronized (this) {
+      String sysout = DuccAsUser.execute(user, environment, "/bin/cp", tempFile.getAbsolutePath(), stateFile.getAbsolutePath());
+      if (sysout.length() > 0) {
+        WsLog.error(logger, "writeExperiment", "Failed to copy experiment state file\n" + sysout);
+        return false;
+      }
+      fileDate = System.currentTimeMillis();   // Will be > actual filetime
       tempFile.delete();
       return true;
     }
-    WsLog.error(logger, "writeExperiment", "Failed to copy experiment state file\n" + sysout);
-    return false;
   }
   
   private static SimpleDateFormat sdf = new SimpleDateFormat("yyyy.MM.dd-HH:mm:ss");

Modified: uima/uima-ducc/trunk/uima-ducc-web/src/main/java/org/apache/uima/ducc/ws/xd/ExperimentsRegistryManager.java
URL: http://svn.apache.org/viewvc/uima/uima-ducc/trunk/uima-ducc-web/src/main/java/org/apache/uima/ducc/ws/xd/ExperimentsRegistryManager.java?rev=1869089&r1=1869088&r2=1869089&view=diff
==============================================================================
--- uima/uima-ducc/trunk/uima-ducc-web/src/main/java/org/apache/uima/ducc/ws/xd/ExperimentsRegistryManager.java (original)
+++ uima/uima-ducc/trunk/uima-ducc-web/src/main/java/org/apache/uima/ducc/ws/xd/ExperimentsRegistryManager.java Mon Oct 28 22:24:35 2019
@@ -163,14 +163,20 @@ public class ExperimentsRegistryManager
     // If newer refresh it, otherwise just update the JED duccId in case it is a newer AP
     IExperiment existingExperiment = experimentsByDir.get(directory);
     if (existingExperiment != null) {
-      if (fileTime <= existingExperiment.getFileDate()) {
+      // Synchronize the check for a newer state file with the rewrite of the file in Experiment.writeStateFile
+      // to ensure that the rewritten file does not look newer than the in-memory Experiment
+      long existingFileTime;
+      synchronized (existingExperiment) {
+        existingFileTime = existingExperiment.getFileDate();
+      }
+      if (fileTime <= existingFileTime) {
         if (work != null) {
-          existingExperiment.updateJedId(work.getDuccId()); 
+          existingExperiment.updateJedId(work.getDuccId());
         }
         return;
       }
     }
-    
+
     // Load or reload changed state file
     String contents = ExperimentsRegistryUtilities.readFile(user, stateFile);
     if (contents == null) {

Modified: uima/uima-ducc/trunk/uima-ducc-web/src/main/java/org/apache/uima/ducc/ws/xd/ExperimentsRegistryUtilities.java
URL: http://svn.apache.org/viewvc/uima/uima-ducc/trunk/uima-ducc-web/src/main/java/org/apache/uima/ducc/ws/xd/ExperimentsRegistryUtilities.java?rev=1869089&r1=1869088&r2=1869089&view=diff
==============================================================================
--- uima/uima-ducc/trunk/uima-ducc-web/src/main/java/org/apache/uima/ducc/ws/xd/ExperimentsRegistryUtilities.java (original)
+++ uima/uima-ducc/trunk/uima-ducc-web/src/main/java/org/apache/uima/ducc/ws/xd/ExperimentsRegistryUtilities.java Mon Oct 28 22:24:35 2019
@@ -128,6 +128,7 @@ public class ExperimentsRegistryUtilitie
     for (String arg : cmd.getArguments()) {
       args.append(arg).append(' ');
     }
+    args.append("--DRIVER.rerunTasks=~");   // Override any initial rerun list 
     StringBuilder envs = new StringBuilder();
     if (cmd instanceof ACommandLine) {
       for (Entry<String, String> ent : ((ACommandLine) cmd).getEnvironment().entrySet()) {
@@ -150,8 +151,8 @@ public class ExperimentsRegistryUtilitie
         "--description",             "JED---" + dwj.getStandardInfo().getLogDirectory()
     };
     
-    // Update state file with the user's umask AFTER successfully restoring the JED AP from the DB
-    if (!experiment.updateStateFile(dwj.getStandardInfo().getUmask())) {
+    // Write the state file with the user's umask AFTER successfully restoring the JED AP from the DB
+    if (!experiment.writeStateFile(dwj.getStandardInfo().getUmask())) {
       return false;
     }
     
@@ -161,7 +162,21 @@ public class ExperimentsRegistryUtilitie
     String sysout = DuccAsUser.execute(experiment.getUser(), null, submitCmd);
     WsLog.info(cName, mName, sysout);
     
-    return true;
+    // If launch failed clear the Experiment's "Restarting" state 
+    // Should report: "Managed Reservation ### submitted."
+    // If successful save the ID of JED
+    // If fails reset the status
+    boolean launched = sysout.startsWith("Managed Reservation");
+    if (launched) {
+      String[] toks = sysout.split("\\s+");
+      if (toks.length >= 3) {
+        experiment.updateStatus(toks[2]);
+      }
+    } else {
+      experiment.updateStatus(null);
+    }
+    
+    return launched;
   }
 
 }

Modified: uima/uima-ducc/trunk/uima-ducc-web/src/main/java/org/apache/uima/ducc/ws/xd/IExperiment.java
URL: http://svn.apache.org/viewvc/uima/uima-ducc/trunk/uima-ducc-web/src/main/java/org/apache/uima/ducc/ws/xd/IExperiment.java?rev=1869089&r1=1869088&r2=1869089&view=diff
==============================================================================
--- uima/uima-ducc/trunk/uima-ducc-web/src/main/java/org/apache/uima/ducc/ws/xd/IExperiment.java (original)
+++ uima/uima-ducc/trunk/uima-ducc-web/src/main/java/org/apache/uima/ducc/ws/xd/IExperiment.java Mon Oct 28 22:24:35 2019
@@ -51,7 +51,12 @@ public interface IExperiment extends Com
   public void updateJedId(DuccId duccId);
   
   /*
-   *  Update the Experiment.state file indicating the tasks to be rerun
+   *  Set or clear the Experiment restart status
    */
-  public boolean updateStateFile(String umask);
+  public void updateStatus(String restartJedId);
+  
+  /*
+   * Write the updated state file
+   */
+  public boolean writeStateFile(String umask);
 }

Modified: uima/uima-ducc/trunk/uima-ducc-web/src/main/java/org/apache/uima/ducc/ws/xd/Jed.java
URL: http://svn.apache.org/viewvc/uima/uima-ducc/trunk/uima-ducc-web/src/main/java/org/apache/uima/ducc/ws/xd/Jed.java?rev=1869089&r1=1869088&r2=1869089&view=diff
==============================================================================
--- uima/uima-ducc/trunk/uima-ducc-web/src/main/java/org/apache/uima/ducc/ws/xd/Jed.java (original)
+++ uima/uima-ducc/trunk/uima-ducc-web/src/main/java/org/apache/uima/ducc/ws/xd/Jed.java Mon Oct 28 22:24:35 2019
@@ -24,12 +24,6 @@ public class Jed {
 
     Ducc_Job, Java, File, Exec, Trainer, Sequential, Sequential_Data, Parallel, Parallel_Data, Set_Phase, Other;
 
-    private static String DUCC_JOB = "DUCC_JOB";
-
-    private static String PARALLEL_DATA = "PARALLEL_DATA";
-
-    private static String SET_PHASE = "SET_PHASE";
-
     public static Type getEnum(String value) {
       Type retVal = Other;
       if (value == null) {
@@ -80,6 +74,8 @@ public class Jed {
     }
   }
 
+  // Note - the Restarting state is not saved in the Experiment.state file and is only applied to
+  // the state of the experiment ... not any individual tasks.
   public enum Status {
 
     Running, Restarting, Completed, Done, Canceled, Failed, DependencyFailed, Ignored, Unknown, Other;

Modified: uima/uima-ducc/trunk/uima-ducc-web/src/main/webapp/root/experiment.details.jsp
URL: http://svn.apache.org/viewvc/uima/uima-ducc/trunk/uima-ducc-web/src/main/webapp/root/experiment.details.jsp?rev=1869089&r1=1869088&r2=1869089&view=diff
==============================================================================
--- uima/uima-ducc/trunk/uima-ducc-web/src/main/webapp/root/experiment.details.jsp (original)
+++ uima/uima-ducc/trunk/uima-ducc-web/src/main/webapp/root/experiment.details.jsp Mon Oct 28 22:24:35 2019
@@ -133,12 +133,12 @@ if (table_style.equals("scroll")) {
 if (table_style.equals("classic")) {
 %>
 	<table width="100%">
-   	<caption title="Hint: use Preferences -> Table Style to alter format"><b>Experiment Details</b><br><i><small>click column heading to sort</small></i></caption>
-   	<tr>
-    <td>
-      <table class="sortable">
-		<thead>
-		<tr class="ducc-head">
+   	  <caption title="Hint: use Preferences -> Table Style to alter format"><b>Experiment Details</b><br><i><small>click column heading to sort</small></i></caption>
+   	  <tr>
+          <td>
+          <table class="sortable">
+	    <thead>
+	      <tr class="ducc-head">
 		<th title="The path id for this experiment subtask">Path Id</th>
 		<th title="The id for this experiment subtask" class="sorttable_numeric">Id</th>
 		<th title="The parent id for this experiment subtask" class="sorttable_numeric">Parent</th>
@@ -155,10 +155,10 @@ if (table_style.equals("classic")) {
 		<th title="The number of work items currently dispatched (queued+operating)">Dis-<br>patch</th>
 		<th title="The number of work items that were retried, excluding preemptions">Retry</th>
 		<th title="The number of work items that were preempted">Pre-<br>empt</th>
-		</tr>
-		</thead>
-		<tbody id="experiment_details_area">
-   		</tbody>
+	      </tr>
+	    </thead>
+	    <tbody id="experiment_details_area">
+   	    </tbody>
 	  </table>
    	</table>
 <%

Modified: uima/uima-ducc/trunk/uima-ducc-web/src/main/webapp/root/js/ducc.local.js
URL: http://svn.apache.org/viewvc/uima/uima-ducc/trunk/uima-ducc-web/src/main/webapp/root/js/ducc.local.js?rev=1869089&r1=1869088&r2=1869089&view=diff
==============================================================================
--- uima/uima-ducc/trunk/uima-ducc-web/src/main/webapp/root/js/ducc.local.js (original)
+++ uima/uima-ducc/trunk/uima-ducc-web/src/main/webapp/root/js/ducc.local.js Mon Oct 28 22:24:35 2019
@@ -116,7 +116,7 @@ function ducc_load_data(type, params) {
     }
     eval('ms_load_' + type_ + '_data = ms_now');
     if (params == undefined) {
-	params = location.search;
+	params ="";
     }
     var table_style = ducc_preferences_get("table_style");
     if (table_style == "scroll") {
@@ -131,7 +131,7 @@ function ducc_load_classic_data(type, pa
     var data = null;
 
     try {
-        var servlet = "/ducc-servlet/" + type + "-data" + params
+        var servlet = "/ducc-servlet/" + type + "-data" + location.search + params
         var tomsecs = ms_timeout;
         $.ajax({
             url: servlet,
@@ -153,7 +153,7 @@ function ducc_load_classic_data(type, pa
 function ducc_load_scroll_data(type, params)
 {
         try {
-                oTable.fnReloadAjax("/ducc-servlet/json-format-aaData-" + type + params, ducc_load_scroll_callback);
+                oTable.fnReloadAjax("/ducc-servlet/json-format-aaData-" + type + location.search + params, ducc_load_scroll_callback);
         }
         catch(err) {
                 ducc_error("ducc_load_scroll_data/"+type,err);
@@ -194,7 +194,7 @@ function ducc_load_identify_experiment_d
 	params = "";
     }
         try {
-                server_url= "/ducc-servlet/experiment-details-directory"+location.search+params;
+                server_url= "/ducc-servlet/experiment-details-directory" + location.search + params;
                 $.ajax(
                 {
                         url : server_url,
@@ -255,7 +255,7 @@ function ducc_update_page_local(type)
 function ducc_toggle_task_state(taskid)
 {
         try {
-                ducc_load_data("experiment-details", location.search+"&taskid="+taskid);
+                ducc_load_data("experiment-details", "&taskid="+taskid);
         }
         catch(err) {
                 ducc_error("ducc_toggle_task_state",err);
@@ -266,7 +266,7 @@ function ducc_restart_experiment()
 {
         try {
                 ducc_load_identify_experiment_details("&restart=true");
-                ducc_load_data("experiment-details");
+                ducc_load_data("experiment-details", "&restart=true");
         }
         catch(err) {
                 ducc_error("ducc_restart_experiment",err);