You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@airavata.apache.org by di...@apache.org on 2020/06/20 06:04:27 UTC

[airavata] branch param-sweep updated: Adding fork sweep facility for slurm

This is an automated email from the ASF dual-hosted git repository.

dimuthuupe pushed a commit to branch param-sweep
in repository https://gitbox.apache.org/repos/asf/airavata.git


The following commit(s) were added to refs/heads/param-sweep by this push:
     new bda4684  Adding fork sweep facility for slurm
bda4684 is described below

commit bda4684ea683069238302efe31644c3a463e6b83
Author: Dimuthu Wannipurage <di...@gmail.com>
AuthorDate: Sat Jun 20 02:04:11 2020 -0400

    Adding fork sweep facility for slurm
---
 .../computeresource/JobManagerCommand.java         |  5 ++-
 .../impl/task/submission/config/JobFactory.java    | 41 ++++++++++++++------
 .../resources/SLURM_Fork_Sweep_Groovy.template     | 44 ++++++++++++++++++++++
 .../compute_resource_model.thrift                  |  3 +-
 4 files changed, 80 insertions(+), 13 deletions(-)

diff --git a/airavata-api/airavata-data-models/src/main/java/org/apache/airavata/model/appcatalog/computeresource/JobManagerCommand.java b/airavata-api/airavata-data-models/src/main/java/org/apache/airavata/model/appcatalog/computeresource/JobManagerCommand.java
index ccb208a..21c6d09 100644
--- a/airavata-api/airavata-data-models/src/main/java/org/apache/airavata/model/appcatalog/computeresource/JobManagerCommand.java
+++ b/airavata-api/airavata-data-models/src/main/java/org/apache/airavata/model/appcatalog/computeresource/JobManagerCommand.java
@@ -60,7 +60,8 @@ public enum JobManagerCommand implements org.apache.thrift.TEnum {
   CHECK_JOB(3),
   SHOW_QUEUE(4),
   SHOW_RESERVATION(5),
-  SHOW_START(6);
+  SHOW_START(6),
+  SWEEPING_SUBMISSION_TYPE(7);
 
   private final int value;
 
@@ -95,6 +96,8 @@ public enum JobManagerCommand implements org.apache.thrift.TEnum {
         return SHOW_RESERVATION;
       case 6:
         return SHOW_START;
+      case 7:
+        return SWEEPING_SUBMISSION_TYPE;
       default:
         return null;
     }
diff --git a/modules/airavata-helix/helix-spectator/src/main/java/org/apache/airavata/helix/impl/task/submission/config/JobFactory.java b/modules/airavata-helix/helix-spectator/src/main/java/org/apache/airavata/helix/impl/task/submission/config/JobFactory.java
index 3ec3662..1c84f7f 100644
--- a/modules/airavata-helix/helix-spectator/src/main/java/org/apache/airavata/helix/impl/task/submission/config/JobFactory.java
+++ b/modules/airavata-helix/helix-spectator/src/main/java/org/apache/airavata/helix/impl/task/submission/config/JobFactory.java
@@ -27,35 +27,52 @@ import org.apache.airavata.registry.cpi.AppCatalogException;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import java.util.Optional;
+
 public class JobFactory {
 
     private final static Logger logger = LoggerFactory.getLogger(JobFactory.class);
 
-    public static String getTemplateFileName(ResourceJobManagerType resourceJobManagerType, boolean isSweepType) {
+    private static final class SweepingType { // values compared against the SWEEPING_SUBMISSION_TYPE job manager command
+        static final String JOB_ARRAY_TYPE = "array"; // selects SLURM_Arr_Groovy.template
+        static final String FORK_TYPE = "fork"; // selects SLURM_Fork_Sweep_Groovy.template
+    }
+
+    /** Picks the Groovy template for {@code resourceJobManager}; sweep jobs ({@code isSweepType}) also consult its SWEEPING_SUBMISSION_TYPE command. Returns empty when no template matches. */
+    public static Optional<String> getTemplateFileName(ResourceJobManager resourceJobManager, boolean isSweepType) {
 
+        ResourceJobManagerType resourceJobManagerType = resourceJobManager.getResourceJobManagerType();
         if (isSweepType) {
             switch (resourceJobManagerType) {
                 case SLURM:
-                    return "SLURM_Arr_Groovy.template";
+                    // Switching on a null String throws NullPointerException, so an absent command map or entry becomes "" and falls to default.
+                    switch (Optional.ofNullable(resourceJobManager.getJobManagerCommands()).map(commands -> commands.get(JobManagerCommand.SWEEPING_SUBMISSION_TYPE)).orElse("")) {
+                        case SweepingType.JOB_ARRAY_TYPE:
+                            return Optional.of("SLURM_Arr_Groovy.template");
+                        case SweepingType.FORK_TYPE:
+                            return Optional.of("SLURM_Fork_Sweep_Groovy.template");
+                        default:
+                            return Optional.empty();
+                    }
                 default:
-                    return null;
+                    return Optional.empty();
             }
         } else {
             switch (resourceJobManagerType) {
                 case FORK:
-                    return "FORK_Groovy.template";
+                    return Optional.of("FORK_Groovy.template");
                 case PBS:
-                    return "PBS_Groovy.template";
+                    return Optional.of("PBS_Groovy.template");
                 case SLURM:
-                    return "SLURM_Groovy.template";
+                    return Optional.of("SLURM_Groovy.template");
                 case UGE:
-                    return "UGE_Groovy.template";
+                    return Optional.of("UGE_Groovy.template");
                 case LSF:
-                    return "LSF_Groovy.template";
+                    return Optional.of("LSF_Groovy.template");
                 case CLOUD:
-                    return "CLOUD_Groovy.template";
+                    return Optional.of("CLOUD_Groovy.template");
                 default:
-                    return null;
+                    return Optional.empty();
             }
         }
     }
@@ -112,7 +129,9 @@ public class JobFactory {
             throw new Exception("Resource job manager can not be null");
         }
 
-        String templateFileName = getTemplateFileName(resourceJobManager.getResourceJobManagerType(), isSweepType);
+        String templateFileName = getTemplateFileName(resourceJobManager, isSweepType)
+                                .orElseThrow(() -> new Exception("Template file name can not be null"));
+
         switch (resourceJobManager.getResourceJobManagerType()) {
             case PBS:
                 return new PBSJobConfiguration(templateFileName, ".pbs", resourceJobManager.getJobManagerBinPath(),
diff --git a/modules/configuration/server/src/main/resources/SLURM_Fork_Sweep_Groovy.template b/modules/configuration/server/src/main/resources/SLURM_Fork_Sweep_Groovy.template
new file mode 100644
index 0000000..0042f88
--- /dev/null
+++ b/modules/configuration/server/src/main/resources/SLURM_Fork_Sweep_Groovy.template
@@ -0,0 +1,44 @@
+#!${shellName}
+
+# SLURM job submission script generated by Apache Airavata
+<%
+if (queueName != null && queueName != "") out.print '#SBATCH -p ' + queueName + '\n'
+   if (nodes != null && nodes != "") out.print '#SBATCH -N ' + nodes + '\n'
+   if (cpuCount != null && cpuCount != "") out.print '#SBATCH -n ' + cpuCount + '\n'
+   if (usedMem != null && usedMem != "") out.print '#SBATCH --mem=' + usedMem + 'M\n'
+   if (mailAddress != null && mailAddress != "") out.print '#SBATCH --mail-user=' + mailAddress + '\n'
+   if (accountString != null && accountString != "" ) out.print '#SBATCH -A ' + accountString + '\n'
+   if (maxWallTime != null && maxWallTime != "") out.print '#SBATCH -t ' + maxWallTime + '\n'
+   if (jobName != null && jobName != "") out.print '#SBATCH -J ' + jobName + '\n'
+   if (standardOutFile != null && standardOutFile != "") out.print '#SBATCH -o ' + workingDirectory + '/' + standardOutFile + '\n'
+   if (standardErrorFile != null && standardErrorFile != "") out.print '#SBATCH -e ' + workingDirectory + '/' + standardErrorFile + '\n'
+   if (qualityOfService != null && qualityOfService != "") out.print '#SBATCH --qos=' + qualityOfService + '\n'
+   if (reservation != null && reservation != "") out.print '#SBATCH --reservation=' + reservation + '\n'
+   if (queueSpecificMacros != null) for(queueMacro in queueSpecificMacros)  out.print queueMacro +'\n'
+%>
+#SBATCH --mail-type=ALL
+
+<% if (exports != null) for(com in exports)  out.print 'export ' + com +'\n'
+   if (moduleCommands != null) for(mc in moduleCommands)  out.print mc +'\n'
+   if (workingDirectory != null && workingDirectory != "")  out.print 'cd ' + workingDirectory +'\n'
+
+   out.print 'array=( ' + sweepRange.join(' ') + ' )\n'  // emit the sweep values as a shell array; sweepRange is not null-checked here
+   out.print 'for i in "${array[@]}"\n'  // single-quoted Groovy string, so ${array[@]} reaches the shell unexpanded
+   out.print 'do \n'
+   out.print 'pushd $i \n'  // assumes a directory named after each sweep value exists under the current directory - TODO confirm
+
+   out.print '(\n'  // open a subshell holding this iteration's commands
+
+   if (preJobCommands != null) for(pjc in preJobCommands)  out.print pjc +' ;\n'
+   if (jobSubmitterCommand != null && jobSubmitterCommand != "")  out.print jobSubmitterCommand + ' '
+   if (executablePath != null && executablePath != "")  out.print  executablePath + ' '
+   if (inputs != null) for(input in inputs)  out.print input + ' '
+   out.print ' > ' + standardOutFile + ' 2>' + standardErrorFile + ' ;\n'  // redirect targets are relative paths, resolved inside the pushd directory
+   if (postJobCommands != null) for(pjc in postJobCommands)  out.print 'export SLURM_ARRAY_TASK_ID=$i; ' + pjc +' ;\n'  // NOTE(review): presumably lets post-job commands written for the job-array template see the sweep value - confirm
+
+   out.print '\n)&\n'  // close the subshell and run it in the background so the loop continues without waiting
+
+   out.print 'popd\n'
+   out.print 'done\n'
+   out.print 'wait\n'  // block until every backgrounded subshell has finished
+%>
diff --git a/thrift-interface-descriptions/data-models/resource-catalog-models/compute_resource_model.thrift b/thrift-interface-descriptions/data-models/resource-catalog-models/compute_resource_model.thrift
index c4d0137..83b1b1d 100644
--- a/thrift-interface-descriptions/data-models/resource-catalog-models/compute_resource_model.thrift
+++ b/thrift-interface-descriptions/data-models/resource-catalog-models/compute_resource_model.thrift
@@ -89,7 +89,8 @@ enum JobManagerCommand {
     CHECK_JOB,
     SHOW_QUEUE,
     SHOW_RESERVATION,
-    SHOW_START
+    SHOW_START,
+    SWEEPING_SUBMISSION_TYPE
 }