You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@airavata.apache.org by di...@apache.org on 2020/06/20 06:04:27 UTC
[airavata] branch param-sweep updated: Adding fork sweep facility for slurm
This is an automated email from the ASF dual-hosted git repository.
dimuthuupe pushed a commit to branch param-sweep
in repository https://gitbox.apache.org/repos/asf/airavata.git
The following commit(s) were added to refs/heads/param-sweep by this push:
new bda4684 Adding fork sweep facility for slurm
bda4684 is described below
commit bda4684ea683069238302efe31644c3a463e6b83
Author: Dimuthu Wannipurage <di...@gmail.com>
AuthorDate: Sat Jun 20 02:04:11 2020 -0400
Adding fork sweep facility for slurm
---
.../computeresource/JobManagerCommand.java | 5 ++-
.../impl/task/submission/config/JobFactory.java | 41 ++++++++++++++------
.../resources/SLURM_Fork_Sweep_Groovy.template | 44 ++++++++++++++++++++++
.../compute_resource_model.thrift | 3 +-
4 files changed, 80 insertions(+), 13 deletions(-)
diff --git a/airavata-api/airavata-data-models/src/main/java/org/apache/airavata/model/appcatalog/computeresource/JobManagerCommand.java b/airavata-api/airavata-data-models/src/main/java/org/apache/airavata/model/appcatalog/computeresource/JobManagerCommand.java
index ccb208a..21c6d09 100644
--- a/airavata-api/airavata-data-models/src/main/java/org/apache/airavata/model/appcatalog/computeresource/JobManagerCommand.java
+++ b/airavata-api/airavata-data-models/src/main/java/org/apache/airavata/model/appcatalog/computeresource/JobManagerCommand.java
@@ -60,7 +60,8 @@ public enum JobManagerCommand implements org.apache.thrift.TEnum {
CHECK_JOB(3),
SHOW_QUEUE(4),
SHOW_RESERVATION(5),
- SHOW_START(6);
+ SHOW_START(6),
+ SWEEPING_SUBMISSION_TYPE(7);
private final int value;
@@ -95,6 +96,8 @@ public enum JobManagerCommand implements org.apache.thrift.TEnum {
return SHOW_RESERVATION;
case 6:
return SHOW_START;
+ case 7:
+ return SWEEPING_SUBMISSION_TYPE;
default:
return null;
}
diff --git a/modules/airavata-helix/helix-spectator/src/main/java/org/apache/airavata/helix/impl/task/submission/config/JobFactory.java b/modules/airavata-helix/helix-spectator/src/main/java/org/apache/airavata/helix/impl/task/submission/config/JobFactory.java
index 3ec3662..1c84f7f 100644
--- a/modules/airavata-helix/helix-spectator/src/main/java/org/apache/airavata/helix/impl/task/submission/config/JobFactory.java
+++ b/modules/airavata-helix/helix-spectator/src/main/java/org/apache/airavata/helix/impl/task/submission/config/JobFactory.java
@@ -27,35 +27,52 @@ import org.apache.airavata.registry.cpi.AppCatalogException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import java.util.Optional;
+
public class JobFactory {
private final static Logger logger = LoggerFactory.getLogger(JobFactory.class);
- public static String getTemplateFileName(ResourceJobManagerType resourceJobManagerType, boolean isSweepType) {
+ private static final class SweepingType {
+ static final String JOB_ARRAY_TYPE = "array";
+ static final String FORK_TYPE = "fork";
+ }
+
+ public static Optional<String> getTemplateFileName(ResourceJobManager resourceJobManager, boolean isSweepType) {
+ ResourceJobManagerType resourceJobManagerType = resourceJobManager.getResourceJobManagerType();
if (isSweepType) {
switch (resourceJobManagerType) {
case SLURM:
- return "SLURM_Arr_Groovy.template";
+
+ switch (resourceJobManager.getJobManagerCommands().get(JobManagerCommand.SWEEPING_SUBMISSION_TYPE)) {
+ case SweepingType.JOB_ARRAY_TYPE:
+ return Optional.of("SLURM_Arr_Groovy.template");
+ case SweepingType.FORK_TYPE:
+ return Optional.of("SLURM_Fork_Sweep_Groovy.template");
+ default:
+ return Optional.empty();
+ }
+
default:
- return null;
+ return Optional.empty();
}
} else {
switch (resourceJobManagerType) {
case FORK:
- return "FORK_Groovy.template";
+ return Optional.of("FORK_Groovy.template");
case PBS:
- return "PBS_Groovy.template";
+ return Optional.of("PBS_Groovy.template");
case SLURM:
- return "SLURM_Groovy.template";
+ return Optional.of("SLURM_Groovy.template");
case UGE:
- return "UGE_Groovy.template";
+ return Optional.of("UGE_Groovy.template");
case LSF:
- return "LSF_Groovy.template";
+ return Optional.of("LSF_Groovy.template");
case CLOUD:
- return "CLOUD_Groovy.template";
+ return Optional.of("CLOUD_Groovy.template");
default:
- return null;
+ return Optional.empty();
}
}
}
@@ -112,7 +129,9 @@ public class JobFactory {
throw new Exception("Resource job manager can not be null");
}
- String templateFileName = getTemplateFileName(resourceJobManager.getResourceJobManagerType(), isSweepType);
+ String templateFileName = getTemplateFileName(resourceJobManager, isSweepType)
+ .orElseThrow(() -> new Exception("Template file name can not be null"));
+
switch (resourceJobManager.getResourceJobManagerType()) {
case PBS:
return new PBSJobConfiguration(templateFileName, ".pbs", resourceJobManager.getJobManagerBinPath(),
diff --git a/modules/configuration/server/src/main/resources/SLURM_Fork_Sweep_Groovy.template b/modules/configuration/server/src/main/resources/SLURM_Fork_Sweep_Groovy.template
new file mode 100644
index 0000000..0042f88
--- /dev/null
+++ b/modules/configuration/server/src/main/resources/SLURM_Fork_Sweep_Groovy.template
@@ -0,0 +1,44 @@
+#!${shellName}
+
+# SLURM job submission script generated by Apache Airavata
+<%
+if (queueName != null && queueName != "") out.print '#SBATCH -p ' + queueName + '\n'
+ if (nodes != null && nodes != "") out.print '#SBATCH -N ' + nodes + '\n'
+ if (cpuCount != null && cpuCount != "") out.print '#SBATCH -n ' + cpuCount + '\n'
+ if (usedMem != null && usedMem != "") out.print '#SBATCH --mem=' + usedMem + 'M\n'
+ if (mailAddress != null && mailAddress != "") out.print '#SBATCH --mail-user=' + mailAddress + '\n'
+ if (accountString != null && accountString != "" ) out.print '#SBATCH -A ' + accountString + '\n'
+ if (maxWallTime != null && maxWallTime != "") out.print '#SBATCH -t ' + maxWallTime + '\n'
+ if (jobName != null && jobName != "") out.print '#SBATCH -J ' + jobName + '\n'
+ if (standardOutFile != null && standardOutFile != "") out.print '#SBATCH -o ' + workingDirectory + '/' + standardOutFile + '\n'
+ if (standardErrorFile != null && standardErrorFile != "") out.print '#SBATCH -e ' + workingDirectory + '/' + standardErrorFile + '\n'
+ if (qualityOfService != null && qualityOfService != "") out.print '#SBATCH --qos=' + qualityOfService + '\n'
+ if (reservation != null && reservation != "") out.print '#SBATCH --reservation=' + reservation + '\n'
+ if (queueSpecificMacros != null) for(queueMacro in queueSpecificMacros) out.print queueMacro +'\n'
+%>
+#SBATCH --mail-type=ALL
+
+<% if (exports != null) for(com in exports) out.print 'export ' + com +'\n'
+ if (moduleCommands != null) for(mc in moduleCommands) out.print mc +'\n'
+ if (workingDirectory != null && workingDirectory != "") out.print 'cd ' + workingDirectory +'\n'
+
+ out.print 'array=( ' + sweepRange.join(' ') + ' )\n'
+ out.print 'for i in "${array[@]}"\n'
+ out.print 'do \n'
+ out.print 'pushd $i \n'
+
+ out.print '(\n'
+
+ if (preJobCommands != null) for(pjc in preJobCommands) out.print pjc +' ;\n'
+ if (jobSubmitterCommand != null && jobSubmitterCommand != "") out.print jobSubmitterCommand + ' '
+ if (executablePath != null && executablePath != "") out.print executablePath + ' '
+ if (inputs != null) for(input in inputs) out.print input + ' '
+ out.print ' > ' + standardOutFile + ' 2>' + standardErrorFile + ' ;\n'
+ if (postJobCommands != null) for(pjc in postJobCommands) out.print 'export SLURM_ARRAY_TASK_ID=$i; ' + pjc +' ;\n'
+
+ out.print '\n)&\n'
+
+ out.print 'popd\n'
+ out.print 'done\n'
+ out.print 'wait\n'
+%>
diff --git a/thrift-interface-descriptions/data-models/resource-catalog-models/compute_resource_model.thrift b/thrift-interface-descriptions/data-models/resource-catalog-models/compute_resource_model.thrift
index c4d0137..83b1b1d 100644
--- a/thrift-interface-descriptions/data-models/resource-catalog-models/compute_resource_model.thrift
+++ b/thrift-interface-descriptions/data-models/resource-catalog-models/compute_resource_model.thrift
@@ -89,7 +89,8 @@ enum JobManagerCommand {
CHECK_JOB,
SHOW_QUEUE,
SHOW_RESERVATION,
- SHOW_START
+ SHOW_START,
+ SWEEPING_SUBMISSION_TYPE
}