You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@airavata.apache.org by ch...@apache.org on 2015/05/19 20:23:20 UTC

airavata git commit: cancel job fix

Repository: airavata
Updated Branches:
  refs/heads/master ad41af738 -> 7229a4abd


cancel job fix


Project: http://git-wip-us.apache.org/repos/asf/airavata/repo
Commit: http://git-wip-us.apache.org/repos/asf/airavata/commit/7229a4ab
Tree: http://git-wip-us.apache.org/repos/asf/airavata/tree/7229a4ab
Diff: http://git-wip-us.apache.org/repos/asf/airavata/diff/7229a4ab

Branch: refs/heads/master
Commit: 7229a4abd03ba06325d6245a962ff5b754a95df7
Parents: ad41af7
Author: Chathuri Wimalasena <ch...@apache.org>
Authored: Tue May 19 14:23:13 2015 -0400
Committer: Chathuri Wimalasena <ch...@apache.org>
Committed: Tue May 19 14:23:13 2015 -0400

----------------------------------------------------------------------
 .../gfac/ssh/provider/impl/SSHProvider.java         |  1 +
 .../airavata/gsi/ssh/api/job/SlurmOutputParser.java |  7 ++++++-
 .../gsi/ssh/impl/GSISSHAbstractCluster.java         | 16 +++++++++-------
 3 files changed, 16 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/airavata/blob/7229a4ab/modules/gfac/gfac-ssh/src/main/java/org/apache/airavata/gfac/ssh/provider/impl/SSHProvider.java
----------------------------------------------------------------------
diff --git a/modules/gfac/gfac-ssh/src/main/java/org/apache/airavata/gfac/ssh/provider/impl/SSHProvider.java b/modules/gfac/gfac-ssh/src/main/java/org/apache/airavata/gfac/ssh/provider/impl/SSHProvider.java
index b5c94e2..b28c008 100644
--- a/modules/gfac/gfac-ssh/src/main/java/org/apache/airavata/gfac/ssh/provider/impl/SSHProvider.java
+++ b/modules/gfac/gfac-ssh/src/main/java/org/apache/airavata/gfac/ssh/provider/impl/SSHProvider.java
@@ -264,6 +264,7 @@ public class SSHProvider extends AbstractProvider {
                         GFacUtils.saveJobStatus(jobExecutionContext, jobDetails, JobState.CANCELED, monitorPublisher);
                     } else {
                         log.info("Job Cancel operation failed");
+                        GFacUtils.saveJobStatus(jobExecutionContext, jobDetails, JobState.FAILED, monitorPublisher);
                     }
                 } else {
                     log.error("No Job Id is set, so cannot perform the cancel operation !!!");

http://git-wip-us.apache.org/repos/asf/airavata/blob/7229a4ab/tools/gsissh/src/main/java/org/apache/airavata/gsi/ssh/api/job/SlurmOutputParser.java
----------------------------------------------------------------------
diff --git a/tools/gsissh/src/main/java/org/apache/airavata/gsi/ssh/api/job/SlurmOutputParser.java b/tools/gsissh/src/main/java/org/apache/airavata/gsi/ssh/api/job/SlurmOutputParser.java
index b134366..d37d444 100644
--- a/tools/gsissh/src/main/java/org/apache/airavata/gsi/ssh/api/job/SlurmOutputParser.java
+++ b/tools/gsissh/src/main/java/org/apache/airavata/gsi/ssh/api/job/SlurmOutputParser.java
@@ -76,7 +76,12 @@ public class SlurmOutputParser implements OutputParser {
                             column++;
                             break;
                         case 6:
-                            descriptor.setNodes(Integer.parseInt(each));
+                            try {
+                                int nodes = Integer.parseInt(each);
+                                descriptor.setNodes(nodes);
+                            }catch (Exception e){
+                                log.error("Node count read from command output is not an integer !!!");
+                            }
                             column++;
                             break;
                         case 7:

http://git-wip-us.apache.org/repos/asf/airavata/blob/7229a4ab/tools/gsissh/src/main/java/org/apache/airavata/gsi/ssh/impl/GSISSHAbstractCluster.java
----------------------------------------------------------------------
diff --git a/tools/gsissh/src/main/java/org/apache/airavata/gsi/ssh/impl/GSISSHAbstractCluster.java b/tools/gsissh/src/main/java/org/apache/airavata/gsi/ssh/impl/GSISSHAbstractCluster.java
index 022c92f..7a04f11 100644
--- a/tools/gsissh/src/main/java/org/apache/airavata/gsi/ssh/impl/GSISSHAbstractCluster.java
+++ b/tools/gsissh/src/main/java/org/apache/airavata/gsi/ssh/impl/GSISSHAbstractCluster.java
@@ -237,23 +237,25 @@ public class GSISSHAbstractCluster implements Cluster {
     }
 
     public synchronized JobDescriptor cancelJob(String jobID) throws SSHApiException {
-        JobDescriptor jobDescriptorById = getJobDescriptorById(jobID);
-        if (jobDescriptorById.getStatus() == null || jobDescriptorById.getStatus().isEmpty()) {
-            return null;
-        }
-        JobStatus jobStatus = JobStatus.valueOf(jobDescriptorById.getStatus());
-        if (jobStatus == JobStatus.U || jobStatus == JobStatus.F) { // TODO: add other cases. Which lead to invalid cancel.
+        JobStatus jobStatus = getJobStatus(jobID);
+        if (jobStatus == null || jobStatus == JobStatus.U) {
             log.info("Validation before cancel is failed, couldn't found job in remote host to cancel. Job may be already completed|failed|canceled");
             return null;
         }
         RawCommandInfo rawCommandInfo = jobManagerConfiguration.getCancelCommand(jobID);
+
         StandardOutReader stdOutReader = new StandardOutReader();
         log.info("Executing RawCommand : " + rawCommandInfo.getCommand());
         CommandExecutor.executeCommand(rawCommandInfo, this.getSession(), stdOutReader);
         String outputifAvailable = getOutputifAvailable(stdOutReader, "Error reading output of job submission", jobManagerConfiguration.getBaseCancelCommand());
         // this might not be the case for all teh resources, if so Cluster implementation can override this method
         // because here after cancelling we try to get the job description and return it back
-        return jobDescriptorById;
+        try {
+            return this.getJobDescriptorById(jobID);
+        } catch (Exception e) {
+            //its ok to fail to get status when the job is gone
+            return null;
+        }
     }
 
     public synchronized String submitBatchJobWithScript(String scriptPath, String workingDirectory) throws SSHApiException {