You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@airavata.apache.org by sh...@apache.org on 2015/06/08 17:42:46 UTC

airavata git commit: Merge changes made on the release branch into master

Repository: airavata
Updated Branches:
  refs/heads/master 1ba83f11f -> 144bb8f6d


Merge changes made on the release branch into master


Project: http://git-wip-us.apache.org/repos/asf/airavata/repo
Commit: http://git-wip-us.apache.org/repos/asf/airavata/commit/144bb8f6
Tree: http://git-wip-us.apache.org/repos/asf/airavata/tree/144bb8f6
Diff: http://git-wip-us.apache.org/repos/asf/airavata/diff/144bb8f6

Branch: refs/heads/master
Commit: 144bb8f6d4ab1b7af305877c77ac8d6054473ae8
Parents: 1ba83f1
Author: Shameera Rathanyaka <sh...@gmail.com>
Authored: Mon Jun 8 11:42:43 2015 -0400
Committer: Shameera Rathanyaka <sh...@gmail.com>
Committed: Mon Jun 8 11:42:43 2015 -0400

----------------------------------------------------------------------
 .../gsi/ssh/api/job/PBSJobConfiguration.java    |  2 +-
 .../gfac/gsi/ssh/api/job/PBSOutputParser.java   |  6 +++-
 .../gsi/ssh/impl/GSISSHAbstractCluster.java     |  1 +
 .../gfac/monitor/email/EmailBasedMonitor.java   |  9 ++++++
 .../monitor/email/parser/PBSEmailParser.java    | 12 ++++----
 .../gfac/ssh/provider/impl/SSHProvider.java     | 29 +++++++++++++-------
 6 files changed, 41 insertions(+), 18 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/airavata/blob/144bb8f6/modules/gfac/gfac-impl/src/main/java/org/apache/airavata/gfac/gsi/ssh/api/job/PBSJobConfiguration.java
----------------------------------------------------------------------
diff --git a/modules/gfac/gfac-impl/src/main/java/org/apache/airavata/gfac/gsi/ssh/api/job/PBSJobConfiguration.java b/modules/gfac/gfac-impl/src/main/java/org/apache/airavata/gfac/gsi/ssh/api/job/PBSJobConfiguration.java
index d3f6c9c..c5be412 100644
--- a/modules/gfac/gfac-impl/src/main/java/org/apache/airavata/gfac/gsi/ssh/api/job/PBSJobConfiguration.java
+++ b/modules/gfac/gfac-impl/src/main/java/org/apache/airavata/gfac/gsi/ssh/api/job/PBSJobConfiguration.java
@@ -101,7 +101,7 @@ public class PBSJobConfiguration implements JobManagerConfiguration {
     @Override
     public RawCommandInfo getJobIdMonitorCommand(String jobName, String userName) {
         // For PBS there is no option to get jobDetails by JobName, so we search with userName
-        return new RawCommandInfo(this.installedPath + "qstat -u " + userName);
+        return new RawCommandInfo(this.installedPath + "qstat -u " + userName + " -f  | grep \"Job_Name = " + jobName + "\" -B1");
     }
 
     @Override

http://git-wip-us.apache.org/repos/asf/airavata/blob/144bb8f6/modules/gfac/gfac-impl/src/main/java/org/apache/airavata/gfac/gsi/ssh/api/job/PBSOutputParser.java
----------------------------------------------------------------------
diff --git a/modules/gfac/gfac-impl/src/main/java/org/apache/airavata/gfac/gsi/ssh/api/job/PBSOutputParser.java b/modules/gfac/gfac-impl/src/main/java/org/apache/airavata/gfac/gsi/ssh/api/job/PBSOutputParser.java
index a86d7f0..15e2405 100644
--- a/modules/gfac/gfac-impl/src/main/java/org/apache/airavata/gfac/gsi/ssh/api/job/PBSOutputParser.java
+++ b/modules/gfac/gfac-impl/src/main/java/org/apache/airavata/gfac/gsi/ssh/api/job/PBSOutputParser.java
@@ -194,8 +194,12 @@ public class PBSOutputParser implements OutputParser {
 
     @Override
     public String parseJobId(String jobName, String rawOutput) throws SSHApiException {
+        /* output will look like
+        Job Id: 2080802.gordon-fe2.local
+            Job_Name = A312402627
+        */
         String regJobId = "jobId";
-        Pattern pattern = Pattern.compile("\\s*(?<" + regJobId + ">[^\\s]*).* " + jobName + " "); // regex , JOB_ID will come as first column
+        Pattern pattern = Pattern.compile("(?<" + regJobId + ">[^\\s]*)\\s*.* " + jobName);
         if (rawOutput != null) {
             Matcher matcher = pattern.matcher(rawOutput);
             if (matcher.find()) {

http://git-wip-us.apache.org/repos/asf/airavata/blob/144bb8f6/modules/gfac/gfac-impl/src/main/java/org/apache/airavata/gfac/gsi/ssh/impl/GSISSHAbstractCluster.java
----------------------------------------------------------------------
diff --git a/modules/gfac/gfac-impl/src/main/java/org/apache/airavata/gfac/gsi/ssh/impl/GSISSHAbstractCluster.java b/modules/gfac/gfac-impl/src/main/java/org/apache/airavata/gfac/gsi/ssh/impl/GSISSHAbstractCluster.java
index 04241c8..113e4ec 100644
--- a/modules/gfac/gfac-impl/src/main/java/org/apache/airavata/gfac/gsi/ssh/impl/GSISSHAbstractCluster.java
+++ b/modules/gfac/gfac-impl/src/main/java/org/apache/airavata/gfac/gsi/ssh/impl/GSISSHAbstractCluster.java
@@ -631,6 +631,7 @@ public class GSISSHAbstractCluster implements RemoteCluster {
         return files;
     }
 
+    @Deprecated
     public synchronized void getJobStatuses(String userName, Map<String,JobStatus> jobIDs)throws SSHApiException {
         int retry = 3;
         RawCommandInfo rawCommandInfo = jobManagerConfiguration.getUserBasedMonitorCommand(userName);

http://git-wip-us.apache.org/repos/asf/airavata/blob/144bb8f6/modules/gfac/gfac-impl/src/main/java/org/apache/airavata/gfac/monitor/email/EmailBasedMonitor.java
----------------------------------------------------------------------
diff --git a/modules/gfac/gfac-impl/src/main/java/org/apache/airavata/gfac/monitor/email/EmailBasedMonitor.java b/modules/gfac/gfac-impl/src/main/java/org/apache/airavata/gfac/monitor/email/EmailBasedMonitor.java
index eea6ef6..992317d 100644
--- a/modules/gfac/gfac-impl/src/main/java/org/apache/airavata/gfac/monitor/email/EmailBasedMonitor.java
+++ b/modules/gfac/gfac-impl/src/main/java/org/apache/airavata/gfac/monitor/email/EmailBasedMonitor.java
@@ -24,6 +24,8 @@ import org.apache.airavata.common.exception.AiravataException;
 import org.apache.airavata.common.logger.AiravataLogger;
 import org.apache.airavata.common.logger.AiravataLoggerFactory;
 import org.apache.airavata.common.utils.ServerSettings;
+import org.apache.airavata.gfac.core.GFacException;
+import org.apache.airavata.gfac.core.GFacUtils;
 import org.apache.airavata.gfac.core.context.JobExecutionContext;
 import org.apache.airavata.gfac.core.GFacThreadPoolExecutor;
 import org.apache.airavata.gfac.core.monitor.JobStatusResult;
@@ -36,6 +38,8 @@ import org.apache.airavata.gfac.monitor.email.parser.UGEEmailParser;
 import org.apache.airavata.model.appcatalog.computeresource.ResourceJobManagerType;
 import org.apache.airavata.model.messaging.event.JobIdentifier;
 import org.apache.airavata.model.messaging.event.JobStatusChangeRequestEvent;
+import org.apache.airavata.model.workspace.experiment.CorrectiveAction;
+import org.apache.airavata.model.workspace.experiment.ErrorCategory;
 import org.apache.airavata.model.workspace.experiment.JobState;
 import org.apache.airavata.model.workspace.experiment.JobStatus;
 
@@ -284,6 +288,11 @@ public class EmailBasedMonitor implements Runnable{
             jobMonitorMap.remove(jobStatusResult.getJobId());
             runOutHandlers = true;
             log.info("[EJM]: Job failed email received , removed job from job monitoring. " + jobDetails);
+            try {
+                GFacUtils.saveErrorDetails(jEC, "Job runs on remote compute resource failed", CorrectiveAction.RETRY_SUBMISSION, ErrorCategory.APPLICATION_FAILURE);
+            } catch (GFacException e) {
+                log.info("[EJM]: Error while saving error details for jobId:{}, expId: {}", jEC.getJobDetails().getJobID(), jEC.getExperimentID());
+            }
         }else if (resultState == JobState.CANCELED) {
             jobMonitorMap.remove(jobStatusResult.getJobId());
             runOutHandlers = false; // Do we need to run out handlers in canceled case?

http://git-wip-us.apache.org/repos/asf/airavata/blob/144bb8f6/modules/gfac/gfac-impl/src/main/java/org/apache/airavata/gfac/monitor/email/parser/PBSEmailParser.java
----------------------------------------------------------------------
diff --git a/modules/gfac/gfac-impl/src/main/java/org/apache/airavata/gfac/monitor/email/parser/PBSEmailParser.java b/modules/gfac/gfac-impl/src/main/java/org/apache/airavata/gfac/monitor/email/parser/PBSEmailParser.java
index 4a3c88b..8474d62 100644
--- a/modules/gfac/gfac-impl/src/main/java/org/apache/airavata/gfac/monitor/email/parser/PBSEmailParser.java
+++ b/modules/gfac/gfac-impl/src/main/java/org/apache/airavata/gfac/monitor/email/parser/PBSEmailParser.java
@@ -34,17 +34,17 @@ import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
 public class PBSEmailParser implements EmailParser {
-
     private static final Logger log = LoggerFactory.getLogger(PBSEmailParser.class);
-
-
-    private static final String REGEX = "[a-zA-Z ]*:[ ]*(?<" +  JOBID + ">[a-zA-Z0-9-\\.]*)\\s+[a-zA-Z ]*:[ ]*(?<"+
-            JOBNAME + ">[a-zA-Z0-9-\\.]*)\\s+.*\\s+(?<" + STATUS + ">[a-zA-Z\\ ]*)";
-    private static final String REGEX_EXIT_STATUS = "Exit_status=(?<" + EXIT_STATUS + ">[\\d]+)";
     public static final String BEGUN_EXECUTION = "Begun execution";
     public static final String EXECUTION_TERMINATED = "Execution terminated";
     public static final String ABORTED_BY_PBS_SERVER = "Aborted by PBS Server";
 
+    static final String REGEX = "[a-zA-Z ]*:[ ]*(?<" + JOBID + ">[a-zA-Z0-9-\\.]*)\\s+[a-zA-Z ]*:[ ]*(?<" +
+            JOBNAME + ">[a-zA-Z0-9-\\.]*)\\s[\\S|\\s]*(?<" + STATUS + ">" + BEGUN_EXECUTION + "|" +
+            EXECUTION_TERMINATED + "|" + ABORTED_BY_PBS_SERVER + ")";
+
+    private static final String REGEX_EXIT_STATUS = "Exit_status=(?<" + EXIT_STATUS + ">[\\d]+)";
+
     @Override
     public JobStatusResult parseEmail(Message message) throws MessagingException, AiravataException {
         JobStatusResult jobStatusResult = new JobStatusResult();

http://git-wip-us.apache.org/repos/asf/airavata/blob/144bb8f6/modules/gfac/gfac-impl/src/main/java/org/apache/airavata/gfac/ssh/provider/impl/SSHProvider.java
----------------------------------------------------------------------
diff --git a/modules/gfac/gfac-impl/src/main/java/org/apache/airavata/gfac/ssh/provider/impl/SSHProvider.java b/modules/gfac/gfac-impl/src/main/java/org/apache/airavata/gfac/ssh/provider/impl/SSHProvider.java
index d1b35cd..d00c698 100644
--- a/modules/gfac/gfac-impl/src/main/java/org/apache/airavata/gfac/ssh/provider/impl/SSHProvider.java
+++ b/modules/gfac/gfac-impl/src/main/java/org/apache/airavata/gfac/ssh/provider/impl/SSHProvider.java
@@ -22,6 +22,7 @@
 package org.apache.airavata.gfac.ssh.provider.impl;
 
 import org.apache.airavata.gfac.core.cluster.RemoteCluster;
+import org.apache.airavata.model.workspace.experiment.TaskState;
 import org.apache.airavata.registry.cpi.AppCatalogException;
 import org.apache.airavata.common.exception.AiravataException;
 import org.apache.airavata.common.exception.ApplicationSettingsException;
@@ -172,20 +173,28 @@ public class SSHProvider extends AbstractProvider {
                         }
                     } else {
                         jobExecutionContext.setJobDetails(jobDetails);
-                        String verifyJobId = verifyJobSubmission(remoteCluster, jobDetails);
-                        if (verifyJobId != null && !verifyJobId.isEmpty()) {
-                            // JobStatus either changed from SUBMITTED to QUEUED or directly to QUEUED
-                            jobID = verifyJobId;
-                            jobDetails.setJobID(jobID);
-                            monitorPublisher.publish(new GfacExperimentStateChangeRequest(new MonitorID(jobExecutionContext)
-                                    , GfacExperimentState.JOBSUBMITTED));
-                            GFacUtils.saveJobStatus(jobExecutionContext, jobDetails, JobState.QUEUED);
+                        int verificationTryCount = 0;
+                        while (verificationTryCount++ < 3) {
+                            String verifyJobId = verifyJobSubmission(remoteCluster, jobDetails);
+                            if (verifyJobId != null && !verifyJobId.isEmpty()) {
+                                // JobStatus either changed from SUBMITTED to QUEUED or directly to QUEUED
+                                jobID = verifyJobId;
+                                jobDetails.setJobID(jobID);
+                                monitorPublisher.publish(new GfacExperimentStateChangeRequest(new MonitorID(jobExecutionContext)
+                                        , GfacExperimentState.JOBSUBMITTED));
+                                GFacUtils.saveJobStatus(jobExecutionContext, jobDetails, JobState.QUEUED);
+                                break;
+                            }
+                            Thread.sleep(verificationTryCount * 1000);
                         }
                     }
 
                     if (jobID == null || jobID.isEmpty()) {
-                        log.error("Couldn't find remote jobId for JobName:" + jobDetails.getJobName() + ", ExperimentId:" + jobExecutionContext.getExperimentID());
-                        GFacUtils.updateExperimentStatus(jobExecutionContext.getExperimentID(), ExperimentState.FAILED);
+                        String msg = "expId:" + jobExecutionContext.getExperimentID() + " Couldn't find remote jobId for JobName:"
+                                + jobDetails.getJobName() + ", both submit and verify steps doesn't return a valid JobId. Hence changing experiment state to Failed";
+                        log.error(msg);
+                        GFacUtils.saveErrorDetails(jobExecutionContext, msg, CorrectiveAction.CONTACT_SUPPORT, ErrorCategory.AIRAVATA_INTERNAL_ERROR);
+                        GFacUtils.publishTaskStatus(jobExecutionContext, monitorPublisher, TaskState.FAILED);
                         return;
                     }
                     data.append("jobDesc=").append(jobDescriptor.toXML());