You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@airavata.apache.org by sh...@apache.org on 2015/06/08 17:42:46 UTC
airavata git commit: Merge changes made on the release branch into
master
Repository: airavata
Updated Branches:
refs/heads/master 1ba83f11f -> 144bb8f6d
Merge changes made on the release branch into master
Project: http://git-wip-us.apache.org/repos/asf/airavata/repo
Commit: http://git-wip-us.apache.org/repos/asf/airavata/commit/144bb8f6
Tree: http://git-wip-us.apache.org/repos/asf/airavata/tree/144bb8f6
Diff: http://git-wip-us.apache.org/repos/asf/airavata/diff/144bb8f6
Branch: refs/heads/master
Commit: 144bb8f6d4ab1b7af305877c77ac8d6054473ae8
Parents: 1ba83f1
Author: Shameera Rathanyaka <sh...@gmail.com>
Authored: Mon Jun 8 11:42:43 2015 -0400
Committer: Shameera Rathanyaka <sh...@gmail.com>
Committed: Mon Jun 8 11:42:43 2015 -0400
----------------------------------------------------------------------
.../gsi/ssh/api/job/PBSJobConfiguration.java | 2 +-
.../gfac/gsi/ssh/api/job/PBSOutputParser.java | 6 +++-
.../gsi/ssh/impl/GSISSHAbstractCluster.java | 1 +
.../gfac/monitor/email/EmailBasedMonitor.java | 9 ++++++
.../monitor/email/parser/PBSEmailParser.java | 12 ++++----
.../gfac/ssh/provider/impl/SSHProvider.java | 29 +++++++++++++-------
6 files changed, 41 insertions(+), 18 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/airavata/blob/144bb8f6/modules/gfac/gfac-impl/src/main/java/org/apache/airavata/gfac/gsi/ssh/api/job/PBSJobConfiguration.java
----------------------------------------------------------------------
diff --git a/modules/gfac/gfac-impl/src/main/java/org/apache/airavata/gfac/gsi/ssh/api/job/PBSJobConfiguration.java b/modules/gfac/gfac-impl/src/main/java/org/apache/airavata/gfac/gsi/ssh/api/job/PBSJobConfiguration.java
index d3f6c9c..c5be412 100644
--- a/modules/gfac/gfac-impl/src/main/java/org/apache/airavata/gfac/gsi/ssh/api/job/PBSJobConfiguration.java
+++ b/modules/gfac/gfac-impl/src/main/java/org/apache/airavata/gfac/gsi/ssh/api/job/PBSJobConfiguration.java
@@ -101,7 +101,7 @@ public class PBSJobConfiguration implements JobManagerConfiguration {
@Override
public RawCommandInfo getJobIdMonitorCommand(String jobName, String userName) {
// For PBS there is no option to get jobDetails by JobName, so we search with userName
- return new RawCommandInfo(this.installedPath + "qstat -u " + userName);
+ return new RawCommandInfo(this.installedPath + "qstat -u " + userName + " -f | grep \"Job_Name = " + jobName + "\" -B1");
}
@Override
http://git-wip-us.apache.org/repos/asf/airavata/blob/144bb8f6/modules/gfac/gfac-impl/src/main/java/org/apache/airavata/gfac/gsi/ssh/api/job/PBSOutputParser.java
----------------------------------------------------------------------
diff --git a/modules/gfac/gfac-impl/src/main/java/org/apache/airavata/gfac/gsi/ssh/api/job/PBSOutputParser.java b/modules/gfac/gfac-impl/src/main/java/org/apache/airavata/gfac/gsi/ssh/api/job/PBSOutputParser.java
index a86d7f0..15e2405 100644
--- a/modules/gfac/gfac-impl/src/main/java/org/apache/airavata/gfac/gsi/ssh/api/job/PBSOutputParser.java
+++ b/modules/gfac/gfac-impl/src/main/java/org/apache/airavata/gfac/gsi/ssh/api/job/PBSOutputParser.java
@@ -194,8 +194,12 @@ public class PBSOutputParser implements OutputParser {
@Override
public String parseJobId(String jobName, String rawOutput) throws SSHApiException {
+ /* output will look like
+ Job Id: 2080802.gordon-fe2.local
+ Job_Name = A312402627
+ */
String regJobId = "jobId";
- Pattern pattern = Pattern.compile("\\s*(?<" + regJobId + ">[^\\s]*).* " + jobName + " "); // regex , JOB_ID will come as first column
+ Pattern pattern = Pattern.compile("(?<" + regJobId + ">[^\\s]*)\\s*.* " + jobName);
if (rawOutput != null) {
Matcher matcher = pattern.matcher(rawOutput);
if (matcher.find()) {
http://git-wip-us.apache.org/repos/asf/airavata/blob/144bb8f6/modules/gfac/gfac-impl/src/main/java/org/apache/airavata/gfac/gsi/ssh/impl/GSISSHAbstractCluster.java
----------------------------------------------------------------------
diff --git a/modules/gfac/gfac-impl/src/main/java/org/apache/airavata/gfac/gsi/ssh/impl/GSISSHAbstractCluster.java b/modules/gfac/gfac-impl/src/main/java/org/apache/airavata/gfac/gsi/ssh/impl/GSISSHAbstractCluster.java
index 04241c8..113e4ec 100644
--- a/modules/gfac/gfac-impl/src/main/java/org/apache/airavata/gfac/gsi/ssh/impl/GSISSHAbstractCluster.java
+++ b/modules/gfac/gfac-impl/src/main/java/org/apache/airavata/gfac/gsi/ssh/impl/GSISSHAbstractCluster.java
@@ -631,6 +631,7 @@ public class GSISSHAbstractCluster implements RemoteCluster {
return files;
}
+ @Deprecated
public synchronized void getJobStatuses(String userName, Map<String,JobStatus> jobIDs)throws SSHApiException {
int retry = 3;
RawCommandInfo rawCommandInfo = jobManagerConfiguration.getUserBasedMonitorCommand(userName);
http://git-wip-us.apache.org/repos/asf/airavata/blob/144bb8f6/modules/gfac/gfac-impl/src/main/java/org/apache/airavata/gfac/monitor/email/EmailBasedMonitor.java
----------------------------------------------------------------------
diff --git a/modules/gfac/gfac-impl/src/main/java/org/apache/airavata/gfac/monitor/email/EmailBasedMonitor.java b/modules/gfac/gfac-impl/src/main/java/org/apache/airavata/gfac/monitor/email/EmailBasedMonitor.java
index eea6ef6..992317d 100644
--- a/modules/gfac/gfac-impl/src/main/java/org/apache/airavata/gfac/monitor/email/EmailBasedMonitor.java
+++ b/modules/gfac/gfac-impl/src/main/java/org/apache/airavata/gfac/monitor/email/EmailBasedMonitor.java
@@ -24,6 +24,8 @@ import org.apache.airavata.common.exception.AiravataException;
import org.apache.airavata.common.logger.AiravataLogger;
import org.apache.airavata.common.logger.AiravataLoggerFactory;
import org.apache.airavata.common.utils.ServerSettings;
+import org.apache.airavata.gfac.core.GFacException;
+import org.apache.airavata.gfac.core.GFacUtils;
import org.apache.airavata.gfac.core.context.JobExecutionContext;
import org.apache.airavata.gfac.core.GFacThreadPoolExecutor;
import org.apache.airavata.gfac.core.monitor.JobStatusResult;
@@ -36,6 +38,8 @@ import org.apache.airavata.gfac.monitor.email.parser.UGEEmailParser;
import org.apache.airavata.model.appcatalog.computeresource.ResourceJobManagerType;
import org.apache.airavata.model.messaging.event.JobIdentifier;
import org.apache.airavata.model.messaging.event.JobStatusChangeRequestEvent;
+import org.apache.airavata.model.workspace.experiment.CorrectiveAction;
+import org.apache.airavata.model.workspace.experiment.ErrorCategory;
import org.apache.airavata.model.workspace.experiment.JobState;
import org.apache.airavata.model.workspace.experiment.JobStatus;
@@ -284,6 +288,11 @@ public class EmailBasedMonitor implements Runnable{
jobMonitorMap.remove(jobStatusResult.getJobId());
runOutHandlers = true;
log.info("[EJM]: Job failed email received , removed job from job monitoring. " + jobDetails);
+ try {
+ GFacUtils.saveErrorDetails(jEC, "Job runs on remote compute resource failed", CorrectiveAction.RETRY_SUBMISSION, ErrorCategory.APPLICATION_FAILURE);
+ } catch (GFacException e) {
+ log.info("[EJM]: Error while saving error details for jobId:{}, expId: {}", jEC.getJobDetails().getJobID(), jEC.getExperimentID());
+ }
}else if (resultState == JobState.CANCELED) {
jobMonitorMap.remove(jobStatusResult.getJobId());
runOutHandlers = false; // Do we need to run out handlers in canceled case?
http://git-wip-us.apache.org/repos/asf/airavata/blob/144bb8f6/modules/gfac/gfac-impl/src/main/java/org/apache/airavata/gfac/monitor/email/parser/PBSEmailParser.java
----------------------------------------------------------------------
diff --git a/modules/gfac/gfac-impl/src/main/java/org/apache/airavata/gfac/monitor/email/parser/PBSEmailParser.java b/modules/gfac/gfac-impl/src/main/java/org/apache/airavata/gfac/monitor/email/parser/PBSEmailParser.java
index 4a3c88b..8474d62 100644
--- a/modules/gfac/gfac-impl/src/main/java/org/apache/airavata/gfac/monitor/email/parser/PBSEmailParser.java
+++ b/modules/gfac/gfac-impl/src/main/java/org/apache/airavata/gfac/monitor/email/parser/PBSEmailParser.java
@@ -34,17 +34,17 @@ import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class PBSEmailParser implements EmailParser {
-
private static final Logger log = LoggerFactory.getLogger(PBSEmailParser.class);
-
-
- private static final String REGEX = "[a-zA-Z ]*:[ ]*(?<" + JOBID + ">[a-zA-Z0-9-\\.]*)\\s+[a-zA-Z ]*:[ ]*(?<"+
- JOBNAME + ">[a-zA-Z0-9-\\.]*)\\s+.*\\s+(?<" + STATUS + ">[a-zA-Z\\ ]*)";
- private static final String REGEX_EXIT_STATUS = "Exit_status=(?<" + EXIT_STATUS + ">[\\d]+)";
public static final String BEGUN_EXECUTION = "Begun execution";
public static final String EXECUTION_TERMINATED = "Execution terminated";
public static final String ABORTED_BY_PBS_SERVER = "Aborted by PBS Server";
+ static final String REGEX = "[a-zA-Z ]*:[ ]*(?<" + JOBID + ">[a-zA-Z0-9-\\.]*)\\s+[a-zA-Z ]*:[ ]*(?<" +
+ JOBNAME + ">[a-zA-Z0-9-\\.]*)\\s[\\S|\\s]*(?<" + STATUS + ">" + BEGUN_EXECUTION + "|" +
+ EXECUTION_TERMINATED + "|" + ABORTED_BY_PBS_SERVER + ")";
+
+ private static final String REGEX_EXIT_STATUS = "Exit_status=(?<" + EXIT_STATUS + ">[\\d]+)";
+
@Override
public JobStatusResult parseEmail(Message message) throws MessagingException, AiravataException {
JobStatusResult jobStatusResult = new JobStatusResult();
http://git-wip-us.apache.org/repos/asf/airavata/blob/144bb8f6/modules/gfac/gfac-impl/src/main/java/org/apache/airavata/gfac/ssh/provider/impl/SSHProvider.java
----------------------------------------------------------------------
diff --git a/modules/gfac/gfac-impl/src/main/java/org/apache/airavata/gfac/ssh/provider/impl/SSHProvider.java b/modules/gfac/gfac-impl/src/main/java/org/apache/airavata/gfac/ssh/provider/impl/SSHProvider.java
index d1b35cd..d00c698 100644
--- a/modules/gfac/gfac-impl/src/main/java/org/apache/airavata/gfac/ssh/provider/impl/SSHProvider.java
+++ b/modules/gfac/gfac-impl/src/main/java/org/apache/airavata/gfac/ssh/provider/impl/SSHProvider.java
@@ -22,6 +22,7 @@
package org.apache.airavata.gfac.ssh.provider.impl;
import org.apache.airavata.gfac.core.cluster.RemoteCluster;
+import org.apache.airavata.model.workspace.experiment.TaskState;
import org.apache.airavata.registry.cpi.AppCatalogException;
import org.apache.airavata.common.exception.AiravataException;
import org.apache.airavata.common.exception.ApplicationSettingsException;
@@ -172,20 +173,28 @@ public class SSHProvider extends AbstractProvider {
}
} else {
jobExecutionContext.setJobDetails(jobDetails);
- String verifyJobId = verifyJobSubmission(remoteCluster, jobDetails);
- if (verifyJobId != null && !verifyJobId.isEmpty()) {
- // JobStatus either changed from SUBMITTED to QUEUED or directly to QUEUED
- jobID = verifyJobId;
- jobDetails.setJobID(jobID);
- monitorPublisher.publish(new GfacExperimentStateChangeRequest(new MonitorID(jobExecutionContext)
- , GfacExperimentState.JOBSUBMITTED));
- GFacUtils.saveJobStatus(jobExecutionContext, jobDetails, JobState.QUEUED);
+ int verificationTryCount = 0;
+ while (verificationTryCount++ < 3) {
+ String verifyJobId = verifyJobSubmission(remoteCluster, jobDetails);
+ if (verifyJobId != null && !verifyJobId.isEmpty()) {
+ // JobStatus either changed from SUBMITTED to QUEUED or directly to QUEUED
+ jobID = verifyJobId;
+ jobDetails.setJobID(jobID);
+ monitorPublisher.publish(new GfacExperimentStateChangeRequest(new MonitorID(jobExecutionContext)
+ , GfacExperimentState.JOBSUBMITTED));
+ GFacUtils.saveJobStatus(jobExecutionContext, jobDetails, JobState.QUEUED);
+ break;
+ }
+ Thread.sleep(verificationTryCount * 1000);
}
}
if (jobID == null || jobID.isEmpty()) {
- log.error("Couldn't find remote jobId for JobName:" + jobDetails.getJobName() + ", ExperimentId:" + jobExecutionContext.getExperimentID());
- GFacUtils.updateExperimentStatus(jobExecutionContext.getExperimentID(), ExperimentState.FAILED);
+ String msg = "expId:" + jobExecutionContext.getExperimentID() + " Couldn't find remote jobId for JobName:"
+ + jobDetails.getJobName() + ", both submit and verify steps doesn't return a valid JobId. Hence changing experiment state to Failed";
+ log.error(msg);
+ GFacUtils.saveErrorDetails(jobExecutionContext, msg, CorrectiveAction.CONTACT_SUPPORT, ErrorCategory.AIRAVATA_INTERNAL_ERROR);
+ GFacUtils.publishTaskStatus(jobExecutionContext, monitorPublisher, TaskState.FAILED);
return;
}
data.append("jobDesc=").append(jobDescriptor.toXML());