You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@airavata.apache.org by la...@apache.org on 2014/09/18 22:40:12 UTC
git commit: increasing the failed count and completely rely on amqp monitoring
Repository: airavata
Updated Branches:
refs/heads/master e88723225 -> 69ada188d
increasing the failed count and completely rely on amqp monitoring
Project: http://git-wip-us.apache.org/repos/asf/airavata/repo
Commit: http://git-wip-us.apache.org/repos/asf/airavata/commit/69ada188
Tree: http://git-wip-us.apache.org/repos/asf/airavata/tree/69ada188
Diff: http://git-wip-us.apache.org/repos/asf/airavata/diff/69ada188
Branch: refs/heads/master
Commit: 69ada188dac7fb25bc56b72fa50c487bb91ca7c3
Parents: e887232
Author: lahiru <la...@apache.org>
Authored: Thu Sep 18 16:40:07 2014 -0400
Committer: lahiru <la...@apache.org>
Committed: Thu Sep 18 16:40:07 2014 -0400
----------------------------------------------------------------------
.../client/samples/CreateLaunchExperiment.java | 36 ++++++++++++++------
.../monitor/impl/pull/qstat/HPCPullMonitor.java | 9 +++--
.../airavata/gfac/monitor/util/CommonUtils.java | 2 +-
.../gsi/ssh/api/job/PBSOutputParser.java | 5 +--
.../gsi/ssh/api/job/SlurmOutputParser.java | 5 +--
5 files changed, 38 insertions(+), 19 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/airavata/blob/69ada188/airavata-api/airavata-client-sdks/java-client-samples/src/main/java/org/apache/airavata/client/samples/CreateLaunchExperiment.java
----------------------------------------------------------------------
diff --git a/airavata-api/airavata-client-sdks/java-client-samples/src/main/java/org/apache/airavata/client/samples/CreateLaunchExperiment.java b/airavata-api/airavata-client-sdks/java-client-samples/src/main/java/org/apache/airavata/client/samples/CreateLaunchExperiment.java
index 490fbf9..e790fc5 100644
--- a/airavata-api/airavata-client-sdks/java-client-samples/src/main/java/org/apache/airavata/client/samples/CreateLaunchExperiment.java
+++ b/airavata-api/airavata-client-sdks/java-client-samples/src/main/java/org/apache/airavata/client/samples/CreateLaunchExperiment.java
@@ -40,17 +40,17 @@ import java.util.Map;
public class CreateLaunchExperiment {
//FIXME: Read from a config file
- public static final String THRIFT_SERVER_HOST = "localhost";
- public static final int THRIFT_SERVER_PORT = 8930;
+ public static final String THRIFT_SERVER_HOST = "149.165.228.109";
+ public static final int THRIFT_SERVER_PORT = 9930;
private final static Logger logger = LoggerFactory.getLogger(CreateLaunchExperiment.class);
private static final String DEFAULT_USER = "default.registry.user";
private static final String DEFAULT_GATEWAY = "default.registry.gateway";
private static Airavata.Client airavataClient;
- private static String echoAppId = "Echo_b7cebf37-df12-4803-a50c-efdbc2edd9b6";
+ private static String echoAppId = "Echo_c6e6aaac-7d9d-44fc-aba2-63b5100528e8";
private static String wrfAppId = "WRF_5f097c9c-7066-49ec-aed7-4e39607b3adc";
private static String amberAppId = "Amber_89906be6-5678-49a6-9d04-a0604fbdef2e";
- private static String localHost = "localhost";
+ private static String localHost = "149.165.228.109";
private static String trestlesHostName = "trestles.sdsc.xsede.org";
private static String stampedeHostName = "stampede.tacc.xsede.org";
private static String br2HostName = "bigred2.uits.iu.edu";
@@ -59,16 +59,29 @@ public class CreateLaunchExperiment {
try {
airavataClient = AiravataClientFactory.createAiravataClient(THRIFT_SERVER_HOST, THRIFT_SERVER_PORT);
System.out.println("API version is " + airavataClient.getAPIVersion());
- registerApplications();
+// registerApplications();
//// final String expId = createExperimentForSSHHost(airavata);
-// final String expId = createEchoExperimentForTrestles(airavataClient);
// final String expId = createEchoExperimentForStampede(airavataClient);
// final String expId = createExperimentEchoForLocalHost(airavataClient);
// final String expId = createExperimentWRFTrestles(airavataClient);
-// final String expId = createExperimentForBR2(airavataClient);
+ for(int i=0;i<100;i++) {
+// (new Thread(){
+// @Override
+// public void run() {
+// try {
+ final String expId = createExperimentForBR2(airavataClient);
+ launchExperiment(airavataClient, expId);
+ System.out.println(expId);
+// } catch (Exception e) {
+// logger.error("Error while connecting with server", e.getMessage());
+// e.printStackTrace();
+// }
+// }
+// }).start();
+// final String expId = createEchoExperimentForTrestles(airavataClient);
// final String expId = createExperimentForBR2Amber(airavataClient);
// final String expId = createExperimentWRFStampede(airavataClient);
// final String expId = createExperimentForStampedeAmber(airavataClient);
@@ -76,7 +89,8 @@ public class CreateLaunchExperiment {
// System.out.println("Experiment ID : " + expId);
// updateExperiment(airavata, expId);
-// launchExperiment(airavataClient, expId);
+
+ }
// System.out.println("retrieved exp id : " + experiment.getExperimentID());
} catch (Exception e) {
@@ -568,11 +582,11 @@ public class CreateLaunchExperiment {
output.setValue("");
exOut.add(output);
- Project project = ProjectModelUtil.createProject("default", "admin", "test project");
+ Project project = ProjectModelUtil.createProject("default", "lahiru", "test project");
String projectId = client.createProject(project);
Experiment simpleExperiment =
- ExperimentModelUtil.createSimpleExperiment(projectId, "admin", "sshEchoExperiment", "SimpleEchoBR", echoAppId, exInputs);
+ ExperimentModelUtil.createSimpleExperiment(projectId, "lahiru", "sshEchoExperiment", "SimpleEchoBR", echoAppId, exInputs);
simpleExperiment.setExperimentOutputs(exOut);
Map<String, String> computeResources = airavataClient.getAvailableAppInterfaceComputeResources(echoAppId);
@@ -856,7 +870,7 @@ public class CreateLaunchExperiment {
public static void launchExperiment(Airavata.Client client, String expId)
throws TException {
try {
- String sshTokenId = "61abd2ff-f92b-4901-a077-07b51abe2c5d";
+ String sshTokenId = "2c308fa9-99f8-4baa-92e4-d062e311483c";
String gsisshTokenId = "61abd2ff-f92b-4901-a077-07b51abe2c5d";
client.launchExperiment(expId, sshTokenId);
} catch (ExperimentNotFoundException e) {
http://git-wip-us.apache.org/repos/asf/airavata/blob/69ada188/modules/gfac/gfac-monitor/src/main/java/org/apache/airavata/gfac/monitor/impl/pull/qstat/HPCPullMonitor.java
----------------------------------------------------------------------
diff --git a/modules/gfac/gfac-monitor/src/main/java/org/apache/airavata/gfac/monitor/impl/pull/qstat/HPCPullMonitor.java b/modules/gfac/gfac-monitor/src/main/java/org/apache/airavata/gfac/monitor/impl/pull/qstat/HPCPullMonitor.java
index 5ea9cee..5260786 100644
--- a/modules/gfac/gfac-monitor/src/main/java/org/apache/airavata/gfac/monitor/impl/pull/qstat/HPCPullMonitor.java
+++ b/modules/gfac/gfac-monitor/src/main/java/org/apache/airavata/gfac/monitor/impl/pull/qstat/HPCPullMonitor.java
@@ -63,7 +63,7 @@ import java.util.concurrent.LinkedBlockingQueue;
*/
public class HPCPullMonitor extends PullMonitor {
private final static Logger logger = LoggerFactory.getLogger(HPCPullMonitor.class);
- public static final int FAILED_COUNT = 5;
+ public static final int FAILED_COUNT = 100000;
// I think this should use DelayedBlocking Queue to do the monitoring*/
private BlockingQueue<UserMonitorData> queue;
@@ -207,9 +207,11 @@ public class HPCPullMonitor extends PullMonitor {
if (cancelMId.equals(iMonitorID.getUserName() + "," + iMonitorID.getJobName())) {
logger.info("This job is finished because push notification came with <username,jobName> " + cancelMId);
completedJobs.add(iMonitorID);
- iterator.remove();
iMonitorID.setStatus(JobState.COMPLETE);
}
+ //we have to make this empty everytime we iterate, otherwise this list will accumilate and will
+ // lead to a memory leak
+ iterator.remove();
}
}
Map<String, JobState> jobStatuses = connection.getJobStatuses(monitorID);
@@ -241,7 +243,8 @@ public class HPCPullMonitor extends PullMonitor {
logger.info(e.getLocalizedMessage(), e);
}
} else if (iMonitorID.getFailedCount() > FAILED_COUNT) {
- logger.error("Tried to monitor the job with ID " + iMonitorID.getJobID() + " But failed 3 times, so skip this Job from Monitor");
+ logger.error("Tried to monitor the job with ID " + iMonitorID.getJobID() + " But failed" +iMonitorID.getFailedCount()+
+ " 3 times, so skip this Job from Monitor");
iMonitorID.setLastMonitored(new Timestamp((new Date()).getTime()));
completedJobs.add(iMonitorID);
try {
http://git-wip-us.apache.org/repos/asf/airavata/blob/69ada188/modules/gfac/gfac-monitor/src/main/java/org/apache/airavata/gfac/monitor/util/CommonUtils.java
----------------------------------------------------------------------
diff --git a/modules/gfac/gfac-monitor/src/main/java/org/apache/airavata/gfac/monitor/util/CommonUtils.java b/modules/gfac/gfac-monitor/src/main/java/org/apache/airavata/gfac/monitor/util/CommonUtils.java
index fb4d898..c40d50d 100644
--- a/modules/gfac/gfac-monitor/src/main/java/org/apache/airavata/gfac/monitor/util/CommonUtils.java
+++ b/modules/gfac/gfac-monitor/src/main/java/org/apache/airavata/gfac/monitor/util/CommonUtils.java
@@ -172,7 +172,7 @@ public class CommonUtils {
}
}
}
- logger.error("Cannot find the given MonitorID in the queue with userName " +
+ logger.info("Cannot find the given MonitorID in the queue with userName " +
monitorID.getUserName() + " and jobID " + monitorID.getJobID());
logger.info("This might not be an error because someone else removed this job from the queue");
}
http://git-wip-us.apache.org/repos/asf/airavata/blob/69ada188/tools/gsissh/src/main/java/org/apache/airavata/gsi/ssh/api/job/PBSOutputParser.java
----------------------------------------------------------------------
diff --git a/tools/gsissh/src/main/java/org/apache/airavata/gsi/ssh/api/job/PBSOutputParser.java b/tools/gsissh/src/main/java/org/apache/airavata/gsi/ssh/api/job/PBSOutputParser.java
index e8e8cbe..6ea5846 100644
--- a/tools/gsissh/src/main/java/org/apache/airavata/gsi/ssh/api/job/PBSOutputParser.java
+++ b/tools/gsissh/src/main/java/org/apache/airavata/gsi/ssh/api/job/PBSOutputParser.java
@@ -159,7 +159,7 @@ public class PBSOutputParser implements OutputParser {
public void parse(String userName, Map<String, JobStatus> statusMap, String rawOutput) {
log.debug(rawOutput);
String[] info = rawOutput.split("\n");
- int lastStop = 0;
+// int lastStop = 0;
for (String jobID : statusMap.keySet()) {
String jobName = jobID.split(",")[1];
for (int i = 0; i < info.length; i++) {
@@ -174,10 +174,11 @@ public class PBSOutputParser implements OutputParser {
columnList.add(s);
}
}
- lastStop = i + 1;
+// lastStop = i + 1;
statusMap.put(jobID, JobStatus.valueOf(columnList.get(9)));
break;
}
+ log.error("Couldn't find the status of the Job with JobName: " + jobName + "Job Id: " + jobID.split(",")[0]);
}
}
}
http://git-wip-us.apache.org/repos/asf/airavata/blob/69ada188/tools/gsissh/src/main/java/org/apache/airavata/gsi/ssh/api/job/SlurmOutputParser.java
----------------------------------------------------------------------
diff --git a/tools/gsissh/src/main/java/org/apache/airavata/gsi/ssh/api/job/SlurmOutputParser.java b/tools/gsissh/src/main/java/org/apache/airavata/gsi/ssh/api/job/SlurmOutputParser.java
index 8660f35..64d8417 100644
--- a/tools/gsissh/src/main/java/org/apache/airavata/gsi/ssh/api/job/SlurmOutputParser.java
+++ b/tools/gsissh/src/main/java/org/apache/airavata/gsi/ssh/api/job/SlurmOutputParser.java
@@ -156,7 +156,7 @@ public class SlurmOutputParser implements OutputParser {
log.info("There are no jobs with this username ... ");
return;
}
- int lastStop = 0;
+// int lastStop = 0;
for (String jobID : statusMap.keySet()) {
String jobId = jobID.split(",")[0];
String jobName = jobID.split(",")[1];
@@ -172,10 +172,11 @@ public class SlurmOutputParser implements OutputParser {
columnList.add(s);
}
}
- lastStop = i + 1;
+// lastStop = i + 1;
statusMap.put(jobID, JobStatus.valueOf(columnList.get(4)));
break;
}
+ log.error("Couldn't find the status of the Job with JobName: " + jobName + "Job Id: " + jobId);
}
}
}