You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@airavata.apache.org by la...@apache.org on 2014/09/18 22:40:12 UTC

git commit: increasing the failed count and completely relying on AMQP monitoring

Repository: airavata
Updated Branches:
  refs/heads/master e88723225 -> 69ada188d


increasing the failed count and completely relying on AMQP monitoring


Project: http://git-wip-us.apache.org/repos/asf/airavata/repo
Commit: http://git-wip-us.apache.org/repos/asf/airavata/commit/69ada188
Tree: http://git-wip-us.apache.org/repos/asf/airavata/tree/69ada188
Diff: http://git-wip-us.apache.org/repos/asf/airavata/diff/69ada188

Branch: refs/heads/master
Commit: 69ada188dac7fb25bc56b72fa50c487bb91ca7c3
Parents: e887232
Author: lahiru <la...@apache.org>
Authored: Thu Sep 18 16:40:07 2014 -0400
Committer: lahiru <la...@apache.org>
Committed: Thu Sep 18 16:40:07 2014 -0400

----------------------------------------------------------------------
 .../client/samples/CreateLaunchExperiment.java  | 36 ++++++++++++++------
 .../monitor/impl/pull/qstat/HPCPullMonitor.java |  9 +++--
 .../airavata/gfac/monitor/util/CommonUtils.java |  2 +-
 .../gsi/ssh/api/job/PBSOutputParser.java        |  5 +--
 .../gsi/ssh/api/job/SlurmOutputParser.java      |  5 +--
 5 files changed, 38 insertions(+), 19 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/airavata/blob/69ada188/airavata-api/airavata-client-sdks/java-client-samples/src/main/java/org/apache/airavata/client/samples/CreateLaunchExperiment.java
----------------------------------------------------------------------
diff --git a/airavata-api/airavata-client-sdks/java-client-samples/src/main/java/org/apache/airavata/client/samples/CreateLaunchExperiment.java b/airavata-api/airavata-client-sdks/java-client-samples/src/main/java/org/apache/airavata/client/samples/CreateLaunchExperiment.java
index 490fbf9..e790fc5 100644
--- a/airavata-api/airavata-client-sdks/java-client-samples/src/main/java/org/apache/airavata/client/samples/CreateLaunchExperiment.java
+++ b/airavata-api/airavata-client-sdks/java-client-samples/src/main/java/org/apache/airavata/client/samples/CreateLaunchExperiment.java
@@ -40,17 +40,17 @@ import java.util.Map;
 public class CreateLaunchExperiment {
 
     //FIXME: Read from a config file
-    public static final String THRIFT_SERVER_HOST = "localhost";
-    public static final int THRIFT_SERVER_PORT = 8930;
+    public static final String THRIFT_SERVER_HOST = "149.165.228.109";
+    public static final int THRIFT_SERVER_PORT = 9930;
     private final static Logger logger = LoggerFactory.getLogger(CreateLaunchExperiment.class);
     private static final String DEFAULT_USER = "default.registry.user";
     private static final String DEFAULT_GATEWAY = "default.registry.gateway";
     private static Airavata.Client airavataClient;
-    private static String echoAppId = "Echo_b7cebf37-df12-4803-a50c-efdbc2edd9b6";
+    private static String echoAppId = "Echo_c6e6aaac-7d9d-44fc-aba2-63b5100528e8";
     private static String wrfAppId = "WRF_5f097c9c-7066-49ec-aed7-4e39607b3adc";
     private static String amberAppId = "Amber_89906be6-5678-49a6-9d04-a0604fbdef2e";
 
-    private static String localHost = "localhost";
+    private static String localHost = "149.165.228.109";
     private static String trestlesHostName = "trestles.sdsc.xsede.org";
     private static String stampedeHostName = "stampede.tacc.xsede.org";
     private static String br2HostName = "bigred2.uits.iu.edu";
@@ -59,16 +59,29 @@ public class CreateLaunchExperiment {
         try {
             airavataClient = AiravataClientFactory.createAiravataClient(THRIFT_SERVER_HOST, THRIFT_SERVER_PORT);
             System.out.println("API version is " + airavataClient.getAPIVersion());
-            registerApplications();
+//            registerApplications();
 
 
 
 ////            final String expId = createExperimentForSSHHost(airavata);
-//            final String expId = createEchoExperimentForTrestles(airavataClient);
 //            final String expId = createEchoExperimentForStampede(airavataClient);
 //            final String expId = createExperimentEchoForLocalHost(airavataClient);
 //            final String expId = createExperimentWRFTrestles(airavataClient);
-//            final String expId = createExperimentForBR2(airavataClient);
+            for(int i=0;i<100;i++) {
+//                (new Thread(){
+//                    @Override
+//                    public void run() {
+//                        try {
+                            final String expId = createExperimentForBR2(airavataClient);
+                            launchExperiment(airavataClient, expId);
+                            System.out.println(expId);
+//                        } catch (Exception e) {
+//                            logger.error("Error while connecting with server", e.getMessage());
+//                            e.printStackTrace();
+//                        }
+//                    }
+//                }).start();
+//            final String expId = createEchoExperimentForTrestles(airavataClient);
 //            final String expId = createExperimentForBR2Amber(airavataClient);
 //            final String expId = createExperimentWRFStampede(airavataClient);
 //            final String expId = createExperimentForStampedeAmber(airavataClient);
@@ -76,7 +89,8 @@ public class CreateLaunchExperiment {
 
 //            System.out.println("Experiment ID : " + expId);
 //            updateExperiment(airavata, expId);
-//            launchExperiment(airavataClient, expId);
+
+            }
 
 //            System.out.println("retrieved exp id : " + experiment.getExperimentID());
         } catch (Exception e) {
@@ -568,11 +582,11 @@ public class CreateLaunchExperiment {
             output.setValue("");
             exOut.add(output);
 
-            Project project = ProjectModelUtil.createProject("default", "admin", "test project");
+            Project project = ProjectModelUtil.createProject("default", "lahiru", "test project");
             String projectId = client.createProject(project);
 
             Experiment simpleExperiment =
-                    ExperimentModelUtil.createSimpleExperiment(projectId, "admin", "sshEchoExperiment", "SimpleEchoBR", echoAppId, exInputs);
+                    ExperimentModelUtil.createSimpleExperiment(projectId, "lahiru", "sshEchoExperiment", "SimpleEchoBR", echoAppId, exInputs);
             simpleExperiment.setExperimentOutputs(exOut);
 
             Map<String, String> computeResources = airavataClient.getAvailableAppInterfaceComputeResources(echoAppId);
@@ -856,7 +870,7 @@ public class CreateLaunchExperiment {
     public static void launchExperiment(Airavata.Client client, String expId)
             throws TException {
         try {
-            String sshTokenId = "61abd2ff-f92b-4901-a077-07b51abe2c5d";
+            String sshTokenId = "2c308fa9-99f8-4baa-92e4-d062e311483c";
             String gsisshTokenId = "61abd2ff-f92b-4901-a077-07b51abe2c5d";
             client.launchExperiment(expId, sshTokenId);
         } catch (ExperimentNotFoundException e) {

http://git-wip-us.apache.org/repos/asf/airavata/blob/69ada188/modules/gfac/gfac-monitor/src/main/java/org/apache/airavata/gfac/monitor/impl/pull/qstat/HPCPullMonitor.java
----------------------------------------------------------------------
diff --git a/modules/gfac/gfac-monitor/src/main/java/org/apache/airavata/gfac/monitor/impl/pull/qstat/HPCPullMonitor.java b/modules/gfac/gfac-monitor/src/main/java/org/apache/airavata/gfac/monitor/impl/pull/qstat/HPCPullMonitor.java
index 5ea9cee..5260786 100644
--- a/modules/gfac/gfac-monitor/src/main/java/org/apache/airavata/gfac/monitor/impl/pull/qstat/HPCPullMonitor.java
+++ b/modules/gfac/gfac-monitor/src/main/java/org/apache/airavata/gfac/monitor/impl/pull/qstat/HPCPullMonitor.java
@@ -63,7 +63,7 @@ import java.util.concurrent.LinkedBlockingQueue;
  */
 public class HPCPullMonitor extends PullMonitor {
     private final static Logger logger = LoggerFactory.getLogger(HPCPullMonitor.class);
-    public static final int FAILED_COUNT = 5;
+    public static final int FAILED_COUNT = 100000;
 
     // I think this should use DelayedBlocking Queue to do the monitoring*/
     private BlockingQueue<UserMonitorData> queue;
@@ -207,9 +207,11 @@ public class HPCPullMonitor extends PullMonitor {
                             if (cancelMId.equals(iMonitorID.getUserName() + "," + iMonitorID.getJobName())) {
                                 logger.info("This job is finished because push notification came with <username,jobName> " + cancelMId);
                                 completedJobs.add(iMonitorID);
-                                iterator.remove();
                                 iMonitorID.setStatus(JobState.COMPLETE);
                             }
+                            // We have to empty this list every time we iterate; otherwise it will accumulate and
+                            // lead to a memory leak.
+                            iterator.remove();
                         }
                     }
                     Map<String, JobState> jobStatuses = connection.getJobStatuses(monitorID);
@@ -241,7 +243,8 @@ public class HPCPullMonitor extends PullMonitor {
                                     logger.info(e.getLocalizedMessage(), e);
                                 }
                             } else if (iMonitorID.getFailedCount() > FAILED_COUNT) {
-                                logger.error("Tried to monitor the job with ID " + iMonitorID.getJobID() + " But failed 3 times, so skip this Job from Monitor");
+                                logger.error("Tried to monitor the job with ID " + iMonitorID.getJobID() + " But failed" +iMonitorID.getFailedCount()+
+                                        " 3 times, so skip this Job from Monitor");
                                 iMonitorID.setLastMonitored(new Timestamp((new Date()).getTime()));
                                 completedJobs.add(iMonitorID);
                                 try {

http://git-wip-us.apache.org/repos/asf/airavata/blob/69ada188/modules/gfac/gfac-monitor/src/main/java/org/apache/airavata/gfac/monitor/util/CommonUtils.java
----------------------------------------------------------------------
diff --git a/modules/gfac/gfac-monitor/src/main/java/org/apache/airavata/gfac/monitor/util/CommonUtils.java b/modules/gfac/gfac-monitor/src/main/java/org/apache/airavata/gfac/monitor/util/CommonUtils.java
index fb4d898..c40d50d 100644
--- a/modules/gfac/gfac-monitor/src/main/java/org/apache/airavata/gfac/monitor/util/CommonUtils.java
+++ b/modules/gfac/gfac-monitor/src/main/java/org/apache/airavata/gfac/monitor/util/CommonUtils.java
@@ -172,7 +172,7 @@ public class CommonUtils {
                 }
             }
         }
-        logger.error("Cannot find the given MonitorID in the queue with userName " +
+        logger.info("Cannot find the given MonitorID in the queue with userName " +
                 monitorID.getUserName() + "  and jobID " + monitorID.getJobID());
         logger.info("This might not be an error because someone else removed this job from the queue");
     }

http://git-wip-us.apache.org/repos/asf/airavata/blob/69ada188/tools/gsissh/src/main/java/org/apache/airavata/gsi/ssh/api/job/PBSOutputParser.java
----------------------------------------------------------------------
diff --git a/tools/gsissh/src/main/java/org/apache/airavata/gsi/ssh/api/job/PBSOutputParser.java b/tools/gsissh/src/main/java/org/apache/airavata/gsi/ssh/api/job/PBSOutputParser.java
index e8e8cbe..6ea5846 100644
--- a/tools/gsissh/src/main/java/org/apache/airavata/gsi/ssh/api/job/PBSOutputParser.java
+++ b/tools/gsissh/src/main/java/org/apache/airavata/gsi/ssh/api/job/PBSOutputParser.java
@@ -159,7 +159,7 @@ public class PBSOutputParser implements OutputParser {
     public void parse(String userName, Map<String, JobStatus> statusMap, String rawOutput) {
         log.debug(rawOutput);
         String[]    info = rawOutput.split("\n");
-        int lastStop = 0;
+//        int lastStop = 0;
         for (String jobID : statusMap.keySet()) {
             String jobName = jobID.split(",")[1];
             for (int i = 0; i < info.length; i++) {
@@ -174,10 +174,11 @@ public class PBSOutputParser implements OutputParser {
                             columnList.add(s);
                         }
                     }
-                    lastStop = i + 1;
+//                    lastStop = i + 1;
                     statusMap.put(jobID, JobStatus.valueOf(columnList.get(9)));
                     break;
                 }
+                log.error("Couldn't find the status of the Job with JobName: " + jobName + "Job Id: " + jobID.split(",")[0]);
             }
         }
     }

http://git-wip-us.apache.org/repos/asf/airavata/blob/69ada188/tools/gsissh/src/main/java/org/apache/airavata/gsi/ssh/api/job/SlurmOutputParser.java
----------------------------------------------------------------------
diff --git a/tools/gsissh/src/main/java/org/apache/airavata/gsi/ssh/api/job/SlurmOutputParser.java b/tools/gsissh/src/main/java/org/apache/airavata/gsi/ssh/api/job/SlurmOutputParser.java
index 8660f35..64d8417 100644
--- a/tools/gsissh/src/main/java/org/apache/airavata/gsi/ssh/api/job/SlurmOutputParser.java
+++ b/tools/gsissh/src/main/java/org/apache/airavata/gsi/ssh/api/job/SlurmOutputParser.java
@@ -156,7 +156,7 @@ public class SlurmOutputParser implements OutputParser {
             log.info("There are no jobs with this username ... ");
             return;
         }
-        int lastStop = 0;
+//        int lastStop = 0;
         for (String jobID : statusMap.keySet()) {
             String jobId = jobID.split(",")[0];
             String jobName = jobID.split(",")[1];
@@ -172,10 +172,11 @@ public class SlurmOutputParser implements OutputParser {
                             columnList.add(s);
                         }
                     }
-                    lastStop = i + 1;
+//                    lastStop = i + 1;
                     statusMap.put(jobID, JobStatus.valueOf(columnList.get(4)));
                     break;
                 }
+                log.error("Couldn't find the status of the Job with JobName: " + jobName + "Job Id: " + jobId);
             }
         }
     }