You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@airavata.apache.org by sa...@apache.org on 2014/07/22 17:05:39 UTC

[14/50] [abbrv] git commit: fixing monitoring

fixing monitoring


Project: http://git-wip-us.apache.org/repos/asf/airavata/repo
Commit: http://git-wip-us.apache.org/repos/asf/airavata/commit/b0fde67c
Tree: http://git-wip-us.apache.org/repos/asf/airavata/tree/b0fde67c
Diff: http://git-wip-us.apache.org/repos/asf/airavata/diff/b0fde67c

Branch: refs/heads/workflow-support
Commit: b0fde67c03f2787c5f6d7ec576dc92ab41322c53
Parents: 779b618
Author: lahiru <la...@apache.org>
Authored: Sat Jul 12 12:58:38 2014 -0400
Committer: lahiru <la...@apache.org>
Committed: Sat Jul 12 12:58:38 2014 -0400

----------------------------------------------------------------------
 .../airavata/gfac/core/monitor/MonitorID.java       | 10 ++++++++--
 .../monitor/impl/pull/qstat/HPCPullMonitor.java     | 16 ++++++++--------
 .../airavata/gfac/monitor/util/CommonUtils.java     |  1 +
 3 files changed, 17 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/airavata/blob/b0fde67c/modules/gfac/gfac-core/src/main/java/org/apache/airavata/gfac/core/monitor/MonitorID.java
----------------------------------------------------------------------
diff --git a/modules/gfac/gfac-core/src/main/java/org/apache/airavata/gfac/core/monitor/MonitorID.java b/modules/gfac/gfac-core/src/main/java/org/apache/airavata/gfac/core/monitor/MonitorID.java
index 8456e35..8599a02 100644
--- a/modules/gfac/gfac-core/src/main/java/org/apache/airavata/gfac/core/monitor/MonitorID.java
+++ b/modules/gfac/gfac-core/src/main/java/org/apache/airavata/gfac/core/monitor/MonitorID.java
@@ -177,12 +177,14 @@ public class MonitorID {
         // because in some machines job state vanishes quicckly when the job is done
         // during that case job state comes as unknown.so we handle it here.
         if (this.state != null && status.equals(JobState.UNKNOWN)) {
-            if (getFailedCount() > 2) {
+            if (getFailedCount() >= 2) {
                 switch (this.state) {
                     case ACTIVE:
                         this.state = JobState.COMPLETE;
+                        logger.info("Failed count is high and old status is ACTIVE so we mark this as COMPLETE");
                         break;
                     case QUEUED:
+                        logger.info("Failed count is high and old status is QUEUED so we mark this as COMPLETE");
                         this.state = JobState.COMPLETE;
                         break;
                 }
@@ -193,10 +195,14 @@ public class MonitorID {
                 } catch (InterruptedException e) {
                     e.printStackTrace();  //To change body of catch statement use File | Settings | File Templates.
                 }
+                int loginfo = getFailedCount()+1;
+                logger.info("Increasing the failed count to:"+loginfo);
                 setFailedCount(getFailedCount() + 1);
             }
-        } else {
+        }    else {
             // normal scenario
+            logger.info("Resetting failed count to 0 because correct state came in");
+            setFailedCount(0);
             this.state = status;
         }
     }

http://git-wip-us.apache.org/repos/asf/airavata/blob/b0fde67c/modules/gfac/gfac-monitor/src/main/java/org/apache/airavata/gfac/monitor/impl/pull/qstat/HPCPullMonitor.java
----------------------------------------------------------------------
diff --git a/modules/gfac/gfac-monitor/src/main/java/org/apache/airavata/gfac/monitor/impl/pull/qstat/HPCPullMonitor.java b/modules/gfac/gfac-monitor/src/main/java/org/apache/airavata/gfac/monitor/impl/pull/qstat/HPCPullMonitor.java
index 193f23f..1238bf6 100644
--- a/modules/gfac/gfac-monitor/src/main/java/org/apache/airavata/gfac/monitor/impl/pull/qstat/HPCPullMonitor.java
+++ b/modules/gfac/gfac-monitor/src/main/java/org/apache/airavata/gfac/monitor/impl/pull/qstat/HPCPullMonitor.java
@@ -149,20 +149,20 @@ public class HPCPullMonitor extends PullMonitor {
                 if (iHostMonitorData.getHost().getType() instanceof GsisshHostType
                         || iHostMonitorData.getHost().getType() instanceof SSHHostType) {
                     currentHostDescription = iHostMonitorData.getHost();
-                    String hostName = iHostMonitorData.getHost().getType().getHostAddress();
+                    String hostName =  iHostMonitorData.getHost().getType().getHostAddress();
                     ResourceConnection connection = null;
                     if (connections.containsKey(hostName)) {
                         logger.debug("We already have this connection so not going to create one");
                         connection = connections.get(hostName);
                     } else {
-                        connection = new ResourceConnection(iHostMonitorData, getAuthenticationInfo());
+                        connection = new ResourceConnection(iHostMonitorData,getAuthenticationInfo());
                         connections.put(hostName, connection);
                     }
                     List<MonitorID> monitorID = iHostMonitorData.getMonitorIDs();
                     Map<String, JobState> jobStatuses = connection.getJobStatuses(monitorID);
                     for (MonitorID iMonitorID : monitorID) {
                         currentMonitorID = iMonitorID;
-                        iMonitorID.setStatus(jobStatuses.get(iMonitorID.getJobID()));
+                        iMonitorID.setStatus(jobStatuses.get(iMonitorID.getJobID()));    //IMPORTANT this is not a simple setter we have a logic
                         jobStatus = new JobStatusChangeRequest(iMonitorID);
                         // we have this JobStatus class to handle amqp monitoring
 
@@ -176,13 +176,13 @@ public class HPCPullMonitor extends PullMonitor {
                             try {
                                 gfac.invokeOutFlowHandlers(iMonitorID.getJobExecutionContext());
                             } catch (GFacException e) {
-                                publisher.publish(new TaskStatusChangeRequest(new TaskIdentity(iMonitorID.getExperimentID(), iMonitorID.getWorkflowNodeID(),
-                                        iMonitorID.getTaskID()), TaskState.FAILED));
-                                publisher.publish(new ExperimentStatusChangeRequest(new ExperimentIdentity(iMonitorID.getExperimentID()),
-                                        ExperimentState.FAILED));
+                            	publisher.publish(new TaskStatusChangeRequest(new TaskIdentity(iMonitorID.getExperimentID(), iMonitorID.getWorkflowNodeID(),
+										iMonitorID.getTaskID()), TaskState.FAILED));
+                            	publisher.publish(new ExperimentStatusChangeRequest(new ExperimentIdentity(iMonitorID.getExperimentID()),
+										ExperimentState.FAILED));
                                 logger.info(e.getLocalizedMessage(), e);
                             }
-                        } else if (iMonitorID.getFailedCount() > 2 && iMonitorID.getStatus().equals(JobState.UNKNOWN)) {
+                        } else if (iMonitorID.getFailedCount() > 2) {
                             logger.error("Tried to monitor the job with ID " + iMonitorID.getJobID() + " But failed 3 times, so skip this Job from Monitor");
                             iMonitorID.setLastMonitored(new Timestamp((new Date()).getTime()));
                             completedJobs.add(iMonitorID);

http://git-wip-us.apache.org/repos/asf/airavata/blob/b0fde67c/modules/gfac/gfac-monitor/src/main/java/org/apache/airavata/gfac/monitor/util/CommonUtils.java
----------------------------------------------------------------------
diff --git a/modules/gfac/gfac-monitor/src/main/java/org/apache/airavata/gfac/monitor/util/CommonUtils.java b/modules/gfac/gfac-monitor/src/main/java/org/apache/airavata/gfac/monitor/util/CommonUtils.java
index a9f1520..27b213f 100644
--- a/modules/gfac/gfac-monitor/src/main/java/org/apache/airavata/gfac/monitor/util/CommonUtils.java
+++ b/modules/gfac/gfac-monitor/src/main/java/org/apache/airavata/gfac/monitor/util/CommonUtils.java
@@ -138,6 +138,7 @@ public class CommonUtils {
                             if(iMonitorID.getJobID().equals(monitorID.getJobID())) {
                                 // OK we found the object, we cannot do list.remove(object) states of two objects
                                 // could be different, thats why we check the jobID
+                                logger.info("Removing the job:"+ monitorID.getJobID()+" from monitoring last status:" + monitorID.getStatus().toString());
                                 monitorIDs.remove(iMonitorID);
                                 if(monitorIDs.size()==0) {
                                     hostMonitorData.remove(iHostMonitorID);