You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@cloudstack.apache.org by bh...@apache.org on 2014/11/25 15:51:47 UTC

[11/18] git commit: updated refs/heads/4.3 to b8a28df

CLOUDSTACK-7589: VM not Starting and always stuck in Stopped state after
management server restarts.

(cherry picked from commit 7cdb67dcf1ec4158ec0ab4c2fa868cc63121bbb5)
Signed-off-by: Rohit Yadav <ro...@shapeblue.com>


Project: http://git-wip-us.apache.org/repos/asf/cloudstack/repo
Commit: http://git-wip-us.apache.org/repos/asf/cloudstack/commit/0610bf67
Tree: http://git-wip-us.apache.org/repos/asf/cloudstack/tree/0610bf67
Diff: http://git-wip-us.apache.org/repos/asf/cloudstack/diff/0610bf67

Branch: refs/heads/4.3
Commit: 0610bf670cc5c54e73c4051c3ff5d04a916ad122
Parents: 33761a3
Author: Min Chen <mi...@citrix.com>
Authored: Fri Sep 19 15:12:09 2014 -0700
Committer: Rohit Yadav <ro...@shapeblue.com>
Committed: Tue Nov 25 19:22:33 2014 +0530

----------------------------------------------------------------------
 .../jobs/impl/AsyncJobManagerImpl.java          | 57 +++++++-------------
 .../framework/jobs/impl/SyncQueueManager.java   |  2 +
 .../jobs/impl/SyncQueueManagerImpl.java         | 13 ++++-
 3 files changed, 32 insertions(+), 40 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/cloudstack/blob/0610bf67/framework/jobs/src/org/apache/cloudstack/framework/jobs/impl/AsyncJobManagerImpl.java
----------------------------------------------------------------------
diff --git a/framework/jobs/src/org/apache/cloudstack/framework/jobs/impl/AsyncJobManagerImpl.java b/framework/jobs/src/org/apache/cloudstack/framework/jobs/impl/AsyncJobManagerImpl.java
index cf0e1f1..ae59848 100644
--- a/framework/jobs/src/org/apache/cloudstack/framework/jobs/impl/AsyncJobManagerImpl.java
+++ b/framework/jobs/src/org/apache/cloudstack/framework/jobs/impl/AsyncJobManagerImpl.java
@@ -784,24 +784,6 @@ public class AsyncJobManagerImpl extends ManagerBase implements AsyncJobManager,
         return ManagementServerNode.getManagementServerId();
     }
 
-    private void cleanupPendingJobs(List<SyncQueueItemVO> l) {
-        for (SyncQueueItemVO item : l) {
-            if (s_logger.isInfoEnabled()) {
-                s_logger.info("Discard left-over queue item: " + item.toString());
-            }
-
-            String contentType = item.getContentType();
-            if (contentType != null && contentType.equalsIgnoreCase(SyncQueueItem.AsyncJobContentType)) {
-                Long jobId = item.getContentId();
-                if (jobId != null) {
-                    s_logger.warn("Mark job as failed as its correspoding queue-item has been discarded. job id: " + jobId);
-                    completeAsyncJob(jobId, JobInfo.Status.FAILED, 0, "Execution was cancelled because of server shutdown");
-                }
-            }
-            _queueMgr.purgeItem(item.getId());
-        }
-    }
-
     @DB
     protected List<Long> wakeupByJoinedJobCompletion(long joinedJobId) {
         SearchCriteria<Long> joinJobSC = JoinJobSearch.create("joinJobId", joinedJobId);
@@ -912,6 +894,22 @@ public class AsyncJobManagerImpl extends ManagerBase implements AsyncJobManager,
         return true;
     }
 
+    private void cleanupLeftOverJobs(final long msid) {
+        try {
+            Transaction.execute(new TransactionCallbackNoReturn() {
+                @Override
+                public void doInTransactionWithoutResult(TransactionStatus status) {
+                    // purge sync queue item running on this ms node
+                    _queueMgr.cleanupActiveQueueItems(msid, true);
+                    // reset job status for all jobs running on this ms node
+                    _jobDao.resetJobProcess(msid, ApiErrorCode.INTERNAL_ERROR.getHttpCode(), "job cancelled because of management server restart or shutdown");
+                }
+            });
+        } catch (Throwable e) {
+            s_logger.warn("Unexpected exception in cleaning up left over jobs for mamagement server node " + msid, e);
+        }
+    }
+
     @Override
     public void onManagementNodeJoined(List<? extends ManagementServerHost> nodeList, long selfNodeId) {
     }
@@ -919,18 +917,7 @@ public class AsyncJobManagerImpl extends ManagerBase implements AsyncJobManager,
     @Override
     public void onManagementNodeLeft(List<? extends ManagementServerHost> nodeList, long selfNodeId) {
         for (final ManagementServerHost msHost : nodeList) {
-            try {
-                Transaction.execute(new TransactionCallbackNoReturn() {
-                    @Override
-                    public void doInTransactionWithoutResult(TransactionStatus status) {
-                        List<SyncQueueItemVO> items = _queueMgr.getActiveQueueItems(msHost.getId(), true);
-                        cleanupPendingJobs(items);
-                        _jobDao.resetJobProcess(msHost.getId(), ApiErrorCode.INTERNAL_ERROR.getHttpCode(), "job cancelled because of management server restart");
-                    }
-                });
-            } catch (Throwable e) {
-                s_logger.warn("Unexpected exception ", e);
-            }
+            cleanupLeftOverJobs(msHost.getId());
         }
     }
 
@@ -940,15 +927,7 @@ public class AsyncJobManagerImpl extends ManagerBase implements AsyncJobManager,
 
     @Override
     public boolean start() {
-        try {
-            _jobDao.cleanupPseduoJobs(getMsid());
-
-            List<SyncQueueItemVO> l = _queueMgr.getActiveQueueItems(getMsid(), false);
-            cleanupPendingJobs(l);
-            _jobDao.resetJobProcess(getMsid(), ApiErrorCode.INTERNAL_ERROR.getHttpCode(), "job cancelled because of management server restart");
-        } catch (Throwable e) {
-            s_logger.error("Unexpected exception " + e.getMessage(), e);
-        }
+        cleanupLeftOverJobs(getMsid());
 
         _heartbeatScheduler.scheduleAtFixedRate(getHeartbeatTask(), HEARTBEAT_INTERVAL, HEARTBEAT_INTERVAL, TimeUnit.MILLISECONDS);
         _heartbeatScheduler.scheduleAtFixedRate(getGCTask(), GC_INTERVAL, GC_INTERVAL, TimeUnit.MILLISECONDS);

http://git-wip-us.apache.org/repos/asf/cloudstack/blob/0610bf67/framework/jobs/src/org/apache/cloudstack/framework/jobs/impl/SyncQueueManager.java
----------------------------------------------------------------------
diff --git a/framework/jobs/src/org/apache/cloudstack/framework/jobs/impl/SyncQueueManager.java b/framework/jobs/src/org/apache/cloudstack/framework/jobs/impl/SyncQueueManager.java
index 202a704..2a39829 100644
--- a/framework/jobs/src/org/apache/cloudstack/framework/jobs/impl/SyncQueueManager.java
+++ b/framework/jobs/src/org/apache/cloudstack/framework/jobs/impl/SyncQueueManager.java
@@ -31,4 +31,6 @@ public interface SyncQueueManager extends Manager {
     public List<SyncQueueItemVO> getBlockedQueueItems(long thresholdMs, boolean exclusive);
 
     void purgeAsyncJobQueueItemId(long asyncJobId);
+
+    public void cleanupActiveQueueItems(Long msid, boolean exclusive);
 }

http://git-wip-us.apache.org/repos/asf/cloudstack/blob/0610bf67/framework/jobs/src/org/apache/cloudstack/framework/jobs/impl/SyncQueueManagerImpl.java
----------------------------------------------------------------------
diff --git a/framework/jobs/src/org/apache/cloudstack/framework/jobs/impl/SyncQueueManagerImpl.java b/framework/jobs/src/org/apache/cloudstack/framework/jobs/impl/SyncQueueManagerImpl.java
index 9d3bf80..82ab959 100644
--- a/framework/jobs/src/org/apache/cloudstack/framework/jobs/impl/SyncQueueManagerImpl.java
+++ b/framework/jobs/src/org/apache/cloudstack/framework/jobs/impl/SyncQueueManagerImpl.java
@@ -26,7 +26,6 @@ import org.apache.log4j.Logger;
 
 import org.apache.cloudstack.framework.jobs.dao.SyncQueueDao;
 import org.apache.cloudstack.framework.jobs.dao.SyncQueueItemDao;
-
 import com.cloud.utils.DateUtil;
 import com.cloud.utils.component.ManagerBase;
 import com.cloud.utils.db.DB;
@@ -258,4 +257,16 @@ public class SyncQueueManagerImpl extends ManagerBase implements SyncQueueManage
             purgeItem(itemId);
         }
     }
+
+    @Override
+    public void cleanupActiveQueueItems(Long msid, boolean exclusive) {
+        List<SyncQueueItemVO> l = getActiveQueueItems(msid, false);
+        for (SyncQueueItemVO item : l) {
+            if (s_logger.isInfoEnabled()) {
+                s_logger.info("Discard left-over queue item: " + item.toString());
+            }
+            purgeItem(item.getId());
+        }
+    }
+
 }