You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@cloudstack.apache.org by mc...@apache.org on 2014/09/20 00:22:42 UTC

git commit: updated refs/heads/master to 7cdb67d

Repository: cloudstack
Updated Branches:
  refs/heads/master dd4f6bcaf -> 7cdb67dcf


CLOUDSTACK-7589: VM not starting and always stuck in Stopped state after
management server restarts.


Project: http://git-wip-us.apache.org/repos/asf/cloudstack/repo
Commit: http://git-wip-us.apache.org/repos/asf/cloudstack/commit/7cdb67dc
Tree: http://git-wip-us.apache.org/repos/asf/cloudstack/tree/7cdb67dc
Diff: http://git-wip-us.apache.org/repos/asf/cloudstack/diff/7cdb67dc

Branch: refs/heads/master
Commit: 7cdb67dcf1ec4158ec0ab4c2fa868cc63121bbb5
Parents: dd4f6bc
Author: Min Chen <mi...@citrix.com>
Authored: Fri Sep 19 15:12:09 2014 -0700
Committer: Min Chen <mi...@citrix.com>
Committed: Fri Sep 19 15:12:31 2014 -0700

----------------------------------------------------------------------
 .../jobs/impl/AsyncJobManagerImpl.java          | 57 +++++++-------------
 .../framework/jobs/impl/SyncQueueManager.java   |  2 +
 .../jobs/impl/SyncQueueManagerImpl.java         | 13 ++++-
 3 files changed, 32 insertions(+), 40 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/cloudstack/blob/7cdb67dc/framework/jobs/src/org/apache/cloudstack/framework/jobs/impl/AsyncJobManagerImpl.java
----------------------------------------------------------------------
diff --git a/framework/jobs/src/org/apache/cloudstack/framework/jobs/impl/AsyncJobManagerImpl.java b/framework/jobs/src/org/apache/cloudstack/framework/jobs/impl/AsyncJobManagerImpl.java
index 7d374da..4c4d3c2 100644
--- a/framework/jobs/src/org/apache/cloudstack/framework/jobs/impl/AsyncJobManagerImpl.java
+++ b/framework/jobs/src/org/apache/cloudstack/framework/jobs/impl/AsyncJobManagerImpl.java
@@ -835,24 +835,6 @@ public class AsyncJobManagerImpl extends ManagerBase implements AsyncJobManager,
         return ManagementServerNode.getManagementServerId();
     }
 
-    private void cleanupPendingJobs(List<SyncQueueItemVO> l) {
-        for (SyncQueueItemVO item : l) {
-            if (s_logger.isInfoEnabled()) {
-                s_logger.info("Discard left-over queue item: " + item.toString());
-            }
-
-            String contentType = item.getContentType();
-            if (contentType != null && contentType.equalsIgnoreCase(SyncQueueItem.AsyncJobContentType)) {
-                Long jobId = item.getContentId();
-                if (jobId != null) {
-                    s_logger.warn("Mark job as failed as its correspoding queue-item has been discarded. job id: " + jobId);
-                    completeAsyncJob(jobId, JobInfo.Status.FAILED, 0, "Execution was cancelled because of server shutdown");
-                }
-            }
-            _queueMgr.purgeItem(item.getId());
-        }
-    }
-
     @DB
     protected List<Long> wakeupByJoinedJobCompletion(long joinedJobId) {
         SearchCriteria<Long> joinJobSC = JoinJobSearch.create("joinJobId", joinedJobId);
@@ -967,6 +949,22 @@ public class AsyncJobManagerImpl extends ManagerBase implements AsyncJobManager,
         return true;
     }
 
+    private void cleanupLeftOverJobs(final long msid) { // purge queue items and reset jobs for node msid in one transaction
+        try {
+            Transaction.execute(new TransactionCallbackNoReturn() {
+                @Override
+                public void doInTransactionWithoutResult(TransactionStatus status) {
+                    // purge sync queue items still active for this ms node
+                    _queueMgr.cleanupActiveQueueItems(msid, true);
+                    // reset job status for all jobs running on this ms node
+                    _jobDao.resetJobProcess(msid, ApiErrorCode.INTERNAL_ERROR.getHttpCode(), "job cancelled because of management server restart or shutdown");
+                }
+            });
+        } catch (Throwable e) { // best effort: a failure is logged and not rethrown to the caller
+            s_logger.warn("Unexpected exception in cleaning up left over jobs for management server node " + msid, e);
+        }
+    }
+
     @Override
     public void onManagementNodeJoined(List<? extends ManagementServerHost> nodeList, long selfNodeId) {
     }
@@ -974,18 +972,7 @@ public class AsyncJobManagerImpl extends ManagerBase implements AsyncJobManager,
     @Override
     public void onManagementNodeLeft(List<? extends ManagementServerHost> nodeList, long selfNodeId) {
         for (final ManagementServerHost msHost : nodeList) {
-            try {
-                Transaction.execute(new TransactionCallbackNoReturn() {
-                    @Override
-                    public void doInTransactionWithoutResult(TransactionStatus status) {
-                        List<SyncQueueItemVO> items = _queueMgr.getActiveQueueItems(msHost.getId(), true);
-                        cleanupPendingJobs(items);
-                        _jobDao.resetJobProcess(msHost.getId(), ApiErrorCode.INTERNAL_ERROR.getHttpCode(), "job cancelled because of management server restart");
-                    }
-                });
-            } catch (Throwable e) {
-                s_logger.warn("Unexpected exception ", e);
-            }
+            cleanupLeftOverJobs(msHost.getId());
         }
     }
 
@@ -995,15 +982,7 @@ public class AsyncJobManagerImpl extends ManagerBase implements AsyncJobManager,
 
     @Override
     public boolean start() {
-        try {
-            _jobDao.cleanupPseduoJobs(getMsid());
-
-            List<SyncQueueItemVO> l = _queueMgr.getActiveQueueItems(getMsid(), false);
-            cleanupPendingJobs(l);
-            _jobDao.resetJobProcess(getMsid(), ApiErrorCode.INTERNAL_ERROR.getHttpCode(), "job cancelled because of management server restart");
-        } catch (Throwable e) {
-            s_logger.error("Unexpected exception " + e.getMessage(), e);
-        }
+        cleanupLeftOverJobs(getMsid());
 
         _heartbeatScheduler.scheduleAtFixedRate(getHeartbeatTask(), HEARTBEAT_INTERVAL, HEARTBEAT_INTERVAL, TimeUnit.MILLISECONDS);
         _heartbeatScheduler.scheduleAtFixedRate(getGCTask(), GC_INTERVAL, GC_INTERVAL, TimeUnit.MILLISECONDS);

http://git-wip-us.apache.org/repos/asf/cloudstack/blob/7cdb67dc/framework/jobs/src/org/apache/cloudstack/framework/jobs/impl/SyncQueueManager.java
----------------------------------------------------------------------
diff --git a/framework/jobs/src/org/apache/cloudstack/framework/jobs/impl/SyncQueueManager.java b/framework/jobs/src/org/apache/cloudstack/framework/jobs/impl/SyncQueueManager.java
index b521ffe..32d8464 100644
--- a/framework/jobs/src/org/apache/cloudstack/framework/jobs/impl/SyncQueueManager.java
+++ b/framework/jobs/src/org/apache/cloudstack/framework/jobs/impl/SyncQueueManager.java
@@ -36,4 +36,6 @@ public interface SyncQueueManager extends Manager {
     public List<SyncQueueItemVO> getBlockedQueueItems(long thresholdMs, boolean exclusive);
 
     void purgeAsyncJobQueueItemId(long asyncJobId);
+
+    public void cleanupActiveQueueItems(Long msid, boolean exclusive);
 }

http://git-wip-us.apache.org/repos/asf/cloudstack/blob/7cdb67dc/framework/jobs/src/org/apache/cloudstack/framework/jobs/impl/SyncQueueManagerImpl.java
----------------------------------------------------------------------
diff --git a/framework/jobs/src/org/apache/cloudstack/framework/jobs/impl/SyncQueueManagerImpl.java b/framework/jobs/src/org/apache/cloudstack/framework/jobs/impl/SyncQueueManagerImpl.java
index 5160e05..1cfec4d 100644
--- a/framework/jobs/src/org/apache/cloudstack/framework/jobs/impl/SyncQueueManagerImpl.java
+++ b/framework/jobs/src/org/apache/cloudstack/framework/jobs/impl/SyncQueueManagerImpl.java
@@ -26,7 +26,6 @@ import org.apache.log4j.Logger;
 
 import org.apache.cloudstack.framework.jobs.dao.SyncQueueDao;
 import org.apache.cloudstack.framework.jobs.dao.SyncQueueItemDao;
-
 import com.cloud.utils.DateUtil;
 import com.cloud.utils.component.ManagerBase;
 import com.cloud.utils.db.DB;
@@ -260,4 +259,16 @@ public class SyncQueueManagerImpl extends ManagerBase implements SyncQueueManage
             purgeItem(itemId);
         }
     }
+
+    @Override
+    public void cleanupActiveQueueItems(Long msid, boolean exclusive) { // purges every active queue item returned for msid
+        List<SyncQueueItemVO> l = getActiveQueueItems(msid, exclusive); // forward the caller's flag instead of a constant false
+        for (SyncQueueItemVO item : l) {
+            if (s_logger.isInfoEnabled()) {
+                s_logger.info("Discard left-over queue item: " + item.toString());
+            }
+            purgeItem(item.getId());
+        }
+    }
+
 }