You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@cloudstack.apache.org by ke...@apache.org on 2013/10/17 02:01:09 UTC
git commit: updated refs/heads/vmsync-rebase to cf94cfb
Updated Branches:
refs/heads/vmsync-rebase 534345e8a -> cf94cfb3f
Merge VirtualMachineManger
Project: http://git-wip-us.apache.org/repos/asf/cloudstack/repo
Commit: http://git-wip-us.apache.org/repos/asf/cloudstack/commit/cf94cfb3
Tree: http://git-wip-us.apache.org/repos/asf/cloudstack/tree/cf94cfb3
Diff: http://git-wip-us.apache.org/repos/asf/cloudstack/diff/cf94cfb3
Branch: refs/heads/vmsync-rebase
Commit: cf94cfb3f217280c65198de6e9149684e2bf99d3
Parents: 534345e
Author: Kelven Yang <ke...@gmail.com>
Authored: Wed Oct 16 17:00:57 2013 -0700
Committer: Kelven Yang <ke...@gmail.com>
Committed: Wed Oct 16 17:00:57 2013 -0700
----------------------------------------------------------------------
api/src/com/cloud/vm/VirtualMachine.java | 15 +
.../src/com/cloud/alert/AlertManager.java | 1 +
.../com/cloud/vm/VirtualMachineManagerImpl.java | 365 ++++++++++++++++++-
.../cloud/vm/VirtualMachineManagerImplTest.java | 3 +-
.../framework/jobs/dao/VmWorkJobDao.java | 35 ++
.../framework/jobs/dao/VmWorkJobDaoImpl.java | 125 +++++++
.../framework/jobs/impl/VmWorkJobVO.java | 101 +++++
.../com/cloud/ha/CheckOnAgentInvestigator.java | 1 -
8 files changed, 635 insertions(+), 11 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/cloudstack/blob/cf94cfb3/api/src/com/cloud/vm/VirtualMachine.java
----------------------------------------------------------------------
diff --git a/api/src/com/cloud/vm/VirtualMachine.java b/api/src/com/cloud/vm/VirtualMachine.java
index 0d56826..6215283 100755
--- a/api/src/com/cloud/vm/VirtualMachine.java
+++ b/api/src/com/cloud/vm/VirtualMachine.java
@@ -111,6 +111,16 @@ public interface VirtualMachine extends RunningOn, ControlledEntity, Identity, I
s_fsm.addTransition(State.Expunging, VirtualMachine.Event.ExpungeOperation, State.Expunging);
s_fsm.addTransition(State.Error, VirtualMachine.Event.DestroyRequested, State.Expunging);
s_fsm.addTransition(State.Error, VirtualMachine.Event.ExpungeOperation, State.Expunging);
+
+ s_fsm.addTransition(State.Stopping, VirtualMachine.Event.FollowAgentPowerOnReport, State.Running);
+ s_fsm.addTransition(State.Stopped, VirtualMachine.Event.FollowAgentPowerOnReport, State.Running);
+ s_fsm.addTransition(State.Running, VirtualMachine.Event.FollowAgentPowerOnReport, State.Running);
+ s_fsm.addTransition(State.Migrating, VirtualMachine.Event.FollowAgentPowerOnReport, State.Running);
+
+ s_fsm.addTransition(State.Starting, VirtualMachine.Event.FollowAgentPowerOffReport, State.Stopped);
+ s_fsm.addTransition(State.Stopping, VirtualMachine.Event.FollowAgentPowerOffReport, State.Stopped);
+ s_fsm.addTransition(State.Running, VirtualMachine.Event.FollowAgentPowerOffReport, State.Stopped);
+ s_fsm.addTransition(State.Migrating, VirtualMachine.Event.FollowAgentPowerOffReport, State.Stopped);
}
public static boolean isVmStarted(State oldState, Event e, State newState) {
@@ -179,9 +189,14 @@ public interface VirtualMachine extends RunningOn, ControlledEntity, Identity, I
AgentReportMigrated,
RevertRequested,
SnapshotRequested,
+
+ // added for new VMSync logic
+ FollowAgentPowerOnReport,
+ FollowAgentPowerOffReport,
};
public enum Type {
+ Instance(false),
User(false),
DomainRouter(true),
ConsoleProxy(true),
http://git-wip-us.apache.org/repos/asf/cloudstack/blob/cf94cfb3/engine/components-api/src/com/cloud/alert/AlertManager.java
----------------------------------------------------------------------
diff --git a/engine/components-api/src/com/cloud/alert/AlertManager.java b/engine/components-api/src/com/cloud/alert/AlertManager.java
index 1ae6b1b..2b333e0 100755
--- a/engine/components-api/src/com/cloud/alert/AlertManager.java
+++ b/engine/components-api/src/com/cloud/alert/AlertManager.java
@@ -50,6 +50,7 @@ public interface AlertManager extends Manager {
public static final short ALERT_TYPE_DIRECT_ATTACHED_PUBLIC_IP = 24;
public static final short ALERT_TYPE_LOCAL_STORAGE = 25;
public static final short ALERT_TYPE_RESOURCE_LIMIT_EXCEEDED = 26; // Generated when the resource limit exceeds the limit. Currently used for recurring snapshots only
+ public static final short ALERT_TYPE_SYNC = 27;
static final ConfigKey<Double> StorageCapacityThreshold = new ConfigKey<Double>(Double.class, "cluster.storage.capacity.notificationthreshold", "Alert", "0.75",
"Percentage (as a value between 0 and 1) of storage utilization above which alerts will be sent about low storage available.", true, ConfigKey.Scope.Cluster, null);
http://git-wip-us.apache.org/repos/asf/cloudstack/blob/cf94cfb3/engine/orchestration/src/com/cloud/vm/VirtualMachineManagerImpl.java
----------------------------------------------------------------------
diff --git a/engine/orchestration/src/com/cloud/vm/VirtualMachineManagerImpl.java b/engine/orchestration/src/com/cloud/vm/VirtualMachineManagerImpl.java
index 8bf419f..591be6c 100755
--- a/engine/orchestration/src/com/cloud/vm/VirtualMachineManagerImpl.java
+++ b/engine/orchestration/src/com/cloud/vm/VirtualMachineManagerImpl.java
@@ -18,6 +18,10 @@
package com.cloud.vm;
import java.net.URI;
+import java.sql.PreparedStatement;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
@@ -26,6 +30,7 @@ import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
+import java.util.TimeZone;
import java.util.UUID;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
@@ -47,12 +52,17 @@ import org.apache.cloudstack.framework.config.ConfigDepot;
import org.apache.cloudstack.framework.config.ConfigKey;
import org.apache.cloudstack.framework.config.Configurable;
import org.apache.cloudstack.framework.config.dao.ConfigurationDao;
+import org.apache.cloudstack.framework.jobs.AsyncJobManager;
+import org.apache.cloudstack.framework.jobs.dao.VmWorkJobDao;
+import org.apache.cloudstack.framework.jobs.impl.VmWorkJobVO;
+import org.apache.cloudstack.framework.messagebus.MessageBus;
+import org.apache.cloudstack.framework.messagebus.MessageHandler;
+import org.apache.cloudstack.jobs.JobInfo;
import org.apache.cloudstack.managed.context.ManagedContextRunnable;
import org.apache.cloudstack.storage.datastore.db.PrimaryDataStoreDao;
import org.apache.cloudstack.storage.datastore.db.StoragePoolVO;
import org.apache.cloudstack.utils.identity.ManagementServerNode;
import org.apache.cloudstack.storage.to.VolumeObjectTO;
-import org.apache.log4j.Logger;
import com.cloud.agent.AgentManager;
import com.cloud.agent.Listener;
@@ -78,7 +88,6 @@ import com.cloud.agent.api.StartAnswer;
import com.cloud.agent.api.StartCommand;
import com.cloud.agent.api.StartupCommand;
import com.cloud.agent.api.StartupRoutingCommand;
-import com.cloud.agent.api.StartupRoutingCommand.VmState;
import com.cloud.agent.api.StopAnswer;
import com.cloud.agent.api.StopCommand;
import com.cloud.agent.api.UnPlugNicAnswer;
@@ -155,6 +164,7 @@ import com.cloud.storage.dao.VolumeDao;
import com.cloud.template.VirtualMachineTemplate;
import com.cloud.user.Account;
import com.cloud.user.User;
+import com.cloud.utils.DateUtil;
import com.cloud.utils.Journal;
import com.cloud.utils.Pair;
import com.cloud.utils.StringUtils;
@@ -171,6 +181,7 @@ import com.cloud.utils.fsm.NoTransitionException;
import com.cloud.utils.fsm.StateMachine2;
import com.cloud.vm.ItWorkVO.Step;
import com.cloud.vm.VirtualMachine.Event;
+import com.cloud.vm.VirtualMachine.PowerState;
import com.cloud.vm.VirtualMachine.State;
import com.cloud.vm.dao.NicDao;
import com.cloud.vm.dao.UserVmDao;
@@ -185,6 +196,8 @@ import com.cloud.vm.snapshot.dao.VMSnapshotDao;
public class VirtualMachineManagerImpl extends ManagerBase implements VirtualMachineManager, Listener, Configurable {
private static final Logger s_logger = Logger.getLogger(VirtualMachineManagerImpl.class);
+ private static final String VM_SYNC_ALERT_SUBJECT = "VM state sync alert";
+
@Inject
DataStoreManager dataStoreMgr;
@Inject
@@ -278,6 +291,11 @@ public class VirtualMachineManagerImpl extends ManagerBase implements VirtualMac
@Inject
DeploymentPlanningManager _dpMgr;
+ @Inject protected MessageBus _messageBus;
+ @Inject protected VirtualMachinePowerStateSync _syncMgr;
+ @Inject protected VmWorkJobDao _workJobDao;
+ @Inject protected AsyncJobManager _jobMgr;
+
Map<VirtualMachine.Type, VirtualMachineGuru> _vmGurus = new HashMap<VirtualMachine.Type, VirtualMachineGuru>();
protected StateMachine2<State, VirtualMachine.Event, VirtualMachine> _stateMachine;
@@ -297,6 +315,9 @@ public class VirtualMachineManagerImpl extends ManagerBase implements VirtualMac
"On destroy, force-stop takes this value ", true);
static final ConfigKey<Integer> ClusterDeltaSyncInterval = new ConfigKey<Integer>("Advanced", Integer.class, "sync.interval", "60", "Cluster Delta sync interval in seconds",
false);
+
+ protected static final ConfigKey<Long> PingInterval = new ConfigKey<Long>("Advanced",
+ Long.class, "ping.interval", "60", "Ping interval in seconds", false);
ScheduledExecutorService _executor = null;
@@ -1325,7 +1346,7 @@ public class VirtualMachineManagerImpl extends ManagerBase implements VirtualMac
protected boolean checkVmOnHost(VirtualMachine vm, long hostId) throws AgentUnavailableException, OperationTimedoutException {
CheckVirtualMachineAnswer answer = (CheckVirtualMachineAnswer)_agentMgr.send(hostId, new CheckVirtualMachineCommand(vm.getInstanceName()));
- if (!answer.getResult() || answer.getState() == State.Stopped) {
+ if (!answer.getResult() || answer.getState() == PowerState.PowerOff) {
return false;
}
@@ -1910,6 +1931,7 @@ public class VirtualMachineManagerImpl extends ManagerBase implements VirtualMac
return new StopCommand(vmName, getExecuteInSequence());
}
+/*
public Commands fullHostSync(final long hostId, StartupRoutingCommand startup) {
Commands commands = new Commands(Command.OnError.Continue);
@@ -1968,7 +1990,9 @@ public class VirtualMachineManagerImpl extends ManagerBase implements VirtualMac
return commands;
}
+*/
+/*
public Commands deltaHostSync(long hostId, Map<String, State> newStates) {
Map<Long, AgentVmInfo> states = convertDeltaToInfos(newStates);
Commands commands = new Commands(Command.OnError.Continue);
@@ -1996,7 +2020,7 @@ public class VirtualMachineManagerImpl extends ManagerBase implements VirtualMac
return commands;
}
-
+*/
public void deltaSync(Map<String, Pair<String, State>> newStates) {
Map<Long, AgentVmInfo> states = convertToInfos(newStates);
@@ -2185,7 +2209,7 @@ public class VirtualMachineManagerImpl extends ManagerBase implements VirtualMac
}
return map;
}
-
+/*
protected Map<Long, AgentVmInfo> convertToInfos(StartupRoutingCommand cmd) {
final Map<String, VmState> states = cmd.getVmStates();
final HashMap<Long, AgentVmInfo> map = new HashMap<Long, AgentVmInfo>();
@@ -2203,7 +2227,7 @@ public class VirtualMachineManagerImpl extends ManagerBase implements VirtualMac
return map;
}
-
+*/
protected Map<Long, AgentVmInfo> convertDeltaToInfos(final Map<String, State> states) {
final HashMap<Long, AgentVmInfo> map = new HashMap<Long, AgentVmInfo>();
@@ -2229,7 +2253,9 @@ public class VirtualMachineManagerImpl extends ManagerBase implements VirtualMac
*
*/
protected Command compareState(long hostId, VMInstanceVO vm, final AgentVmInfo info, final boolean fullSync, boolean trackExternalChange) {
- State agentState = info.state;
+ return null;
+/*
+ State agentState = info.state;
final State serverState = vm.getState();
final String serverName = vm.getInstanceName();
@@ -2401,10 +2427,12 @@ public class VirtualMachineManagerImpl extends ManagerBase implements VirtualMac
}
}
return command;
+*/
}
private void ensureVmRunningContext(long hostId, VMInstanceVO vm, Event cause) throws OperationTimedoutException, ResourceUnavailableException, NoTransitionException,
InsufficientAddressCapacityException {
+/*
VirtualMachineGuru vmGuru = getVmGuru(vm);
s_logger.debug("VM state is starting on full sync so updating it to running");
@@ -2461,6 +2489,7 @@ public class VirtualMachineManagerImpl extends ManagerBase implements VirtualMac
work.setStep(Step.Done);
_workDao.update(work.getId(), work);
}
+*/
}
@Override
@@ -2474,7 +2503,9 @@ public class VirtualMachineManagerImpl extends ManagerBase implements VirtualMac
if (answer instanceof ClusterSyncAnswer) {
ClusterSyncAnswer hs = (ClusterSyncAnswer)answer;
if (!hs.isExceuted()) {
+/*
deltaSync(hs.getNewStates());
+*/
hs.setExecuted();
}
}
@@ -2497,6 +2528,25 @@ public class VirtualMachineManagerImpl extends ManagerBase implements VirtualMac
boolean processed = false;
for (Command cmd : cmds) {
if (cmd instanceof PingRoutingCommand) {
+ PingRoutingCommand ping = (PingRoutingCommand) cmd;
+ if (ping.getNewStates() != null && ping.getNewStates().size() > 0) {
+ _syncMgr.processHostVmStatePingReport(agentId, ping.getNewStates());
+ }
+
+ // take the chance to scan VMs that are stuck in transitional states and are missing from the report
+ scanStalledVMInTransitionStateOnUpHost(agentId);
+ processed = true;
+ }
+ }
+ return processed;
+ }
+
+/*
+ @Override
+ public boolean processCommands(long agentId, long seq, Command[] cmds) {
+ boolean processed = false;
+ for (Command cmd : cmds) {
+ if (cmd instanceof PingRoutingCommand) {
PingRoutingCommand ping = (PingRoutingCommand)cmd;
if (ping.getNewStates() != null && ping.getNewStates().size() > 0) {
Commands commands = deltaHostSync(agentId, ping.getNewStates());
@@ -2513,7 +2563,8 @@ public class VirtualMachineManagerImpl extends ManagerBase implements VirtualMac
}
return processed;
}
-
+*/
+
@Override
public AgentControlAnswer processControlCommand(long agentId, AgentControlCommand cmd) {
return null;
@@ -2523,7 +2574,19 @@ public class VirtualMachineManagerImpl extends ManagerBase implements VirtualMac
public boolean processDisconnect(long agentId, Status state) {
return true;
}
+
+ @Override
+ public void processConnect(Host agent, StartupCommand cmd, boolean forRebalance) throws ConnectionException {
+ if (!(cmd instanceof StartupRoutingCommand)) {
+ return;
+ }
+
+ if(s_logger.isDebugEnabled())
+ s_logger.debug("Received startup command from hypervisor host. host id: " + agent.getId());
+ _syncMgr.resetHostSyncState(agent.getId());
+ }
+/*
@Override
public void processConnect(Host agent, StartupCommand cmd, boolean forRebalance) throws ConnectionException {
if (!(cmd instanceof StartupRoutingCommand)) {
@@ -2588,7 +2651,7 @@ public class VirtualMachineManagerImpl extends ManagerBase implements VirtualMac
}
}
-
+*/
protected class TransitionTask extends ManagedContextRunnable {
@Override
protected void runInContext() {
@@ -2603,6 +2666,7 @@ public class VirtualMachineManagerImpl extends ManagerBase implements VirtualMac
return;
}
try {
+/*
lock.addRef();
List<VMInstanceVO> instances = _vmDao.findVMInTransition(new Date(new Date().getTime() - (AgentManager.Wait.value() * 1000)), State.Starting, State.Stopping);
for (VMInstanceVO instance : instances) {
@@ -2613,6 +2677,10 @@ public class VirtualMachineManagerImpl extends ManagerBase implements VirtualMac
_haMgr.scheduleRestart(instance, true);
}
}
+*/
+
+ scanStalledVMInTransitionStateOnDisconnectedHosts();
+
} catch (Exception e) {
s_logger.warn("Caught the following exception on transition checking", e);
} finally {
@@ -3280,4 +3348,283 @@ public class VirtualMachineManagerImpl extends ManagerBase implements VirtualMac
this._storagePoolAllocators = storagePoolAllocators;
}
+ //
+ // PowerState report handling for out-of-band changes and handling of left-over transitional VM states
+ //
+
+ @MessageHandler(topic = Topics.VM_POWER_STATE)
+ private void HandlePownerStateReport(Object target, String subject, String senderAddress, Object args) {
+ assert(args != null);
+ Long vmId = (Long)args;
+
+ List<VmWorkJobVO> pendingWorkJobs = _workJobDao.listPendingWorkJobs(
+ VirtualMachine.Type.Instance, vmId);
+ if(pendingWorkJobs.size() == 0) {
+ // there is no pending operation job
+ VMInstanceVO vm = _vmDao.findById(vmId);
+ if(vm != null) {
+ switch(vm.getPowerState()) {
+ case PowerOn :
+ handlePowerOnReportWithNoPendingJobsOnVM(vm);
+ break;
+
+ case PowerOff :
+ handlePowerOffReportWithNoPendingJobsOnVM(vm);
+ break;
+
+ // PowerUnknown shouldn't be reported, it is a derived
+ // VM power state from host state (host un-reachable
+ case PowerUnknown :
+ default :
+ assert(false);
+ break;
+ }
+ } else {
+ s_logger.warn("VM " + vmId + " no longer exists when processing VM state report");
+ }
+ } else {
+ // TODO, do job wake-up signalling, since currently async job wake-up is not in use
+ // we will skip it for nows
+ }
+ }
+
+ private void handlePowerOnReportWithNoPendingJobsOnVM(VMInstanceVO vm) {
+ //
+ // 1) handle left-over transitional VM states
+ // 2) handle out of band VM live migration
+ // 3) handle out of sync stationary states, marking VM from Stopped to Running with
+ // alert messages
+ //
+ switch(vm.getState()) {
+ case Starting :
+ try {
+ stateTransitTo(vm, VirtualMachine.Event.FollowAgentPowerOnReport, vm.getPowerHostId());
+ } catch(NoTransitionException e) {
+ s_logger.warn("Unexpected VM state transition exception, race-condition?", e);
+ }
+
+ // we need to alert admin or user about this risky state transition
+ _alertMgr.sendAlert(AlertManager.ALERT_TYPE_SYNC, vm.getDataCenterId(), vm.getPodIdToDeployIn(),
+ VM_SYNC_ALERT_SUBJECT, "VM " + vm.getHostName() + "(" + vm.getInstanceName() + ") state is sync-ed (Starting -> Running) from out-of-context transition. VM network environment may need to be reset");
+ break;
+
+ case Running :
+ try {
+ if(vm.getHostId() != null && vm.getHostId().longValue() != vm.getPowerHostId().longValue())
+ s_logger.info("Detected out of band VM migration from host " + vm.getHostId() + " to host " + vm.getPowerHostId());
+ stateTransitTo(vm, VirtualMachine.Event.FollowAgentPowerOnReport, vm.getPowerHostId());
+ } catch(NoTransitionException e) {
+ s_logger.warn("Unexpected VM state transition exception, race-condition?", e);
+ }
+ break;
+
+ case Stopping :
+ case Stopped :
+ try {
+ stateTransitTo(vm, VirtualMachine.Event.FollowAgentPowerOnReport, vm.getPowerHostId());
+ } catch(NoTransitionException e) {
+ s_logger.warn("Unexpected VM state transition exception, race-condition?", e);
+ }
+ _alertMgr.sendAlert(AlertManager.ALERT_TYPE_SYNC, vm.getDataCenterId(), vm.getPodIdToDeployIn(),
+ VM_SYNC_ALERT_SUBJECT, "VM " + vm.getHostName() + "(" + vm.getInstanceName() + ") state is sync-ed (" + vm.getState() + " -> Running) from out-of-context transition. VM network environment may need to be reset");
+ break;
+
+ case Destroyed :
+ case Expunging :
+ s_logger.info("Receive power on report when VM is in destroyed or expunging state. vm: "
+ + vm.getId() + ", state: " + vm.getState());
+ break;
+
+ case Migrating :
+ try {
+ stateTransitTo(vm, VirtualMachine.Event.FollowAgentPowerOnReport, vm.getPowerHostId());
+ } catch(NoTransitionException e) {
+ s_logger.warn("Unexpected VM state transition exception, race-condition?", e);
+ }
+ break;
+
+ case Error :
+ default :
+ s_logger.info("Receive power on report when VM is in error or unexpected state. vm: "
+ + vm.getId() + ", state: " + vm.getState());
+ break;
+ }
+ }
+
+ private void handlePowerOffReportWithNoPendingJobsOnVM(VMInstanceVO vm) {
+
+ // 1) handle left-over transitional VM states
+ // 2) handle out of sync stationary states, schedule force-stop to release resources
+ //
+ switch(vm.getState()) {
+ case Starting :
+ case Stopping :
+ case Stopped :
+ case Migrating :
+ try {
+ stateTransitTo(vm, VirtualMachine.Event.FollowAgentPowerOffReport, vm.getPowerHostId());
+ } catch(NoTransitionException e) {
+ s_logger.warn("Unexpected VM state transition exception, race-condition?", e);
+ }
+ _alertMgr.sendAlert(AlertManager.ALERT_TYPE_SYNC, vm.getDataCenterId(), vm.getPodIdToDeployIn(),
+ VM_SYNC_ALERT_SUBJECT, "VM " + vm.getHostName() + "(" + vm.getInstanceName() + ") state is sync-ed (" + vm.getState() + " -> Stopped) from out-of-context transition.");
+ // TODO: we need to forcely release all resource allocation
+ break;
+
+ case Running :
+ case Destroyed :
+ case Expunging :
+ break;
+
+ case Error :
+ default :
+ break;
+ }
+ }
+
+ private void scanStalledVMInTransitionStateOnUpHost(long hostId) {
+ //
+ // Check VM that is stuck in Starting, Stopping, Migrating states, we won't check
+ // VMs in expunging state (this need to be handled specially)
+ //
+ // checking condition
+ // 1) no pending VmWork job
+ // 2) on hostId host and host is UP
+ //
+ // When host is UP, soon or later we will get a report from the host about the VM,
+ // however, if VM is missing from the host report (it may happen in out of band changes
+ // or from designed behave of XS/KVM), the VM may not get a chance to run the state-sync logic
+ //
+ // Therefor, we will scan thoses VMs on UP host based on last update timestamp, if the host is UP
+ // and a VM stalls for status update, we will consider them to be powered off
+ // (which is relatively safe to do so)
+
+ long stallThresholdInMs = PingInterval.value() + (PingInterval.value() >> 1);
+ Date cutTime = new Date(DateUtil.currentGMTTime().getTime() - stallThresholdInMs);
+ List<Long> mostlikelyStoppedVMs = listStalledVMInTransitionStateOnUpHost(hostId, cutTime);
+ for(Long vmId : mostlikelyStoppedVMs) {
+ VMInstanceVO vm = _vmDao.findById(vmId);
+ assert(vm != null);
+ handlePowerOffReportWithNoPendingJobsOnVM(vm);
+ }
+
+ List<Long> vmsWithRecentReport = listVMInTransitionStateWithRecentReportOnUpHost(hostId, cutTime);
+ for(Long vmId : vmsWithRecentReport) {
+ VMInstanceVO vm = _vmDao.findById(vmId);
+ assert(vm != null);
+ if(vm.getPowerState() == PowerState.PowerOn)
+ handlePowerOnReportWithNoPendingJobsOnVM(vm);
+ else
+ handlePowerOffReportWithNoPendingJobsOnVM(vm);
+ }
+ }
+
+ private void scanStalledVMInTransitionStateOnDisconnectedHosts() {
+ Date cutTime = new Date(DateUtil.currentGMTTime().getTime() - VmOpWaitInterval.value()*1000);
+ List<Long> stuckAndUncontrollableVMs = listStalledVMInTransitionStateOnDisconnectedHosts(cutTime);
+ for(Long vmId : stuckAndUncontrollableVMs) {
+ VMInstanceVO vm = _vmDao.findById(vmId);
+
+ // We now only alert administrator about this situation
+ _alertMgr.sendAlert(AlertManager.ALERT_TYPE_SYNC, vm.getDataCenterId(), vm.getPodIdToDeployIn(),
+ VM_SYNC_ALERT_SUBJECT, "VM " + vm.getHostName() + "(" + vm.getInstanceName() + ") is stuck in " + vm.getState() + " state and its host is unreachable for too long");
+ }
+ }
+
+
+ // VMs that in transitional state without recent power state report
+ private List<Long> listStalledVMInTransitionStateOnUpHost(long hostId, Date cutTime) {
+ String sql = "SELECT i.* FROM vm_instance as i, host as h WHERE h.status = 'UP' " +
+ "AND h.id = ? AND i.power_state_update_time < ? AND i.host_id = h.id " +
+ "AND (i.state ='Starting' OR i.state='Stopping' OR i.state='Migrating') " +
+ "AND i.id NOT IN (SELECT w.vm_instance_id FROM vm_work_job AS w JOIN async_job AS j ON w.id = j.id WHERE j.job_status = ?)";
+
+ List<Long> l = new ArrayList<Long>();
+ Transaction txn = null;
+ try {
+ txn = Transaction.open(Transaction.CLOUD_DB);
+
+ PreparedStatement pstmt = null;
+ try {
+ pstmt = txn.prepareAutoCloseStatement(sql);
+
+ pstmt.setLong(1, hostId);
+ pstmt.setString(2, DateUtil.getDateDisplayString(TimeZone.getTimeZone("GMT"), cutTime));
+ pstmt.setInt(3, JobInfo.Status.IN_PROGRESS.ordinal());
+ ResultSet rs = pstmt.executeQuery();
+ while(rs.next()) {
+ l.add(rs.getLong(1));
+ }
+ } catch (SQLException e) {
+ } catch (Throwable e) {
+ }
+
+ } finally {
+ if(txn != null)
+ txn.close();
+ }
+ return l;
+ }
+
+ // VMs that in transitional state and recently have power state update
+ private List<Long> listVMInTransitionStateWithRecentReportOnUpHost(long hostId, Date cutTime) {
+ String sql = "SELECT i.* FROM vm_instance as i, host as h WHERE h.status = 'UP' " +
+ "AND h.id = ? AND i.power_state_update_time > ? AND i.host_id = h.id " +
+ "AND (i.state ='Starting' OR i.state='Stopping' OR i.state='Migrating') " +
+ "AND i.id NOT IN (SELECT w.vm_instance_id FROM vm_work_job AS w JOIN async_job AS j ON w.id = j.id WHERE j.job_status = ?)";
+
+ List<Long> l = new ArrayList<Long>();
+ Transaction txn = null;
+ try {
+ txn = Transaction.open(Transaction.CLOUD_DB);
+ PreparedStatement pstmt = null;
+ try {
+ pstmt = txn.prepareAutoCloseStatement(sql);
+
+ pstmt.setLong(1, hostId);
+ pstmt.setString(2, DateUtil.getDateDisplayString(TimeZone.getTimeZone("GMT"), cutTime));
+ pstmt.setInt(3, JobInfo.Status.IN_PROGRESS.ordinal());
+ ResultSet rs = pstmt.executeQuery();
+ while(rs.next()) {
+ l.add(rs.getLong(1));
+ }
+ } catch (SQLException e) {
+ } catch (Throwable e) {
+ }
+ return l;
+ } finally {
+ if(txn != null)
+ txn.close();
+ }
+ }
+
+ private List<Long> listStalledVMInTransitionStateOnDisconnectedHosts(Date cutTime) {
+ String sql = "SELECT i.* FROM vm_instance as i, host as h WHERE h.status != 'UP' " +
+ "AND i.power_state_update_time < ? AND i.host_id = h.id " +
+ "AND (i.state ='Starting' OR i.state='Stopping' OR i.state='Migrating') " +
+ "AND i.id NOT IN (SELECT w.vm_instance_id FROM vm_work_job AS w JOIN async_job AS j ON w.id = j.id WHERE j.job_status = ?)";
+
+ List<Long> l = new ArrayList<Long>();
+ Transaction txn = null;
+ try {
+ txn = Transaction.open(Transaction.CLOUD_DB);
+ PreparedStatement pstmt = null;
+ try {
+ pstmt = txn.prepareAutoCloseStatement(sql);
+
+ pstmt.setString(1, DateUtil.getDateDisplayString(TimeZone.getTimeZone("GMT"), cutTime));
+ pstmt.setInt(2, JobInfo.Status.IN_PROGRESS.ordinal());
+ ResultSet rs = pstmt.executeQuery();
+ while(rs.next()) {
+ l.add(rs.getLong(1));
+ }
+ } catch (SQLException e) {
+ } catch (Throwable e) {
+ }
+ return l;
+ } finally {
+ if(txn != null)
+ txn.close();
+ }
+ }
}
http://git-wip-us.apache.org/repos/asf/cloudstack/blob/cf94cfb3/engine/orchestration/test/com/cloud/vm/VirtualMachineManagerImplTest.java
----------------------------------------------------------------------
diff --git a/engine/orchestration/test/com/cloud/vm/VirtualMachineManagerImplTest.java b/engine/orchestration/test/com/cloud/vm/VirtualMachineManagerImplTest.java
index 7d55064..f864bab 100644
--- a/engine/orchestration/test/com/cloud/vm/VirtualMachineManagerImplTest.java
+++ b/engine/orchestration/test/com/cloud/vm/VirtualMachineManagerImplTest.java
@@ -98,6 +98,7 @@ import com.cloud.utils.Pair;
import com.cloud.utils.db.EntityManager;
import com.cloud.utils.exception.CloudRuntimeException;
import com.cloud.vm.VirtualMachine.Event;
+import com.cloud.vm.VirtualMachine.PowerState;
import com.cloud.vm.VirtualMachine.State;
import com.cloud.vm.dao.UserVmDao;
import com.cloud.vm.dao.UserVmDetailsDao;
@@ -397,7 +398,7 @@ public class VirtualMachineManagerImplTest {
CheckVirtualMachineAnswer checkVmAnswerMock = mock(CheckVirtualMachineAnswer.class);
when(checkVmAnswerMock.getResult()).thenReturn(true);
- when(checkVmAnswerMock.getState()).thenReturn(State.Running);
+ when(checkVmAnswerMock.getState()).thenReturn(PowerState.PowerOn);
when(_agentMgr.send(anyLong(), isA(CheckVirtualMachineCommand.class))).thenReturn(checkVmAnswerMock);
// Mock the state transitions of vm.
http://git-wip-us.apache.org/repos/asf/cloudstack/blob/cf94cfb3/framework/jobs/src/org/apache/cloudstack/framework/jobs/dao/VmWorkJobDao.java
----------------------------------------------------------------------
diff --git a/framework/jobs/src/org/apache/cloudstack/framework/jobs/dao/VmWorkJobDao.java b/framework/jobs/src/org/apache/cloudstack/framework/jobs/dao/VmWorkJobDao.java
new file mode 100644
index 0000000..dfb063f
--- /dev/null
+++ b/framework/jobs/src/org/apache/cloudstack/framework/jobs/dao/VmWorkJobDao.java
@@ -0,0 +1,35 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+package org.apache.cloudstack.framework.jobs.dao;
+
+import java.util.Date;
+import java.util.List;
+
+import org.apache.cloudstack.framework.jobs.impl.VmWorkJobVO;
+import org.apache.cloudstack.framework.jobs.impl.VmWorkJobVO.Step;
+
+import com.cloud.utils.db.GenericDao;
+import com.cloud.vm.VirtualMachine;
+
+public interface VmWorkJobDao extends GenericDao<VmWorkJobVO, Long> {
+ VmWorkJobVO findPendingWorkJob(VirtualMachine.Type type, long instanceId);
+ List<VmWorkJobVO> listPendingWorkJobs(VirtualMachine.Type type, long instanceId);
+ List<VmWorkJobVO> listPendingWorkJobs(VirtualMachine.Type type, long instanceId, String jobCmd);
+
+ void updateStep(long workJobId, Step step);
+ void expungeCompletedWorkJobs(Date cutDate);
+}
http://git-wip-us.apache.org/repos/asf/cloudstack/blob/cf94cfb3/framework/jobs/src/org/apache/cloudstack/framework/jobs/dao/VmWorkJobDaoImpl.java
----------------------------------------------------------------------
diff --git a/framework/jobs/src/org/apache/cloudstack/framework/jobs/dao/VmWorkJobDaoImpl.java b/framework/jobs/src/org/apache/cloudstack/framework/jobs/dao/VmWorkJobDaoImpl.java
new file mode 100644
index 0000000..77515a7
--- /dev/null
+++ b/framework/jobs/src/org/apache/cloudstack/framework/jobs/dao/VmWorkJobDaoImpl.java
@@ -0,0 +1,125 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+package org.apache.cloudstack.framework.jobs.dao;
+
+import java.util.Date;
+import java.util.List;
+
+import javax.annotation.PostConstruct;
+
+import org.apache.cloudstack.framework.jobs.impl.VmWorkJobVO;
+import org.apache.cloudstack.framework.jobs.impl.VmWorkJobVO.Step;
+import org.apache.cloudstack.jobs.JobInfo;
+
+import com.cloud.utils.DateUtil;
+import com.cloud.utils.db.Filter;
+import com.cloud.utils.db.GenericDaoBase;
+import com.cloud.utils.db.SearchBuilder;
+import com.cloud.utils.db.SearchCriteria;
+import com.cloud.utils.db.SearchCriteria.Op;
+import com.cloud.vm.VirtualMachine;
+
+public class VmWorkJobDaoImpl extends GenericDaoBase<VmWorkJobVO, Long> implements VmWorkJobDao {
+
+ protected SearchBuilder<VmWorkJobVO> PendingWorkJobSearch;
+ protected SearchBuilder<VmWorkJobVO> PendingWorkJobByCommandSearch;
+ protected SearchBuilder<VmWorkJobVO> ExpungeWorkJobSearch;
+
+ public VmWorkJobDaoImpl() {
+ }
+
+ @PostConstruct
+ public void init() {
+ PendingWorkJobSearch = createSearchBuilder();
+ PendingWorkJobSearch.and("jobStatus", PendingWorkJobSearch.entity().getStatus(), Op.EQ);
+ PendingWorkJobSearch.and("vmType", PendingWorkJobSearch.entity().getVmType(), Op.EQ);
+ PendingWorkJobSearch.and("vmInstanceId", PendingWorkJobSearch.entity().getVmInstanceId(), Op.EQ);
+ PendingWorkJobSearch.and("step", PendingWorkJobSearch.entity().getStep(), Op.NEQ);
+ PendingWorkJobSearch.done();
+
+ PendingWorkJobByCommandSearch = createSearchBuilder();
+ PendingWorkJobByCommandSearch.and("jobStatus", PendingWorkJobByCommandSearch.entity().getStatus(), Op.EQ);
+ PendingWorkJobByCommandSearch.and("vmType", PendingWorkJobByCommandSearch.entity().getVmType(), Op.EQ);
+ PendingWorkJobByCommandSearch.and("vmInstanceId", PendingWorkJobByCommandSearch.entity().getVmInstanceId(), Op.EQ);
+ PendingWorkJobByCommandSearch.and("step", PendingWorkJobByCommandSearch.entity().getStep(), Op.NEQ);
+ PendingWorkJobByCommandSearch.and("cmd", PendingWorkJobByCommandSearch.entity().getCmd(), Op.EQ);
+ PendingWorkJobByCommandSearch.done();
+
+ ExpungeWorkJobSearch = createSearchBuilder();
+ ExpungeWorkJobSearch.and("lastUpdated", ExpungeWorkJobSearch.entity().getLastUpdated(), Op.LT);
+ ExpungeWorkJobSearch.and("jobStatus", ExpungeWorkJobSearch.entity().getStatus(), Op.NEQ);
+ ExpungeWorkJobSearch.done();
+ }
+
+ @Override
+ public VmWorkJobVO findPendingWorkJob(VirtualMachine.Type type, long instanceId) {
+
+ SearchCriteria<VmWorkJobVO> sc = PendingWorkJobSearch.create();
+ sc.setParameters("jobStatus", JobInfo. Status.IN_PROGRESS);
+ sc.setParameters("vmType", type);
+ sc.setParameters("vmInstanceId", instanceId);
+
+ Filter filter = new Filter(VmWorkJobVO.class, "created", true, null, null);
+ List<VmWorkJobVO> result = this.listBy(sc, filter);
+ if(result != null && result.size() > 0)
+ return result.get(0);
+
+ return null;
+ }
+
+ @Override
+ public List<VmWorkJobVO> listPendingWorkJobs(VirtualMachine.Type type, long instanceId) {
+
+ SearchCriteria<VmWorkJobVO> sc = PendingWorkJobSearch.create();
+ sc.setParameters("jobStatus", JobInfo.Status.IN_PROGRESS);
+ sc.setParameters("vmType", type);
+ sc.setParameters("vmInstanceId", instanceId);
+
+ Filter filter = new Filter(VmWorkJobVO.class, "created", true, null, null);
+ return this.listBy(sc, filter);
+ }
+
+ @Override
+ public List<VmWorkJobVO> listPendingWorkJobs(VirtualMachine.Type type, long instanceId, String jobCmd) {
+
+ SearchCriteria<VmWorkJobVO> sc = PendingWorkJobByCommandSearch.create();
+ sc.setParameters("jobStatus", JobInfo.Status.IN_PROGRESS);
+ sc.setParameters("vmType", type);
+ sc.setParameters("vmInstanceId", instanceId);
+ sc.setParameters("cmd", jobCmd);
+
+ Filter filter = new Filter(VmWorkJobVO.class, "created", true, null, null);
+ return this.listBy(sc, filter);
+ }
+
+ @Override
+ public void updateStep(long workJobId, Step step) {
+ VmWorkJobVO jobVo = findById(workJobId);
+ jobVo.setStep(step);
+ jobVo.setLastUpdated(DateUtil.currentGMTTime());
+ update(workJobId, jobVo);
+ }
+
+ @Override
+ public void expungeCompletedWorkJobs(Date cutDate) {
+ SearchCriteria<VmWorkJobVO> sc = ExpungeWorkJobSearch.create();
+ sc.setParameters("lastUpdated",cutDate);
+ sc.setParameters("jobStatus", JobInfo.Status.IN_PROGRESS);
+
+ expunge(sc);
+ }
+}
http://git-wip-us.apache.org/repos/asf/cloudstack/blob/cf94cfb3/framework/jobs/src/org/apache/cloudstack/framework/jobs/impl/VmWorkJobVO.java
----------------------------------------------------------------------
diff --git a/framework/jobs/src/org/apache/cloudstack/framework/jobs/impl/VmWorkJobVO.java b/framework/jobs/src/org/apache/cloudstack/framework/jobs/impl/VmWorkJobVO.java
new file mode 100644
index 0000000..860cc57
--- /dev/null
+++ b/framework/jobs/src/org/apache/cloudstack/framework/jobs/impl/VmWorkJobVO.java
@@ -0,0 +1,101 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+package org.apache.cloudstack.framework.jobs.impl;
+
+import javax.persistence.Column;
+import javax.persistence.DiscriminatorValue;
+import javax.persistence.Entity;
+import javax.persistence.EnumType;
+import javax.persistence.Enumerated;
+import javax.persistence.PrimaryKeyJoinColumn;
+import javax.persistence.Table;
+
+import org.apache.cloudstack.framework.jobs.impl.AsyncJobVO;
+
+import com.cloud.vm.VirtualMachine;
+
+@Entity
+@Table(name="vm_work_job")
+@DiscriminatorValue(value="VmWork")
+@PrimaryKeyJoinColumn(name="id")
+public class VmWorkJobVO extends AsyncJobVO {
+
+ // These steps are rather arbitrary. What's recorded depends on the
+ // the operation being performed.
+ public enum Step {
+ Filed(false),
+ Prepare(false),
+ Starting(true),
+ Started(false),
+ Release(false),
+ Done(false),
+ Migrating(true),
+ Reconfiguring(false),
+ Error(false);
+
+ boolean updateState; // Should the VM State be updated after this step?
+ private Step(boolean updateState) {
+ this.updateState = updateState;
+ }
+
+ boolean updateState() {
+ return updateState;
+ }
+ }
+
+ @Column(name="step")
+ Step step;
+
+ @Column(name="vm_type")
+ @Enumerated(value=EnumType.STRING)
+ VirtualMachine.Type vmType;
+
+ @Column(name="vm_instance_id")
+ long vmInstanceId;
+
+ protected VmWorkJobVO() {
+ }
+
+ public VmWorkJobVO(String related) {
+ step = Step.Filed;
+ setRelated(related);
+ }
+
+ public Step getStep() {
+ return step;
+ }
+
+ public void setStep(Step step) {
+ this.step = step;
+ }
+
+ public VirtualMachine.Type getVmType() {
+ return vmType;
+ }
+
+ public void setVmType(VirtualMachine.Type vmType) {
+ this.vmType = vmType;
+ }
+
+ public long getVmInstanceId() {
+ return vmInstanceId;
+ }
+
+ public void setVmInstanceId(long vmInstanceId) {
+ this.vmInstanceId = vmInstanceId;
+ }
+}
http://git-wip-us.apache.org/repos/asf/cloudstack/blob/cf94cfb3/server/src/com/cloud/ha/CheckOnAgentInvestigator.java
----------------------------------------------------------------------
diff --git a/server/src/com/cloud/ha/CheckOnAgentInvestigator.java b/server/src/com/cloud/ha/CheckOnAgentInvestigator.java
index afb2882..6d88d18 100644
--- a/server/src/com/cloud/ha/CheckOnAgentInvestigator.java
+++ b/server/src/com/cloud/ha/CheckOnAgentInvestigator.java
@@ -31,7 +31,6 @@ import com.cloud.host.Status;
import com.cloud.utils.component.AdapterBase;
import com.cloud.vm.VirtualMachine;
import com.cloud.vm.VirtualMachine.PowerState;
-import com.cloud.vm.VirtualMachine.State;
@Local(value=Investigator.class)
public class CheckOnAgentInvestigator extends AdapterBase implements Investigator {