You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@cloudstack.apache.org by ko...@apache.org on 2015/09/01 14:31:51 UTC

[1/2] git commit: updated refs/heads/master to f732c7d

Repository: cloudstack
Updated Branches:
  refs/heads/master c8bfeb88c -> f732c7d1e


CLOUDSTACK-8785: Proper enforcement of retry count (max.retries) for all work type handled by HighAvailability manager
Retry count is properly enforced for all work types in HA manager. Also reorganized some of the code for easy testing.


Project: http://git-wip-us.apache.org/repos/asf/cloudstack/repo
Commit: http://git-wip-us.apache.org/repos/asf/cloudstack/commit/cbf2c3bb
Tree: http://git-wip-us.apache.org/repos/asf/cloudstack/tree/cbf2c3bb
Diff: http://git-wip-us.apache.org/repos/asf/cloudstack/diff/cbf2c3bb

Branch: refs/heads/master
Commit: cbf2c3bbf66214ec4a6b6f0666657185446c88ce
Parents: cab57b2
Author: Koushik Das <ko...@apache.org>
Authored: Fri Aug 28 17:59:17 2015 +0530
Committer: Koushik Das <ko...@apache.org>
Committed: Fri Aug 28 17:59:17 2015 +0530

----------------------------------------------------------------------
 .../cloud/ha/HighAvailabilityManagerImpl.java   | 126 +++++++++----------
 .../ha/HighAvailabilityManagerImplTest.java     |  44 +++++++
 2 files changed, 103 insertions(+), 67 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/cloudstack/blob/cbf2c3bb/server/src/com/cloud/ha/HighAvailabilityManagerImpl.java
----------------------------------------------------------------------
diff --git a/server/src/com/cloud/ha/HighAvailabilityManagerImpl.java b/server/src/com/cloud/ha/HighAvailabilityManagerImpl.java
index bec1fd5..cce4457 100644
--- a/server/src/com/cloud/ha/HighAvailabilityManagerImpl.java
+++ b/server/src/com/cloud/ha/HighAvailabilityManagerImpl.java
@@ -383,10 +383,10 @@ public class HighAvailabilityManagerImpl extends ManagerBase implements HighAvai
         }
 
         List<HaWorkVO> items = _haDao.findPreviousHA(vm.getId());
-        int maxRetries = 0;
+        int timesTried = 0;
         for (HaWorkVO item : items) {
-            if (maxRetries < item.getTimesTried() && !item.canScheduleNew(_timeBetweenFailures)) {
-                maxRetries = item.getTimesTried();
+            if (timesTried < item.getTimesTried() && !item.canScheduleNew(_timeBetweenFailures)) {
+                timesTried = item.getTimesTried();
                 break;
             }
         }
@@ -396,7 +396,7 @@ public class HighAvailabilityManagerImpl extends ManagerBase implements HighAvai
         }
 
         HaWorkVO work = new HaWorkVO(vm.getId(), vm.getType(), WorkType.HA, investigate ? Step.Investigating : Step.Scheduled,
-                hostId != null ? hostId : 0L, vm.getState(), maxRetries + 1, vm.getUpdated());
+                hostId != null ? hostId : 0L, vm.getState(), timesTried, vm.getUpdated());
         _haDao.persist(work);
 
         if (s_logger.isInfoEnabled()) {
@@ -407,7 +407,7 @@ public class HighAvailabilityManagerImpl extends ManagerBase implements HighAvai
 
     }
 
-    protected Long restart(HaWorkVO work) {
+    protected Long restart(final HaWorkVO work) {
         List<HaWorkVO> items = _haDao.listFutureHaWorkForVm(work.getInstanceId(), work.getId());
         if (items.size() > 0) {
             StringBuilder str = new StringBuilder("Cancelling this work item because newer ones have been scheduled.  Work Ids = [");
@@ -571,11 +571,6 @@ public class HighAvailabilityManagerImpl extends ManagerBase implements HighAvai
             return null;
         }
 
-        if (work.getTimesTried() > _maxRetries) {
-            s_logger.warn("Retried to max times so deleting: " + vmId);
-            return null;
-        }
-
         try {
             HashMap<VirtualMachineProfile.Param, Object> params = new HashMap<VirtualMachineProfile.Param, Object>();
             if (_haTag != null) {
@@ -663,7 +658,7 @@ public class HighAvailabilityManagerImpl extends ManagerBase implements HighAvai
         _haDao.delete(vm.getId(), WorkType.Destroy);
     }
 
-    protected Long destroyVM(HaWorkVO work) {
+    protected Long destroyVM(final HaWorkVO work) {
         final VirtualMachine vm = _itMgr.findById(work.getInstanceId());
         s_logger.info("Destroying " + vm.toString());
         try {
@@ -690,7 +685,6 @@ public class HighAvailabilityManagerImpl extends ManagerBase implements HighAvai
             s_logger.debug("concurrent operation: " + e.getMessage());
         }
 
-        work.setTimesTried(work.getTimesTried() + 1);
         return (System.currentTimeMillis() >> 10) + _stopRetryInterval;
     }
 
@@ -738,10 +732,6 @@ public class HighAvailabilityManagerImpl extends ManagerBase implements HighAvai
             s_logger.debug("operation timed out: " + e.getMessage());
         }
 
-        work.setTimesTried(work.getTimesTried() + 1);
-        if (s_logger.isDebugEnabled()) {
-            s_logger.debug("Stop was unsuccessful.  Rescheduling");
-        }
         return (System.currentTimeMillis() >> 10) + _stopRetryInterval;
     }
 
@@ -765,6 +755,56 @@ public class HighAvailabilityManagerImpl extends ManagerBase implements HighAvai
         return vms;
     }
 
+    private void rescheduleWork(final HaWorkVO work, final long nextTime) {
+        s_logger.info("Rescheduling work " + work + " to try again at " + new Date(nextTime << 10));
+        work.setTimeToTry(nextTime);
+        work.setTimesTried(work.getTimesTried() + 1);
+        work.setServerId(null);
+        work.setDateTaken(null);
+    }
+
+    private void processWork(final HaWorkVO work) {
+        try {
+            final WorkType wt = work.getWorkType();
+            Long nextTime = null;
+            if (wt == WorkType.Migration) {
+                nextTime = migrate(work);
+            } else if (wt == WorkType.HA) {
+                nextTime = restart(work);
+            } else if (wt == WorkType.Stop || wt == WorkType.CheckStop || wt == WorkType.ForceStop) {
+                nextTime = stopVM(work);
+            } else if (wt == WorkType.Destroy) {
+                nextTime = destroyVM(work);
+            } else {
+                assert false : "How did we get here with " + wt.toString();
+                return;
+            }
+
+            if (nextTime == null) {
+                s_logger.info("Completed work " + work);
+                work.setStep(Step.Done);
+            } else {
+                rescheduleWork(work, nextTime.longValue());
+            }
+        } catch (Exception e) {
+            s_logger.warn("Encountered unhandled exception during HA process, reschedule work", e);
+
+            long nextTime = (System.currentTimeMillis() >> 10) + _restartRetryInterval;
+            rescheduleWork(work, nextTime);
+
+            // if restart failed in the middle due to exception, VM state may has been changed
+            // recapture into the HA worker so that it can really continue in it next turn
+            VMInstanceVO vm = _instanceDao.findById(work.getInstanceId());
+            work.setUpdateTime(vm.getUpdated());
+            work.setPreviousState(vm.getState());
+        }
+        if (!Step.Done.equals(work.getStep()) && work.getTimesTried() >= _maxRetries) {
+            s_logger.warn("Giving up, retried max. times for work: " + work);
+            work.setStep(Step.Done);
+        }
+        _haDao.update(work.getId(), work);
+    }
+
     @Override
     public boolean configure(final String name, final Map<String, Object> xmlParams) throws ConfigurationException {
         _serverId = _msServer.getId();
@@ -881,7 +921,7 @@ public class HighAvailabilityManagerImpl extends ManagerBase implements HighAvai
         private void runWithContext() {
             HaWorkVO work = null;
             try {
-                s_logger.trace("Checking the database");
+                s_logger.trace("Checking the database for work");
                 work = _haDao.take(_serverId);
                 if (work == null) {
                     try {
@@ -896,56 +936,8 @@ public class HighAvailabilityManagerImpl extends ManagerBase implements HighAvai
                 }
 
                 NDC.push("work-" + work.getId());
-                s_logger.info("Processing " + work);
-
-                try {
-                    final WorkType wt = work.getWorkType();
-                    Long nextTime = null;
-                    if (wt == WorkType.Migration) {
-                        nextTime = migrate(work);
-                    } else if (wt == WorkType.HA) {
-                        nextTime = restart(work);
-                    } else if (wt == WorkType.Stop || wt == WorkType.CheckStop || wt == WorkType.ForceStop) {
-                        nextTime = stopVM(work);
-                    } else if (wt == WorkType.Destroy) {
-                        nextTime = destroyVM(work);
-                    } else {
-                        assert false : "How did we get here with " + wt.toString();
-                        return;
-                    }
-
-                    if (nextTime == null) {
-                        s_logger.info("Completed " + work);
-                        work.setStep(Step.Done);
-                    } else {
-                        s_logger.info("Rescheduling " + work + " to try again at " + new Date(nextTime << 10));
-                        work.setTimeToTry(nextTime);
-                        work.setTimesTried(work.getTimesTried() + 1);
-                        work.setServerId(null);
-                        work.setDateTaken(null);
-                    }
-                } catch (Exception e) {
-                    s_logger.warn("Encountered unhandled exception during HA process, reschedule retry", e);
-
-                    long nextTime = (System.currentTimeMillis() >> 10) + _restartRetryInterval;
-
-                    s_logger.info("Rescheduling " + work + " to try again at " + new Date(nextTime << 10));
-                    work.setTimeToTry(nextTime);
-                    work.setTimesTried(work.getTimesTried() + 1);
-                    work.setServerId(null);
-                    work.setDateTaken(null);
-
-                    // if restart failed in the middle due to exception, VM state may has been changed
-                    // recapture into the HA worker so that it can really continue in it next turn
-                    VMInstanceVO vm = _instanceDao.findById(work.getInstanceId());
-                    work.setUpdateTime(vm.getUpdated());
-                    work.setPreviousState(vm.getState());
-                    if (!Step.Done.equals(work.getStep()) && work.getTimesTried() >= _maxRetries) {
-                        s_logger.warn("Giving up, retries max times for work: " + work);
-                        work.setStep(Step.Done);
-                    }
-                }
-                _haDao.update(work.getId(), work);
+                s_logger.info("Processing work " + work);
+                processWork(work);
             } catch (final Throwable th) {
                 s_logger.error("Caught this throwable, ", th);
             } finally {

http://git-wip-us.apache.org/repos/asf/cloudstack/blob/cbf2c3bb/server/test/com/cloud/ha/HighAvailabilityManagerImplTest.java
----------------------------------------------------------------------
diff --git a/server/test/com/cloud/ha/HighAvailabilityManagerImplTest.java b/server/test/com/cloud/ha/HighAvailabilityManagerImplTest.java
index 087c71a..3102c9a 100644
--- a/server/test/com/cloud/ha/HighAvailabilityManagerImplTest.java
+++ b/server/test/com/cloud/ha/HighAvailabilityManagerImplTest.java
@@ -16,11 +16,14 @@
 // under the License.
 package com.cloud.ha;
 
+import static org.junit.Assert.assertNotNull;
 import static org.junit.Assert.assertNull;
 import static org.junit.Assert.assertTrue;
 
 import java.lang.reflect.Array;
 import java.lang.reflect.Field;
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
@@ -31,6 +34,7 @@ import org.apache.cloudstack.engine.orchestration.service.VolumeOrchestrationSer
 import org.apache.cloudstack.framework.config.dao.ConfigurationDao;
 import org.apache.cloudstack.managed.context.ManagedContext;
 import org.junit.Before;
+import org.junit.BeforeClass;
 import org.junit.Test;
 import org.junit.runner.RunWith;
 import org.mockito.Mock;
@@ -44,6 +48,8 @@ import com.cloud.dc.DataCenterVO;
 import com.cloud.dc.HostPodVO;
 import com.cloud.dc.dao.DataCenterDao;
 import com.cloud.dc.dao.HostPodDao;
+import com.cloud.ha.HighAvailabilityManager.Step;
+import com.cloud.ha.HighAvailabilityManager.WorkType;
 import com.cloud.ha.dao.HighAvailabilityDao;
 import com.cloud.host.Host;
 import com.cloud.host.HostVO;
@@ -106,6 +112,17 @@ public class HighAvailabilityManagerImplTest {
     HostVO hostVO;
 
     HighAvailabilityManagerImpl highAvailabilityManager;
+    HighAvailabilityManagerImpl highAvailabilityManagerSpy;
+    static Method processWorkMethod = null;
+
+    @BeforeClass
+    public static void initOnce() {
+        try {
+            processWorkMethod = HighAvailabilityManagerImpl.class.getDeclaredMethod("processWork", HaWorkVO.class);
+            processWorkMethod.setAccessible(true);
+        } catch (NoSuchMethodException e) {
+        }
+    }
 
     @Before
     public void setup() throws IllegalArgumentException,
@@ -123,8 +140,12 @@ public class HighAvailabilityManagerImplTest {
                         injectField.set(highAvailabilityManager, obj);
                     }
                 }
+            } else if (injectField.getName().equals("_maxRetries")) {
+                injectField.setAccessible(true);
+                injectField.set(highAvailabilityManager, 5);
             }
         }
+        highAvailabilityManagerSpy = Mockito.spy(highAvailabilityManager);
     }
 
     @Test
@@ -201,4 +222,27 @@ public class HighAvailabilityManagerImplTest {
 
         assertNull(highAvailabilityManager.investigate(1l));
     }
+
+    private void processWorkWithRetryCount(int count, Step expectedStep) {
+        assertNotNull(processWorkMethod);
+        HaWorkVO work = new HaWorkVO(1l, VirtualMachine.Type.User, WorkType.Migration, Step.Scheduled, 1l, VirtualMachine.State.Running, count, 12345678l);
+        Mockito.doReturn(12345678l).when(highAvailabilityManagerSpy).migrate(work);
+        try {
+            processWorkMethod.invoke(highAvailabilityManagerSpy, work);
+        } catch (IllegalAccessException e) {
+        } catch (IllegalArgumentException e) {
+        } catch (InvocationTargetException e) {
+        }
+        assertTrue(work.getStep() == expectedStep);
+    }
+
+    @Test
+    public void processWorkWithRetryCountExceeded() {
+        processWorkWithRetryCount(5, Step.Done); // max retry count is 5
+    }
+
+    @Test
+    public void processWorkWithRetryCountNotExceeded() {
+        processWorkWithRetryCount(3, Step.Scheduled);
+    }
 }


[2/2] git commit: updated refs/heads/master to f732c7d

Posted by ko...@apache.org.
Merge pull request #760 from koushik-das/CLOUDSTACK-8785

CLOUDSTACK-8785: Proper enforcement of retry count (max.retries) for all work type handled by HighAvailability manager
Retry count is properly enforced for all work types in HA manager. Also reorganized some of the code for easy testing.

Signed-off-by: Koushik Das <ko...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/cloudstack/repo
Commit: http://git-wip-us.apache.org/repos/asf/cloudstack/commit/f732c7d1
Tree: http://git-wip-us.apache.org/repos/asf/cloudstack/tree/f732c7d1
Diff: http://git-wip-us.apache.org/repos/asf/cloudstack/diff/f732c7d1

Branch: refs/heads/master
Commit: f732c7d1e9b2434822718dff9624369294f923c2
Parents: c8bfeb8 cbf2c3b
Author: Koushik Das <ko...@apache.org>
Authored: Tue Sep 1 17:58:45 2015 +0530
Committer: Koushik Das <ko...@apache.org>
Committed: Tue Sep 1 17:59:08 2015 +0530

----------------------------------------------------------------------
 .../cloud/ha/HighAvailabilityManagerImpl.java   | 126 +++++++++----------
 .../ha/HighAvailabilityManagerImplTest.java     |  44 +++++++
 2 files changed, 103 insertions(+), 67 deletions(-)
----------------------------------------------------------------------