You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@cloudstack.apache.org by ya...@apache.org on 2013/01/06 06:13:50 UTC

git commit: CLOUDSTACK-799: Redundant router: Speed up RvR status update

Updated Branches:
  refs/heads/master f1c15f0c2 -> aa9701a10


CLOUDSTACK-799: Redundant router: Speed up RvR status update

The basic idea behind this is, deploy a fix sized threadpool for updating RvR
status, then using producer/consumer model. There is a global configuration
router.check.poolsize(10 by default) to control the pool size.

Using pool size 100 for 1000 RvR is tested with simulator and works well.

Also we can adjust the global configuration option router.check.interval to e.g.
60s from default 30s to mitigate the issue.


Project: http://git-wip-us.apache.org/repos/asf/incubator-cloudstack/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-cloudstack/commit/aa9701a1
Tree: http://git-wip-us.apache.org/repos/asf/incubator-cloudstack/tree/aa9701a1
Diff: http://git-wip-us.apache.org/repos/asf/incubator-cloudstack/diff/aa9701a1

Branch: refs/heads/master
Commit: aa9701a10c2ed1aad6edf552e93d6fa792a135f9
Parents: f1c15f0
Author: Sheng Yang <sh...@citrix.com>
Authored: Thu Nov 29 16:57:11 2012 -0800
Committer: Sheng Yang <sh...@citrix.com>
Committed: Sat Jan 5 21:02:37 2013 -0800

----------------------------------------------------------------------
 server/src/com/cloud/configuration/Config.java     |    1 +
 server/src/com/cloud/network/dao/NetworkDao.java   |    1 +
 .../src/com/cloud/network/dao/NetworkDaoImpl.java  |    9 ++
 .../router/VirtualNetworkApplianceManagerImpl.java |   94 ++++++++++++---
 .../test/com/cloud/vpc/dao/MockNetworkDaoImpl.java |    6 +
 setup/db/db/schema-40to410.sql                     |    2 +
 6 files changed, 97 insertions(+), 16 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-cloudstack/blob/aa9701a1/server/src/com/cloud/configuration/Config.java
----------------------------------------------------------------------
diff --git a/server/src/com/cloud/configuration/Config.java b/server/src/com/cloud/configuration/Config.java
index ea32025..92313ea 100755
--- a/server/src/com/cloud/configuration/Config.java
+++ b/server/src/com/cloud/configuration/Config.java
@@ -167,6 +167,7 @@ public enum Config {
 	RouterStatsInterval("Advanced", NetworkManager.class, Integer.class, "router.stats.interval", "300", "Interval (in seconds) to report router statistics.", null),
 	ExternalNetworkStatsInterval("Advanced", NetworkManager.class, Integer.class, "external.network.stats.interval", "300", "Interval (in seconds) to report external network statistics.", null),	
 	RouterCheckInterval("Advanced", NetworkManager.class, Integer.class, "router.check.interval", "30", "Interval (in seconds) to report redundant router status.", null),
+	RouterCheckPoolSize("Advanced", NetworkManager.class, Integer.class, "router.check.poolsize", "10", "Numbers of threads using to check redundant router status.", null),
 	RouterTemplateId("Advanced", NetworkManager.class, Long.class, "router.template.id", "1", "Default ID for template.", null),
     RouterExtraPublicNics("Advanced", NetworkManager.class, Integer.class, "router.extra.public.nics", "2", "specify extra public nics used for virtual router(up to 5)", "0-5"),
 	StartRetry("Advanced", AgentManager.class, Integer.class, "start.retry", "10", "Number of times to retry create and start commands", null),

http://git-wip-us.apache.org/repos/asf/incubator-cloudstack/blob/aa9701a1/server/src/com/cloud/network/dao/NetworkDao.java
----------------------------------------------------------------------
diff --git a/server/src/com/cloud/network/dao/NetworkDao.java b/server/src/com/cloud/network/dao/NetworkDao.java
index 4079955..1fefb75 100644
--- a/server/src/com/cloud/network/dao/NetworkDao.java
+++ b/server/src/com/cloud/network/dao/NetworkDao.java
@@ -110,4 +110,5 @@ public interface NetworkDao extends GenericDao<NetworkVO, Long> {
 
     List<NetworkVO> listNetworksByAccount(long accountId, long zoneId, Network.GuestType type, boolean isSystem);
 
+    List<NetworkVO> listRedundantNetworks();
 }

http://git-wip-us.apache.org/repos/asf/incubator-cloudstack/blob/aa9701a1/server/src/com/cloud/network/dao/NetworkDaoImpl.java
----------------------------------------------------------------------
diff --git a/server/src/com/cloud/network/dao/NetworkDaoImpl.java b/server/src/com/cloud/network/dao/NetworkDaoImpl.java
index 8228393..27ae3a3 100644
--- a/server/src/com/cloud/network/dao/NetworkDaoImpl.java
+++ b/server/src/com/cloud/network/dao/NetworkDaoImpl.java
@@ -103,6 +103,7 @@ public class NetworkDaoImpl extends GenericDaoBase<NetworkVO, Long> implements N
         AllFieldsSearch.and("vpcId", AllFieldsSearch.entity().getVpcId(), Op.EQ);
         SearchBuilder<NetworkOfferingVO> join1 = _ntwkOffDao.createSearchBuilder();
         join1.and("isSystem", join1.entity().isSystemOnly(), Op.EQ);
+        join1.and("isRedundant", join1.entity().getRedundantRouter(), Op.EQ);
         AllFieldsSearch.join("offerings", join1, AllFieldsSearch.entity().getNetworkOfferingId(), join1.entity().getId(), JoinBuilder.JoinType.INNER);
         AllFieldsSearch.done();
 
@@ -574,4 +575,12 @@ public class NetworkDaoImpl extends GenericDaoBase<NetworkVO, Long> implements N
         List<NetworkVO> networks = search(sc, null);
         return networks;
     }
+
+    @Override
+    public List<NetworkVO> listRedundantNetworks() {
+        SearchCriteria<NetworkVO> sc = AllFieldsSearch.create();
+        sc.setJoinParameters("offerings", "isRedundant", true);
+        
+        return listBy(sc, null);
+    }
 }

http://git-wip-us.apache.org/repos/asf/incubator-cloudstack/blob/aa9701a1/server/src/com/cloud/network/router/VirtualNetworkApplianceManagerImpl.java
----------------------------------------------------------------------
diff --git a/server/src/com/cloud/network/router/VirtualNetworkApplianceManagerImpl.java b/server/src/com/cloud/network/router/VirtualNetworkApplianceManagerImpl.java
index c61e979..fcba6df 100755
--- a/server/src/com/cloud/network/router/VirtualNetworkApplianceManagerImpl.java
+++ b/server/src/com/cloud/network/router/VirtualNetworkApplianceManagerImpl.java
@@ -28,9 +28,14 @@ import java.util.HashSet;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
+import java.util.Queue;
 import java.util.Set;
 import java.util.TimeZone;
+import java.util.concurrent.BlockingQueue;
+import java.util.concurrent.ConcurrentLinkedQueue;
+import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
+import java.util.concurrent.LinkedBlockingQueue;
 import java.util.concurrent.ScheduledExecutorService;
 import java.util.concurrent.TimeUnit;
 
@@ -339,6 +344,7 @@ public class VirtualNetworkApplianceManagerImpl implements VirtualNetworkApplian
 
     int _routerStatsInterval = 300;
     int _routerCheckInterval = 30;
+    int _rvrStatusUpdatePoolSize = 10;
     protected ServiceOfferingVO _offering;
     private String _dnsBasicZoneUpdates = "all";
     private Set<String> _guestOSNeedGatewayOnNonDefaultNetwork = new HashSet<String>();
@@ -353,8 +359,11 @@ public class VirtualNetworkApplianceManagerImpl implements VirtualNetworkApplian
     ScheduledExecutorService _executor;
     ScheduledExecutorService _checkExecutor;
     ScheduledExecutorService _networkStatsUpdateExecutor;
+    ExecutorService _rvrStatusUpdateExecutor;
 
     Account _systemAcct;
+    
+    BlockingQueue<Long> _vrUpdateQueue = null;
 
     @Override
     public boolean sendSshKeysToHost(Long hostId, String pubKey, String prvKey) {
@@ -582,7 +591,7 @@ public class VirtualNetworkApplianceManagerImpl implements VirtualNetworkApplian
         _executor = Executors.newScheduledThreadPool(1, new NamedThreadFactory("RouterMonitor"));
         _checkExecutor = Executors.newScheduledThreadPool(1, new NamedThreadFactory("RouterStatusMonitor"));
         _networkStatsUpdateExecutor = Executors.newScheduledThreadPool(1, new NamedThreadFactory("NetworkStatsUpdater"));
-
+        
         final ComponentLocator locator = ComponentLocator.getCurrentLocator();
 
         final Map<String, String> configs = _configDao.getConfiguration("AgentManager", params);
@@ -609,7 +618,18 @@ public class VirtualNetworkApplianceManagerImpl implements VirtualNetworkApplian
 
         value = configs.get("router.check.interval");
         _routerCheckInterval = NumbersUtil.parseInt(value, 30);
-        
+
+        value = configs.get("router.check.poolsize");
+        _rvrStatusUpdatePoolSize = NumbersUtil.parseInt(value, 10);
+
+        /* 
+         * We assume that one thread can handle 20 requests in 1 minute in normal situation, so here we give the queue size up to 50 minutes.
+         * It's mostly for buffer, since each time CheckRouterTask running, it would add all the redundant networks in the queue immediately
+         */
+        _vrUpdateQueue = new LinkedBlockingQueue<Long>(_rvrStatusUpdatePoolSize * 1000); 
+
+        _rvrStatusUpdateExecutor = Executors.newFixedThreadPool(_rvrStatusUpdatePoolSize, new NamedThreadFactory("RedundantRouterStatusMonitor"));
+
         _instance = configs.get("instance.name");
         if (_instance == null) {
             _instance = "DEFAULT";
@@ -707,6 +727,9 @@ public class VirtualNetworkApplianceManagerImpl implements VirtualNetworkApplian
         
         if (_routerCheckInterval > 0) {
             _checkExecutor.scheduleAtFixedRate(new CheckRouterTask(), _routerCheckInterval, _routerCheckInterval, TimeUnit.SECONDS);
+            for (int i = 0; i < _rvrStatusUpdatePoolSize; i++) {
+                _rvrStatusUpdateExecutor.execute(new RvRStatusUpdateTask());
+            }
         } else {
             s_logger.debug("router.check.interval - " + _routerCheckInterval+ " so not scheduling the redundant router checking thread");
         }
@@ -1042,14 +1065,11 @@ public class VirtualNetworkApplianceManagerImpl implements VirtualNetworkApplian
                 if (host == null || host.getStatus() != Status.Up) {
                     router.setRedundantState(RedundantState.UNKNOWN);
                     updated = true;
-                } else if (host.getManagementServerId() != ManagementServerNode.getManagementServerId()) {
-                    /* Only cover hosts managed by this management server */
-                    continue;
                 } else if (privateIP != null) {
                     final CheckRouterCommand command = new CheckRouterCommand();
                     command.setAccessDetail(NetworkElementCommand.ROUTER_IP, getRouterControlIp(router.getId()));
                     command.setAccessDetail(NetworkElementCommand.ROUTER_NAME, router.getInstanceName());
-                    command.setWait(60);
+                    command.setWait(30);
                     final Answer origAnswer = _agentMgr.easySend(router.getHostId(), command);
                     CheckRouterAnswer answer = null;
                     if (origAnswer instanceof CheckRouterAnswer) {
@@ -1131,11 +1151,11 @@ public class VirtualNetworkApplianceManagerImpl implements VirtualNetworkApplian
         return priority;
     }
 
-    protected class CheckRouterTask implements Runnable {
+    protected class RvRStatusUpdateTask implements Runnable {
 
-        public CheckRouterTask() {
+        public RvRStatusUpdateTask() {
         }
-
+        
         /*
          * In order to make fail-over works well at any time, we have to ensure:
          * 1. Backup router's priority = Master's priority - DELTA + 1
@@ -1198,9 +1218,9 @@ public class VirtualNetworkApplianceManagerImpl implements VirtualNetworkApplian
                         String title = "More than one redundant virtual router is in MASTER state! Router " + router.getHostName() + " and router " + dupRouter.getHostName();
                         String context =  "Virtual router (name: " + router.getHostName() + ", id: " + router.getId() + " and router (name: "
                                 + dupRouter.getHostName() + ", id: " + router.getId() + ") are both in MASTER state! If the problem persist, restart both of routers. ";
-
                         _alertMgr.sendAlert(AlertManager.ALERT_TYPE_DOMAIN_ROUTER, router.getDataCenterIdToDeployIn(), router.getPodIdToDeployIn(), title, context);
                         _alertMgr.sendAlert(AlertManager.ALERT_TYPE_DOMAIN_ROUTER, dupRouter.getDataCenterIdToDeployIn(), dupRouter.getPodIdToDeployIn(), title, context);
+                        s_logger.warn(context);
                     } else {
                             networkRouterMaps.put(routerGuestNtwkId, router);
                         }
@@ -1211,17 +1231,59 @@ public class VirtualNetworkApplianceManagerImpl implements VirtualNetworkApplian
 
         @Override
         public void run() {
+            while (true) {
+                try {
+                    Long networkId = _vrUpdateQueue.take();
+                    List <DomainRouterVO> routers = _routerDao.listByNetworkAndRole(networkId, Role.VIRTUAL_ROUTER);
+
+                    if (routers.size() != 2) {
+                        continue;
+                    }
+                    /*
+                     * We update the router pair which the lower id router owned by this mgmt server, in order
+                     * to prevent duplicate update of router status from cluster mgmt servers
+                     */
+                    DomainRouterVO router = routers.get(0);
+                    if (routers.get(1).getId() < router.getId()) {
+                        router = routers.get(1);
+                    }
+                    HostVO host = _hostDao.findById(router.getHostId());
+                    if (host == null || host.getManagementServerId() == null ||
+                            host.getManagementServerId() != ManagementServerNode.getManagementServerId()) {
+                        continue;
+                    }
+                    updateRoutersRedundantState(routers);
+                    checkDuplicateMaster(routers);
+                    checkSanity(routers);
+                } catch (Exception ex) {
+                    s_logger.error("Fail to complete the RvRStatusUpdateTask! ", ex);
+                }
+            }
+        }
+        
+    }
+    
+    protected class CheckRouterTask implements Runnable {
+
+        public CheckRouterTask() {
+        }
+
+        @Override
+        public void run() {
             try {
                 final List<DomainRouterVO> routers = _routerDao.listIsolatedByHostId(null);
-                s_logger.debug("Found " + routers.size() + " routers. ");
+                s_logger.debug("Found " + routers.size() + " routers to update status. ");
 
-                updateRoutersRedundantState(routers);
                 updateSite2SiteVpnConnectionState(routers);
 
-                /* FIXME assumed the a pair of redundant routers managed by same mgmt server,
-                 * then the update above can get the latest status */
-                checkDuplicateMaster(routers);
-                checkSanity(routers);
+                final List<NetworkVO> networks = _networkDao.listRedundantNetworks();
+                s_logger.debug("Found " + networks.size() + " networks to update RvR status. ");
+                for (NetworkVO network : networks) {
+                    if (!_vrUpdateQueue.offer(network.getId(), 500, TimeUnit.MILLISECONDS)) {
+                        s_logger.warn("Cannot insert into virtual router update queue! Adjustment of router.check.interval and router.check.poolsize maybe needed.");
+                        break;
+                    }
+                }
             } catch (Exception ex) {
                 s_logger.error("Fail to complete the CheckRouterTask! ", ex);
             }

http://git-wip-us.apache.org/repos/asf/incubator-cloudstack/blob/aa9701a1/server/test/com/cloud/vpc/dao/MockNetworkDaoImpl.java
----------------------------------------------------------------------
diff --git a/server/test/com/cloud/vpc/dao/MockNetworkDaoImpl.java b/server/test/com/cloud/vpc/dao/MockNetworkDaoImpl.java
index 509d9c7..7c9a582 100644
--- a/server/test/com/cloud/vpc/dao/MockNetworkDaoImpl.java
+++ b/server/test/com/cloud/vpc/dao/MockNetworkDaoImpl.java
@@ -351,4 +351,10 @@ public class MockNetworkDaoImpl extends GenericDaoBase<NetworkVO, Long> implemen
         return null;
     }
 
+    @Override
+    public List<NetworkVO> listRedundantNetworks() {
+        // TODO Auto-generated method stub
+        return null;
+    }
+
 }

http://git-wip-us.apache.org/repos/asf/incubator-cloudstack/blob/aa9701a1/setup/db/db/schema-40to410.sql
----------------------------------------------------------------------
diff --git a/setup/db/db/schema-40to410.sql b/setup/db/db/schema-40to410.sql
index 5245a49..4455956 100644
--- a/setup/db/db/schema-40to410.sql
+++ b/setup/db/db/schema-40to410.sql
@@ -52,6 +52,8 @@ CREATE TABLE `cloud`.`template_s3_ref` (
 
 INSERT IGNORE INTO `cloud`.`configuration` VALUES ('Advanced', 'DEFAULT', 'management-server', 's3.enable', 'false', 'enable s3');
 
+INSERT IGNORE INTO `cloud`.`configuration` VALUES ('Advanced', 'DEFAULT', 'NetworkManager', 'router.check.poolsize' , '10', 'Numbers of threads using to check redundant router status.');
+
 ALTER TABLE `cloud`.`snapshots` ADD COLUMN `s3_id` bigint unsigned COMMENT 'S3 to which this snapshot will be stored';
 
 ALTER TABLE `cloud`.`snapshots` ADD CONSTRAINT `fk_snapshots__s3_id` FOREIGN KEY `fk_snapshots__s3_id` (`s3_id`) REFERENCES `s3` (`id`);