You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@helix.apache.org by ki...@apache.org on 2013/09/20 20:30:19 UTC
[10/15] Adding Helix-task-framework and Yarn integration modules
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/provider/ProviderStateModel.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/provider/ProviderStateModel.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/provider/ProviderStateModel.java
new file mode 100644
index 0000000..814387f
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/provider/ProviderStateModel.java
@@ -0,0 +1,114 @@
+package org.apache.helix.autoscale.provider;
+
+import org.apache.helix.NotificationContext;
+import org.apache.helix.autoscale.ClusterAdmin;
+import org.apache.helix.autoscale.ContainerProvider;
+import org.apache.helix.model.Message;
+import org.apache.helix.participant.statemachine.StateModel;
+import org.apache.helix.participant.statemachine.StateModelInfo;
+import org.apache.helix.participant.statemachine.Transition;
+import org.apache.log4j.Logger;
+
+/**
+ * Helix state model implementation for {@link ContainerProvider}s. Updates
+ * configuration of managed Helix cluster and spawns and destroys container
+ * instances.
+ * <p>
+ * Every transition message identifies the container by its partition name;
+ * the resource name of the message is used as the container type.
+ */
+@StateModelInfo(initialState = "OFFLINE", states = { "OFFLINE", "ONLINE" })
+public class ProviderStateModel extends StateModel {
+
+    static final Logger log = Logger.getLogger(ProviderStateModel.class);
+
+    ContainerProvider provider;
+    ClusterAdmin admin;
+
+    /**
+     * @param provider used to create and destroy containers
+     * @param admin used to add and remove instances and to trigger rebalancing
+     */
+    public ProviderStateModel(ContainerProvider provider, ClusterAdmin admin) {
+        this.provider = provider;
+        this.admin = admin;
+    }
+
+    /**
+     * OFFLINE to ONLINE: adds the container as an instance via the cluster
+     * admin, creates it through the provider and then triggers a rebalance.
+     * <p>
+     * A container or instance with the same id is removed first on a
+     * best-effort basis. A failing rebalance is logged and otherwise ignored.
+     *
+     * @param m transition message; the partition name is the container id,
+     *            the resource name is the container type
+     * @param context notification context, used to look up this instance's name
+     * @throws Exception if adding the instance or creating the container fails
+     */
+    @Transition(from = "OFFLINE", to = "ONLINE")
+    public void acquire(Message m, NotificationContext context) throws Exception {
+        String containerType = m.getResourceName();
+        String containerId = m.getPartitionName();
+        String instanceId = context.getManager().getInstanceName();
+
+        log.trace(String.format("%s:%s transitioning from OFFLINE to ONLINE", containerId, instanceId));
+
+        bestEffortRemove(containerId);
+
+        // add instance to cluster
+        admin.addInstance(containerId, containerType);
+
+        // create container
+        provider.create(containerId, containerType);
+
+        bestEffortRebalance();
+
+        log.info(String.format("%s acquired container '%s' (type='%s')", instanceId, containerId, containerType));
+    }
+
+    /**
+     * ONLINE to OFFLINE: removes the container and its instance on a
+     * best-effort basis and then triggers a rebalance. Failures of either step
+     * are logged and do not abort the transition.
+     *
+     * @param m transition message; the partition name is the container id
+     * @param context notification context, used to look up this instance's name
+     */
+    @Transition(from = "ONLINE", to = "OFFLINE")
+    public void release(Message m, NotificationContext context) {
+        String containerId = m.getPartitionName();
+        String instanceId = context.getManager().getInstanceName();
+
+        log.trace(String.format("%s:%s transitioning from ONLINE to OFFLINE", containerId, instanceId));
+
+        bestEffortRemove(containerId);
+
+        bestEffortRebalance();
+
+        log.info(String.format("%s destroyed container '%s'", instanceId, containerId));
+    }
+
+    /**
+     * ERROR to OFFLINE: performs the same cleanup as
+     * {@link #release(Message, NotificationContext)}.
+     *
+     * @param m transition message; the partition name is the container id
+     * @param context notification context, used to look up this instance's name
+     */
+    @Transition(from = "ERROR", to = "OFFLINE")
+    public void recover(Message m, NotificationContext context) {
+        String containerId = m.getPartitionName();
+        String instanceId = context.getManager().getInstanceName();
+
+        log.trace(String.format("%s:%s transitioning from ERROR to OFFLINE", containerId, instanceId));
+
+        release(m, context);
+    }
+
+    /**
+     * OFFLINE to DROPPED: only traces the transition, no cleanup is performed
+     * here.
+     *
+     * @param m transition message; the partition name is the container id
+     * @param context notification context, used to look up this instance's name
+     */
+    @Transition(from = "OFFLINE", to = "DROPPED")
+    public void drop(Message m, NotificationContext context) {
+        String containerId = m.getPartitionName();
+        String instanceId = context.getManager().getInstanceName();
+
+        log.trace(String.format("%s:%s transitioning from OFFLINE to DROPPED", containerId, instanceId));
+    }
+
+    /**
+     * Triggers a rebalance through the cluster admin. A failure is logged
+     * (including its stack trace) and otherwise ignored so that the calling
+     * state transition still completes.
+     */
+    private void bestEffortRebalance() {
+        try {
+            admin.rebalance();
+        } catch (Exception e) {
+            // deliberately not rethrown; keep the cause visible in the log
+            log.warn(String.format("rebalancing cluster failed (error='%s')", e.getMessage()), e);
+        }
+    }
+
+    /**
+     * Destroys the container and removes its instance, each independently of
+     * the other. Failures are logged at debug level together with their cause
+     * and never propagated.
+     *
+     * @param containerId id of the container and of the instance to remove
+     */
+    private void bestEffortRemove(String containerId) {
+        log.debug(String.format("Best effort removal of container '%s'", containerId));
+
+        try {
+            provider.destroy(containerId);
+            log.debug(String.format("Container '%s' destroyed", containerId));
+        } catch (Exception e) {
+            // the container may simply not exist, but other causes are possible
+            log.debug(String.format("Could not destroy container '%s' (it may not exist)", containerId), e);
+        }
+
+        try {
+            admin.removeInstance(containerId);
+            log.debug(String.format("Instance '%s' removed", containerId));
+        } catch (Exception e) {
+            // the instance may simply not exist, but other causes are possible
+            log.debug(String.format("Could not remove instance '%s' (it may not exist)", containerId), e);
+        }
+    }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/provider/ProviderStateModelFactory.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/provider/ProviderStateModelFactory.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/provider/ProviderStateModelFactory.java
new file mode 100644
index 0000000..2613336
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/provider/ProviderStateModelFactory.java
@@ -0,0 +1,27 @@
+package org.apache.helix.autoscale.provider;
+
+import org.apache.helix.autoscale.ClusterAdmin;
+import org.apache.helix.autoscale.ContainerProvider;
+import org.apache.helix.participant.statemachine.StateModelFactory;
+
+/**
+ * Factory for {@link ProviderStateModel}. Injects {@link ClusterAdmin} for
+ * managed cluster and {@link ContainerProvider}.
+ *
+ */
+class ProviderStateModelFactory extends StateModelFactory<ProviderStateModel> {
+
+ final ContainerProvider provider;
+ final ClusterAdmin admin;
+
+ public ProviderStateModelFactory(ContainerProvider provider, ClusterAdmin admin) {
+ super();
+ this.provider = provider;
+ this.admin = admin;
+ }
+
+ @Override
+ public ProviderStateModel createNewStateModel(String partitionName) {
+ return new ProviderStateModel(provider, admin);
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/resources/Boot2By2Local.properties
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/resources/Boot2By2Local.properties b/recipes/auto-scale/src/main/resources/Boot2By2Local.properties
new file mode 100644
index 0000000..13fb4ff
--- /dev/null
+++ b/recipes/auto-scale/src/main/resources/Boot2By2Local.properties
@@ -0,0 +1,87 @@
+#
+# Zookeeper (optional)
+#
+zookeeper.datadir=/tmp/autoscale/zk/data
+zookeeper.logdir=/tmp/autoscale/zk/log
+zookeeper.port=2199
+
+#
+# Cluster
+#
+cluster.name=cluster
+cluster.address=localhost:2199
+
+#
+# Resource
+#
+resource.0.name=wsprod
+resource.0.cluster=cluster
+resource.0.address=localhost:2199
+resource.0.container=webserver
+resource.0.model=MasterSlave
+resource.0.partitions=15
+resource.0.replica=1
+
+resource.1.name=dbprod
+resource.1.cluster=cluster
+resource.1.address=localhost:2199
+resource.1.container=database
+resource.1.model=MasterSlave
+resource.1.partitions=8
+resource.1.replica=3
+
+#
+# Controller
+#
+controller.name=controller
+controller.cluster=cluster
+controller.address=localhost:2199
+controller.autorefresh=5000
+
+#
+# Metacluster
+#
+metacluster.name=meta
+metacluster.address=localhost:2199
+metacluster.managedcluster=cluster
+metacluster.managedaddress=localhost:2199
+
+#
+# Metaresource
+#
+metaresource.0.name=webserver
+metaresource.0.metacluster=meta
+metaresource.0.metaaddress=localhost:2199
+metaresource.0.class=org.apache.helix.autoscale.impl.container.DummyMasterSlaveProcess
+
+metaresource.1.name=database
+metaresource.1.metacluster=meta
+metaresource.1.metaaddress=localhost:2199
+metaresource.1.class=org.apache.helix.autoscale.impl.container.DummyMasterSlaveProcess
+
+#
+# Metaprovider
+#
+metaprovider.0.name=provider0
+metaprovider.0.metacluster=meta
+metaprovider.0.metaaddress=localhost:2199
+metaprovider.0.class=org.apache.helix.autoscale.impl.local.LocalContainerProviderProcess
+
+metaprovider.1.name=provider1
+metaprovider.1.metacluster=meta
+metaprovider.1.metaaddress=localhost:2199
+metaprovider.1.class=org.apache.helix.autoscale.impl.local.LocalContainerProviderProcess
+
+#
+# Metacontroller
+#
+metacontroller.name=metacontroller
+metacontroller.metacluster=meta
+metacontroller.metaaddress=localhost:2199
+metacontroller.autorefresh=5000
+
+metacontroller.status.class=org.apache.helix.autoscale.impl.local.LocalStatusProvider
+
+metacontroller.target.class=org.apache.helix.autoscale.impl.StaticTargetProvider
+metacontroller.target.webserver=5
+metacontroller.target.database=3
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/resources/Boot2By2Shell.properties
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/resources/Boot2By2Shell.properties b/recipes/auto-scale/src/main/resources/Boot2By2Shell.properties
new file mode 100644
index 0000000..079771b
--- /dev/null
+++ b/recipes/auto-scale/src/main/resources/Boot2By2Shell.properties
@@ -0,0 +1,87 @@
+#
+# Zookeeper (optional)
+#
+zookeeper.datadir=/tmp/autoscale/zk/data
+zookeeper.logdir=/tmp/autoscale/zk/log
+zookeeper.port=2199
+
+#
+# Cluster
+#
+cluster.name=cluster
+cluster.address=localhost:2199
+
+#
+# Resource
+#
+resource.0.name=wsprod
+resource.0.cluster=cluster
+resource.0.address=localhost:2199
+resource.0.container=webserver
+resource.0.model=MasterSlave
+resource.0.partitions=15
+resource.0.replica=1
+
+resource.1.name=dbprod
+resource.1.cluster=cluster
+resource.1.address=localhost:2199
+resource.1.container=database
+resource.1.model=MasterSlave
+resource.1.partitions=8
+resource.1.replica=3
+
+#
+# Controller
+#
+controller.name=controller
+controller.cluster=cluster
+controller.address=localhost:2199
+controller.autorefresh=5000
+
+#
+# Metacluster
+#
+metacluster.name=meta
+metacluster.address=localhost:2199
+metacluster.managedcluster=cluster
+metacluster.managedaddress=localhost:2199
+
+#
+# Metaresource
+#
+metaresource.0.name=webserver
+metaresource.0.metacluster=meta
+metaresource.0.metaaddress=localhost:2199
+metaresource.0.class=org.apache.helix.autoscale.impl.container.DummyMasterSlaveProcess
+
+metaresource.1.name=database
+metaresource.1.metacluster=meta
+metaresource.1.metaaddress=localhost:2199
+metaresource.1.class=org.apache.helix.autoscale.impl.container.DummyMasterSlaveProcess
+
+#
+# Metaprovider
+#
+metaprovider.0.name=provider0
+metaprovider.0.metacluster=meta
+metaprovider.0.metaaddress=localhost:2199
+metaprovider.0.class=org.apache.helix.autoscale.impl.shell.ShellContainerProviderProcess
+
+metaprovider.1.name=provider1
+metaprovider.1.metacluster=meta
+metaprovider.1.metaaddress=localhost:2199
+metaprovider.1.class=org.apache.helix.autoscale.impl.shell.ShellContainerProviderProcess
+
+#
+# Metacontroller
+#
+metacontroller.name=metacontroller
+metacontroller.metacluster=meta
+metacontroller.metaaddress=localhost:2199
+metacontroller.autorefresh=5000
+
+metacontroller.status.class=org.apache.helix.autoscale.impl.shell.ShellStatusProvider
+
+metacontroller.target.class=org.apache.helix.autoscale.impl.StaticTargetProvider
+metacontroller.target.webserver=5
+metacontroller.target.database=3
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/resources/Boot2By2Yarn.properties
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/resources/Boot2By2Yarn.properties b/recipes/auto-scale/src/main/resources/Boot2By2Yarn.properties
new file mode 100644
index 0000000..e447711
--- /dev/null
+++ b/recipes/auto-scale/src/main/resources/Boot2By2Yarn.properties
@@ -0,0 +1,98 @@
+#
+# Zookeeper (optional)
+#
+zookeeper.datadir=/tmp/autoscale/zk/data
+zookeeper.logdir=/tmp/autoscale/zk/log
+zookeeper.port=2199
+
+#
+# Cluster
+#
+cluster.name=cluster
+cluster.address=rm:2199
+
+#
+# Resource
+#
+resource.0.name=wsprod
+resource.0.cluster=cluster
+resource.0.address=rm:2199
+resource.0.container=webserver
+resource.0.model=MasterSlave
+resource.0.partitions=15
+resource.0.replica=1
+
+resource.1.name=dbprod
+resource.1.cluster=cluster
+resource.1.address=rm:2199
+resource.1.container=database
+resource.1.model=MasterSlave
+resource.1.partitions=8
+resource.1.replica=3
+
+#
+# Controller
+#
+controller.name=controller
+controller.cluster=cluster
+controller.address=rm:2199
+controller.autorefresh=5000
+
+#
+# Metacluster
+#
+metacluster.name=meta
+metacluster.address=rm:2199
+metacluster.managedcluster=cluster
+metacluster.managedaddress=rm:2199
+
+#
+# Metaresource
+#
+metaresource.0.name=webserver
+metaresource.0.metacluster=meta
+metaresource.0.metaaddress=rm:2199
+metaresource.0.class=org.apache.helix.autoscale.impl.container.DummyMasterSlaveProcess
+
+metaresource.1.name=database
+metaresource.1.metacluster=meta
+metaresource.1.metaaddress=rm:2199
+metaresource.1.class=org.apache.helix.autoscale.impl.container.DummyMasterSlaveProcess
+
+#
+# Metaprovider
+#
+metaprovider.0.name=provider0
+metaprovider.0.metacluster=meta
+metaprovider.0.metaaddress=rm:2199
+metaprovider.0.class=org.apache.helix.autoscale.impl.yarn.YarnContainerProviderProcess
+metaprovider.0.yarndata=rm:2199
+metaprovider.0.resourcemananger=rm:8032
+metaprovider.0.scheduler=rm:8030
+metaprovider.0.user=yarn
+metaprovider.0.hdfs=hdfs://rm:9000/
+
+metaprovider.1.name=provider1
+metaprovider.1.metacluster=meta
+metaprovider.1.metaaddress=rm:2199
+metaprovider.1.class=org.apache.helix.autoscale.impl.yarn.YarnContainerProviderProcess
+metaprovider.1.yarndata=rm:2199
+metaprovider.1.resourcemananger=rm:8032
+metaprovider.1.scheduler=rm:8030
+metaprovider.1.user=yarn
+metaprovider.1.hdfs=hdfs://rm:9000/
+
+#
+# Metacontroller
+#
+metacontroller.name=metacontroller
+metacontroller.metacluster=meta
+metacontroller.metaaddress=rm:2199
+metacontroller.autorefresh=5000
+
+metacontroller.status.class=org.apache.helix.autoscale.impl.yarn.YarnStatusProvider
+metacontroller.status.yarndata=rm:2199
+
+metacontroller.target.class=org.apache.helix.autoscale.impl.StaticTargetProvider
+metacontroller.target.webserver=5
+metacontroller.target.database=3
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/resources/BootLocal.properties
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/resources/BootLocal.properties b/recipes/auto-scale/src/main/resources/BootLocal.properties
new file mode 100644
index 0000000..15905fc
--- /dev/null
+++ b/recipes/auto-scale/src/main/resources/BootLocal.properties
@@ -0,0 +1,68 @@
+#
+# Zookeeper (optional)
+#
+zookeeper.datadir=/tmp/autoscale/zk/data
+zookeeper.logdir=/tmp/autoscale/zk/log
+zookeeper.port=2199
+
+#
+# Cluster
+#
+cluster.name=cluster
+cluster.address=localhost:2199
+
+#
+# Resource
+#
+resource.name=resource
+resource.cluster=cluster
+resource.address=localhost:2199
+resource.container=container
+resource.model=MasterSlave
+resource.partitions=10
+resource.replica=3
+
+#
+# Controller
+#
+controller.name=controller
+controller.cluster=cluster
+controller.address=localhost:2199
+controller.autorefresh=5000
+
+#
+# Metacluster
+#
+metacluster.name=meta
+metacluster.address=localhost:2199
+metacluster.managedcluster=cluster
+metacluster.managedaddress=localhost:2199
+
+#
+# Metaresource
+#
+metaresource.name=container
+metaresource.metacluster=meta
+metaresource.metaaddress=localhost:2199
+metaresource.class=org.apache.helix.autoscale.impl.container.DummyMasterSlaveProcess
+
+#
+# Metaprovider
+#
+metaprovider.name=provider
+metaprovider.metacluster=meta
+metaprovider.metaaddress=localhost:2199
+metaprovider.class=org.apache.helix.autoscale.impl.local.LocalContainerProviderProcess
+
+#
+# Metacontroller
+#
+metacontroller.name=metacontroller
+metacontroller.metacluster=meta
+metacontroller.metaaddress=localhost:2199
+metacontroller.autorefresh=5000
+
+metacontroller.status.class=org.apache.helix.autoscale.impl.local.LocalStatusProvider
+
+metacontroller.target.class=org.apache.helix.autoscale.impl.StaticTargetProvider
+metacontroller.target.container=7
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/resources/RedisYarnSample.properties
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/resources/RedisYarnSample.properties b/recipes/auto-scale/src/main/resources/RedisYarnSample.properties
new file mode 100644
index 0000000..eb58fd2
--- /dev/null
+++ b/recipes/auto-scale/src/main/resources/RedisYarnSample.properties
@@ -0,0 +1,89 @@
+###############################################################################
+# Managed cluster configuration
+###############################################################################
+
+cluster.name = rediscluster
+cluster.address = helix.zookeeper.intra
+
+#
+# Helix cluster resources
+# (as usual)
+#
+resource.0.name = redis
+resource.0.cluster = rediscluster
+resource.0.address = helix.zookeeper.intra
+# container type, see meta cluster
+resource.0.container = rediscontainer
+resource.0.model = OnlineOffline
+# partitions are auto-assigned
+resource.0.partitions = 1024
+# no replication needed
+resource.0.replica = 1
+
+#
+# Helix cluster controllers
+# (as usual)
+#
+controller.name = controller
+controller.cluster = rediscluster
+controller.address = helix.zookeeper.intra
+
+###############################################################################
+# Meta cluster configuration
+###############################################################################
+
+metacluster.name = meta
+metacluster.address = helix.zookeeper.intra
+metacluster.managedcluster = rediscluster
+metacluster.managedaddress = helix.zookeeper.intra
+
+#
+# Container Types
+# (Base configuration for instances spawned by providers)
+#
+metaresource.0.name = rediscontainer
+metaresource.0.class = org.apache.helix.autoscale.impl.container.RedisServerProcess
+metaresource.0.metacluster = meta
+metaresource.0.metaaddress = helix.zookeeper.intra
+metaresource.0.address = apps.zookeeper.intra
+# instance id is added to the base port
+metaresource.0.baseport = 17000
+
+#
+# Container Instance Providers
+# (Endpoints of container deployment frameworks, e.g. Apache YARN)
+#
+metaprovider.0.name = provider
+metaprovider.0.class = org.apache.helix.autoscale.impl.yarn.YarnContainerProviderProcess
+metaprovider.0.metacluster = meta
+metaprovider.0.metaaddress = helix.zookeeper.intra
+metaprovider.0.yarndata = yarn.zookeeper.intra
+metaprovider.0.resourcemananger = yarn-rm.intra:8032
+metaprovider.0.scheduler = yarn-rm.intra:8030
+metaprovider.0.hdfs = hdfs://yarn-hdfs.intra:9000/
+metaprovider.0.user = yarnuser
+
+#
+# Helix meta cluster controller
+# (Monitors system state and adapts config of Container Instance Providers)
+#
+metacontroller.name = metacontroller
+metacontroller.metacluster = meta
+metacontroller.metaaddress = helix.zookeeper.intra
+# status refresh interval
+metacontroller.autorefresh = 10000
+
+#
+# Container Status Provider
+# (Provides low-level data on container instance health)
+#
+metacontroller.status.class = org.apache.helix.autoscale.impl.yarn.YarnStatusProvider
+metacontroller.status.yarndata = yarn.zookeeper.intra
+
+#
+# Performance Target Model
+# (Provides target number of container instances)
+#
+metacontroller.target.class = org.apache.helix.autoscale.impl.RedisTargetProvider
+metacontroller.target.address = apps.zookeeper.intra
+# interval for Tps probes
+metacontroller.target.interval = 10000
+# timeout of probe
+metacontroller.target.timeout = 9000
+# target "GET" Tps
+metacontroller.target.get = 1000000
+# min container count
+metacontroller.target.min = 1
+# max container count
+metacontroller.target.max = 23
+# exponential average
+metacontroller.target.alpha = 0.1
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/resources/log4j.properties
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/resources/log4j.properties b/recipes/auto-scale/src/main/resources/log4j.properties
new file mode 100644
index 0000000..7f29be2
--- /dev/null
+++ b/recipes/auto-scale/src/main/resources/log4j.properties
@@ -0,0 +1,30 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# Set root logger level to ERROR and its only appender to C.
+log4j.rootLogger=ERROR, C
+
+# C is set to be a ConsoleAppender.
+log4j.appender.C=org.apache.log4j.ConsoleAppender
+log4j.appender.C.layout=org.apache.log4j.PatternLayout
+log4j.appender.C.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n
+
+log4j.logger.org.I0Itec=ERROR
+log4j.logger.org.apache=ERROR
+
+log4j.logger.org.apache.helix.autoscale=INFO
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/test/config/testng.xml
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/test/config/testng.xml b/recipes/auto-scale/src/test/config/testng.xml
new file mode 100644
index 0000000..f710791
--- /dev/null
+++ b/recipes/auto-scale/src/test/config/testng.xml
@@ -0,0 +1,27 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied. See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+<!DOCTYPE suite SYSTEM "http://testng.org/testng-1.0.dtd">
+<suite name="Suite" parallel="none">
+ <test name="Test" preserve-order="true">
+ <packages>
+ <package name="org.apache.helix.autoscale.*"/>
+ </packages>
+ </test>
+</suite>
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/BootstrapperIT.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/BootstrapperIT.java b/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/BootstrapperIT.java
new file mode 100644
index 0000000..5dd7820
--- /dev/null
+++ b/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/BootstrapperIT.java
@@ -0,0 +1,134 @@
+package org.apache.helix.autoscale;
+
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Collection;
+import java.util.Properties;
+
+import org.apache.helix.HelixAdmin;
+import org.apache.helix.autoscale.Service;
+import org.apache.helix.autoscale.bootstrapper.Boot;
+import org.apache.helix.autoscale.bootstrapper.ClusterService;
+import org.apache.helix.autoscale.bootstrapper.ControllerService;
+import org.apache.helix.autoscale.bootstrapper.MetaClusterService;
+import org.apache.helix.autoscale.bootstrapper.MetaControllerService;
+import org.apache.helix.autoscale.bootstrapper.MetaProviderService;
+import org.apache.helix.autoscale.bootstrapper.MetaResourceService;
+import org.apache.helix.autoscale.bootstrapper.ResourceService;
+import org.apache.helix.autoscale.bootstrapper.ZookeeperService;
+import org.apache.helix.manager.zk.ZKHelixAdmin;
+import org.apache.log4j.Logger;
+import org.testng.Assert;
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.Test;
+
+/**
+ * Bootstrapping tool test. Reads cluster configuration from *.properties files,
+ * spawns services and verifies number of active partitions and containers
+ *
+ * @see Boot
+ */
+@Test(groups = { "integration", "boot" })
+public class BootstrapperIT {
+
+    static final Logger log = Logger.getLogger(BootstrapperIT.class);
+
+    /** ZooKeeper address the verification steps of this test connect to. */
+    private static final String ZK_ADDRESS = "localhost:2199";
+
+    Boot boot;
+    HelixAdmin admin;
+
+    /**
+     * Closes the admin connection and stops the bootstrapped services. The
+     * services are stopped even if closing the admin connection throws, and
+     * both fields are always reset so a later test starts from a clean state.
+     */
+    @AfterMethod(alwaysRun = true)
+    public void teardown() throws Exception {
+        log.debug("tearing down bootstrap test");
+        try {
+            if (admin != null) {
+                admin.close();
+            }
+        } finally {
+            admin = null;
+            if (boot != null) {
+                try {
+                    boot.stop();
+                } finally {
+                    boot = null;
+                }
+            }
+        }
+    }
+
+    /**
+     * Boots the single-resource local setup, checks that one service of every
+     * expected kind was started and waits for the expected instance and
+     * partition counts.
+     */
+    @Test(timeOut = TestUtils.TEST_TIMEOUT, groups = { "local" })
+    public void bootstrapLocalTest() throws Exception {
+        boot = new Boot();
+        boot.configure(getProperties("BootLocal.properties"));
+        boot.start();
+
+        final Class<?>[] expectedServices = {
+                ZookeeperService.class,
+                ClusterService.class,
+                ResourceService.class,
+                ControllerService.class,
+                MetaClusterService.class,
+                MetaResourceService.class,
+                MetaProviderService.class,
+                MetaControllerService.class };
+
+        for (Class<?> expected : expectedServices) {
+            // the message names the missing service when the assertion fails
+            Assert.assertTrue(containsInstanceOf(boot.getServcies(), expected),
+                    String.format("no service of type %s was started", expected.getSimpleName()));
+        }
+
+        final long limit = System.currentTimeMillis() + TestUtils.REBALANCE_TIMEOUT;
+
+        admin = new ZKHelixAdmin(ZK_ADDRESS);
+        waitUntil(admin, "meta", "container", 1, 7, (limit - System.currentTimeMillis()));
+        waitUntil(admin, "cluster", "resource", 7, 10, (limit - System.currentTimeMillis()));
+
+    }
+
+    /** Boots the two-resource setup backed by the local container provider. */
+    @Test(timeOut = TestUtils.TEST_TIMEOUT, groups = { "local" })
+    public void bootstrap2By2LocalTest() throws Exception {
+        boot = new Boot();
+        boot.configure(getProperties("Boot2By2Local.properties"));
+        boot.start();
+
+        verify2By2Setup();
+    }
+
+    /** Boots the two-resource setup backed by the shell container provider. */
+    @Test(timeOut = TestUtils.TEST_TIMEOUT, groups = { "shell" })
+    public void bootstrap2By2ShellTest() throws Exception {
+        boot = new Boot();
+        boot.configure(getProperties("Boot2By2Shell.properties"));
+        boot.start();
+
+        verify2By2Setup();
+    }
+
+    /** Boots the two-resource setup backed by the YARN container provider. */
+    @Test(timeOut = TestUtils.TEST_TIMEOUT, groups = { "yarn" })
+    public void bootstrap2By2YarnTest() throws Exception {
+        boot = new Boot();
+        boot.configure(getProperties("Boot2By2Yarn.properties"));
+        boot.start();
+
+        verify2By2Setup();
+    }
+
+    /**
+     * Waits until the meta cluster and the managed cluster of the 2-by-2 setups
+     * show the expected instance and partition counts. All four waits share a
+     * single deadline of {@link TestUtils#REBALANCE_TIMEOUT}.
+     */
+    void verify2By2Setup() throws Exception {
+        final long limit = System.currentTimeMillis() + TestUtils.REBALANCE_TIMEOUT;
+        final String address = ZK_ADDRESS;
+
+        log.debug(String.format("connecting to zookeeper at '%s'", address));
+
+        admin = new ZKHelixAdmin(address);
+        waitUntil(admin, "meta", "database", 2, 3, (limit - System.currentTimeMillis()));
+        waitUntil(admin, "meta", "webserver", 2, 5, (limit - System.currentTimeMillis()));
+        waitUntil(admin, "cluster", "dbprod", 3, 8, (limit - System.currentTimeMillis()));
+        waitUntil(admin, "cluster", "wsprod", 5, 15, (limit - System.currentTimeMillis()));
+    }
+
+    /**
+     * Waits first for the instance count and then for the partition count of a
+     * resource; both waits share the given time budget.
+     *
+     * @param admin Helix admin used for the checks
+     * @param cluster name of the cluster holding the resource
+     * @param resource name of the resource to check
+     * @param instanceCount expected number of instances
+     * @param partitionCount expected number of partitions
+     * @param timeout total time budget for both waits
+     */
+    static void waitUntil(HelixAdmin admin, String cluster, String resource, int instanceCount, int partitionCount, long timeout) throws Exception {
+        final long limit = System.currentTimeMillis() + timeout;
+        TestUtils.waitUntilInstanceCount(admin, cluster, resource, instanceCount, (limit - System.currentTimeMillis()));
+        TestUtils.waitUntilPartitionCount(admin, cluster, resource, partitionCount, (limit - System.currentTimeMillis()));
+    }
+
+    /**
+     * Loads a properties file from the system class path.
+     *
+     * @param resourcePath class path location of the properties file
+     * @return the loaded properties
+     * @throws IOException if the resource cannot be found or read
+     */
+    static Properties getProperties(String resourcePath) throws IOException {
+        InputStream in = ClassLoader.getSystemResourceAsStream(resourcePath);
+        if (in == null) {
+            // fail with a clear message instead of a NullPointerException inside load()
+            throw new FileNotFoundException(String.format("classpath resource '%s' not found", resourcePath));
+        }
+        try {
+            Properties properties = new Properties();
+            properties.load(in);
+            return properties;
+        } finally {
+            in.close();
+        }
+    }
+
+    /**
+     * Tells whether at least one of the services is an instance of the given
+     * class.
+     *
+     * @param services services to inspect
+     * @param clazz type to look for
+     * @return {@code true} if a matching service exists
+     */
+    static boolean containsInstanceOf(Collection<Service> services, Class<?> clazz) {
+        for (Service service : services) {
+            if (clazz.isInstance(service)) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/FailoverIT.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/FailoverIT.java b/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/FailoverIT.java
new file mode 100644
index 0000000..429146a
--- /dev/null
+++ b/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/FailoverIT.java
@@ -0,0 +1,195 @@
+package org.apache.helix.autoscale;
+
+import java.util.Collections;
+import java.util.Iterator;
+
+import org.apache.helix.autoscale.Service;
+import org.apache.helix.autoscale.impl.StaticTargetProvider;
+import org.apache.helix.autoscale.impl.local.LocalContainerProviderProcess;
+import org.apache.helix.autoscale.impl.local.LocalContainerSingleton;
+import org.apache.helix.autoscale.impl.local.LocalStatusProvider;
+import org.apache.helix.autoscale.impl.shell.ShellContainerProviderProcess;
+import org.apache.helix.autoscale.impl.shell.ShellContainerSingleton;
+import org.apache.helix.autoscale.impl.shell.ShellStatusProvider;
+import org.apache.helix.autoscale.impl.yarn.YarnContainerProviderProcess;
+import org.apache.helix.autoscale.impl.yarn.YarnStatusProvider;
+import org.apache.helix.autoscale.impl.yarn.ZookeeperYarnDataProvider;
+import org.apache.helix.autoscale.provider.ProviderRebalancer;
+import org.apache.log4j.Logger;
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.Test;
+
+/**
+ * Fault-recovery test for individual containers and whole providers. Missing
+ * containers should be replaced by the meta cluster Rebalancer using remaining
+ * active providers.
+ * <p>
+ * Every test starts ZooKeeper and a test cluster with three providers, removes
+ * containers or one provider and then waits until {@link #CONTAINER_COUNT}
+ * containers are assigned again.
+ *
+ * @see ProviderRebalancer
+ */
+@Test(groups = { "integration", "failure" })
+public class FailoverIT {
+
+    static final Logger log = Logger.getLogger(FailoverIT.class);
+
+    static final int CONTAINER_COUNT = 7;
+
+    /** Containers removed by the container failover tests. */
+    private static final String[] FAILED_CONTAINERS = { "container_2", "container_4", "container_6" };
+
+    /** Pause in milliseconds between removing containers and triggering the rebalance. */
+    private static final long FAILURE_SETTLE_MILLIS = 3000;
+
+    StaticTargetProvider targetProvider;
+    YarnStatusProvider yarnStatusProvider;
+
+    /**
+     * Registers a JVM shutdown hook that runs {@link #teardownTest()} so that
+     * the test cluster is also stopped when the JVM exits.
+     */
+    @BeforeClass(alwaysRun = true)
+    public void setupClass() {
+        log.info("installing shutdown hook");
+        Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() {
+            @Override
+            public void run() {
+                try {
+                    teardownTest();
+                } catch (Exception e) {
+                    // best effort only: the JVM is going down anyway, but do not hide the cause
+                    log.warn("teardown in shutdown hook failed", e);
+                }
+            }
+        }));
+    }
+
+    /**
+     * Cleans up leftovers of a previous test and creates a fresh target
+     * provider requesting {@link #CONTAINER_COUNT} containers.
+     */
+    @BeforeMethod(alwaysRun = true)
+    public void setupTest() throws Exception {
+        teardownTest();
+        targetProvider = new StaticTargetProvider(Collections.singletonMap(TestUtils.metaResourceName, CONTAINER_COUNT));
+    }
+
+    /**
+     * Stops the test cluster, the YARN status provider (if one was started)
+     * and ZooKeeper. Later steps still run when an earlier one throws, so a
+     * failed test does not leave ZooKeeper running for the next one.
+     */
+    @AfterMethod(alwaysRun = true)
+    public void teardownTest() throws Exception {
+        try {
+            TestUtils.stopTestCluster();
+        } finally {
+            try {
+                if (yarnStatusProvider != null) {
+                    yarnStatusProvider.stop();
+                    yarnStatusProvider = null;
+                }
+            } finally {
+                TestUtils.stopZookeeper();
+            }
+        }
+    }
+
+    @Test(timeOut = TestUtils.TEST_TIMEOUT, groups = { "local" })
+    public void testLocalContainerFailover() throws Exception {
+        log.info("testing local container failover");
+        TestUtils.configure();
+        TestUtils.startZookeeper();
+        TestUtils.startTestCluster(targetProvider, new LocalStatusProvider(), makeLocalProviders(3));
+        killLocalContainers();
+    }
+
+    @Test(timeOut = TestUtils.TEST_TIMEOUT, groups = { "local" })
+    public void testLocalProviderFailover() throws Exception {
+        log.info("testing local provider failover");
+        TestUtils.configure();
+        TestUtils.startZookeeper();
+        TestUtils.startTestCluster(targetProvider, new LocalStatusProvider(), makeLocalProviders(3));
+        killProvider();
+    }
+
+    @Test(timeOut = TestUtils.TEST_TIMEOUT, groups = { "shell" })
+    public void testShellContainerFailover() throws Exception {
+        log.info("testing shell container failover");
+        TestUtils.configure();
+        TestUtils.startZookeeper();
+        TestUtils.startTestCluster(targetProvider, new ShellStatusProvider(), makeShellProviders(3));
+        killShellContainers();
+    }
+
+    @Test(timeOut = TestUtils.TEST_TIMEOUT, groups = { "shell" })
+    public void testShellProviderFailover() throws Exception {
+        log.info("testing shell provider failover");
+        TestUtils.configure();
+        TestUtils.startZookeeper();
+        TestUtils.startTestCluster(targetProvider, new ShellStatusProvider(), makeShellProviders(3));
+        killProvider();
+    }
+
+    @Test(timeOut = TestUtils.TEST_TIMEOUT, groups = { "yarn" })
+    public void testYarnContainerFailover() throws Exception {
+        log.info("testing yarn container failover");
+        TestUtils.configure("distributed.properties");
+        TestUtils.startZookeeper();
+        yarnStatusProvider = new YarnStatusProvider(TestUtils.zkAddress);
+        yarnStatusProvider.start();
+        TestUtils.startTestCluster(targetProvider, yarnStatusProvider, makeYarnProviders(3));
+        killYarnContainers();
+    }
+
+    @Test(timeOut = TestUtils.TEST_TIMEOUT, groups = { "yarn" })
+    public void testYarnProviderFailover() throws Exception {
+        log.info("testing yarn provider failover");
+        TestUtils.configure("distributed.properties");
+        TestUtils.startZookeeper();
+        yarnStatusProvider = new YarnStatusProvider(TestUtils.zkAddress);
+        yarnStatusProvider.start();
+        TestUtils.startTestCluster(targetProvider, yarnStatusProvider, makeYarnProviders(3));
+        killProvider();
+    }
+
+    /** Kills three local containers and waits for their replacement. */
+    void killLocalContainers() throws Exception {
+        for (String container : FAILED_CONTAINERS) {
+            LocalContainerSingleton.killProcess(container);
+        }
+        awaitRecovery();
+    }
+
+    /** Kills three shell containers and waits for their replacement. */
+    void killShellContainers() throws Exception {
+        for (String container : FAILED_CONTAINERS) {
+            ShellContainerSingleton.killProcess(container);
+        }
+        awaitRecovery();
+    }
+
+    /**
+     * Deletes the YARN data entries of three containers and waits for their
+     * replacement. The temporary data provider is stopped even if a delete
+     * fails.
+     */
+    void killYarnContainers() throws Exception {
+        ZookeeperYarnDataProvider yarnDataService = new ZookeeperYarnDataProvider(TestUtils.zkAddress);
+        yarnDataService.start();
+        try {
+            for (String container : FAILED_CONTAINERS) {
+                yarnDataService.delete(container);
+            }
+        } finally {
+            yarnDataService.stop();
+        }
+        awaitRecovery();
+    }
+
+    /**
+     * Stops the first registered provider, removes it from
+     * {@code TestUtils.providerServices} and waits until the remaining
+     * providers host {@link #CONTAINER_COUNT} containers.
+     */
+    static void killProvider() throws Exception {
+        Iterator<Service> itService = TestUtils.providerServices.iterator();
+        itService.next().stop();
+        itService.remove();
+
+        TestUtils.rebalanceTestCluster();
+        TestUtils.waitUntilRebalancedCount(CONTAINER_COUNT);
+    }
+
+    /** Creates {@code count} local providers named provider_0 .. provider_(count-1). */
+    LocalContainerProviderProcess[] makeLocalProviders(int count) throws Exception {
+        LocalContainerProviderProcess[] localProviders = new LocalContainerProviderProcess[count];
+        for (int i = 0; i < count; i++) {
+            localProviders[i] = TestUtils.makeLocalProvider("provider_" + i);
+        }
+        return localProviders;
+    }
+
+    /** Creates {@code count} shell providers named provider_0 .. provider_(count-1). */
+    ShellContainerProviderProcess[] makeShellProviders(int count) throws Exception {
+        ShellContainerProviderProcess[] shellProviders = new ShellContainerProviderProcess[count];
+        for (int i = 0; i < count; i++) {
+            shellProviders[i] = TestUtils.makeShellProvider("provider_" + i);
+        }
+        return shellProviders;
+    }
+
+    /** Creates {@code count} YARN providers named provider_0 .. provider_(count-1). */
+    YarnContainerProviderProcess[] makeYarnProviders(int count) throws Exception {
+        YarnContainerProviderProcess[] yarnProviders = new YarnContainerProviderProcess[count];
+        for (int i = 0; i < count; i++) {
+            yarnProviders[i] = TestUtils.makeYarnProvider("provider_" + i);
+        }
+        return yarnProviders;
+    }
+
+    /**
+     * Shared tail of the container failover scenarios: pause, trigger a
+     * rebalance and wait until {@link #CONTAINER_COUNT} containers are
+     * assigned again.
+     */
+    private static void awaitRecovery() throws Exception {
+        Thread.sleep(FAILURE_SETTLE_MILLIS);
+        TestUtils.rebalanceTestCluster();
+        TestUtils.waitUntilRebalancedCount(CONTAINER_COUNT);
+    }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/LocalContainerProviderIT.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/LocalContainerProviderIT.java b/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/LocalContainerProviderIT.java
new file mode 100644
index 0000000..94ea5ac
--- /dev/null
+++ b/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/LocalContainerProviderIT.java
@@ -0,0 +1,80 @@
+package org.apache.helix.autoscale;
+
+import java.util.Collections;
+
+import org.apache.helix.autoscale.impl.StaticTargetProvider;
+import org.apache.helix.autoscale.impl.local.LocalContainerProviderProcess;
+import org.apache.helix.autoscale.impl.local.LocalStatusProvider;
+import org.apache.log4j.Logger;
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.Test;
+
+/**
+ * Local container provider and local status provider test. Scale-up and -down
+ * only, no failures.
+ *
+ * @see LocalContainerProvider
+ * @see LocalStatusProvider
+ */
+@Test(groups = { "integration", "local" })
+public class LocalContainerProviderIT {
+
+    static final Logger log = Logger.getLogger(LocalContainerProviderIT.class);
+
+    static final int CONTAINER_COUNT = 4;
+
+    StaticTargetProvider clusterStatusProvider;
+    LocalContainerProviderProcess containerProvider;
+    LocalStatusProvider containerStatusProvider;
+
+    /**
+     * Cleans up leftovers of a previous test, then starts ZooKeeper and a test
+     * cluster with a single local provider and {@link #CONTAINER_COUNT}
+     * requested containers.
+     */
+    @BeforeMethod(alwaysRun = true)
+    public void setupTest() throws Exception {
+        teardownTest();
+        TestUtils.configure();
+        TestUtils.startZookeeper();
+        containerProvider = TestUtils.makeLocalProvider("provider_0");
+        clusterStatusProvider = new StaticTargetProvider(Collections.singletonMap(TestUtils.metaResourceName, CONTAINER_COUNT));
+        containerStatusProvider = new LocalStatusProvider();
+        TestUtils.startTestCluster(clusterStatusProvider, containerStatusProvider, containerProvider);
+    }
+
+    /**
+     * Stops the test cluster and ZooKeeper. ZooKeeper is stopped even when
+     * stopping the cluster throws, so the next test can start a fresh server.
+     */
+    @AfterMethod(alwaysRun = true)
+    public void teardownTest() throws Exception {
+        try {
+            TestUtils.stopTestCluster();
+        } finally {
+            TestUtils.stopZookeeper();
+        }
+    }
+
+    @Test(timeOut = TestUtils.TEST_TIMEOUT)
+    public void testStatic() throws Exception {
+        log.info("testing static");
+        setContainerCount(CONTAINER_COUNT);
+    }
+
+    @Test(timeOut = TestUtils.TEST_TIMEOUT)
+    public void testScaleUp() throws Exception {
+        log.info("testing scale up");
+        setContainerCount(CONTAINER_COUNT + 2);
+    }
+
+    @Test(timeOut = TestUtils.TEST_TIMEOUT)
+    public void testScaleDown() throws Exception {
+        log.info("testing scale down");
+        setContainerCount(CONTAINER_COUNT - 2);
+    }
+
+    @Test(timeOut = TestUtils.TEST_TIMEOUT)
+    public void testScaleCycle() throws Exception {
+        log.info("testing scale cycle");
+        setContainerCount(CONTAINER_COUNT + 2);
+        setContainerCount(CONTAINER_COUNT);
+        setContainerCount(CONTAINER_COUNT - 2);
+        setContainerCount(CONTAINER_COUNT);
+    }
+
+    /**
+     * Changes the requested container count and triggers a rebalance of the
+     * test cluster.
+     *
+     * @param newContainerCount requested number of containers
+     */
+    void setContainerCount(int newContainerCount) throws Exception {
+        log.debug(String.format("Setting container count to %d", newContainerCount));
+        clusterStatusProvider.setTargetContainerCount(TestUtils.metaResourceName, newContainerCount);
+        TestUtils.rebalanceTestCluster();
+    }
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/ShellContainerProviderIT.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/ShellContainerProviderIT.java b/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/ShellContainerProviderIT.java
new file mode 100644
index 0000000..dce4429
--- /dev/null
+++ b/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/ShellContainerProviderIT.java
@@ -0,0 +1,95 @@
+package org.apache.helix.autoscale;
+
+import java.util.Collections;
+
+import org.apache.helix.autoscale.impl.StaticTargetProvider;
+import org.apache.helix.autoscale.impl.shell.ShellContainerProviderProcess;
+import org.apache.helix.autoscale.impl.shell.ShellStatusProvider;
+import org.apache.log4j.Logger;
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.Test;
+
+/**
+ * Shell container provider and shell status provider test. Scale-up and -down
+ * only, no failures.
+ *
+ * @see ShellContainerProvider
+ * @see ShellStatusProvider
+ */
+@Test(groups = { "integration", "shell" })
+public class ShellContainerProviderIT {
+
+    static final Logger log = Logger.getLogger(ShellContainerProviderIT.class);
+
+    // NOTE(review): these two constants are not referenced in this class; the
+    // tests below use TestUtils.TEST_TIMEOUT. Kept in case other code in the
+    // package refers to them - confirm and remove if unused.
+    static final long TEST_TIMEOUT = 20000;
+    static final long REBALANCE_TIMEOUT = 10000;
+
+    static final int CONTAINER_COUNT = 4;
+
+    StaticTargetProvider clusterStatusProvider;
+    ShellContainerProviderProcess containerProvider;
+    ShellStatusProvider containerStatusProvider;
+
+    /**
+     * Registers a JVM shutdown hook that runs {@link #teardownTest()} so that
+     * the test cluster is also stopped when the JVM exits.
+     */
+    @BeforeClass(alwaysRun = true)
+    public void setupClass() {
+        log.info("installing shutdown hook");
+        Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() {
+            @Override
+            public void run() {
+                try {
+                    teardownTest();
+                } catch (Exception e) {
+                    // best effort only: the JVM is going down anyway, but do not hide the cause
+                    log.warn("teardown in shutdown hook failed", e);
+                }
+            }
+        }));
+    }
+
+    /**
+     * Cleans up leftovers of a previous test, then starts ZooKeeper and a test
+     * cluster with a single shell provider and {@link #CONTAINER_COUNT}
+     * requested containers.
+     */
+    @BeforeMethod(alwaysRun = true)
+    public void setupTest() throws Exception {
+        teardownTest();
+        TestUtils.configure();
+        TestUtils.startZookeeper();
+        containerProvider = TestUtils.makeShellProvider("provider_0");
+        clusterStatusProvider = new StaticTargetProvider(Collections.singletonMap(TestUtils.metaResourceName, CONTAINER_COUNT));
+        containerStatusProvider = new ShellStatusProvider();
+        TestUtils.startTestCluster(clusterStatusProvider, containerStatusProvider, containerProvider);
+    }
+
+    /**
+     * Stops the test cluster and ZooKeeper. ZooKeeper is stopped even when
+     * stopping the cluster throws, so the next test can start a fresh server.
+     */
+    @AfterMethod(alwaysRun = true)
+    public void teardownTest() throws Exception {
+        try {
+            TestUtils.stopTestCluster();
+        } finally {
+            TestUtils.stopZookeeper();
+        }
+    }
+
+    @Test(timeOut = TestUtils.TEST_TIMEOUT)
+    public void testStatic() throws Exception {
+        log.info("testing static");
+        setContainerCount(CONTAINER_COUNT);
+    }
+
+    @Test(timeOut = TestUtils.TEST_TIMEOUT)
+    public void testScaleUp() throws Exception {
+        log.info("testing scale up");
+        setContainerCount(CONTAINER_COUNT + 2);
+    }
+
+    @Test(timeOut = TestUtils.TEST_TIMEOUT)
+    public void testScaleDown() throws Exception {
+        log.info("testing scale down");
+        setContainerCount(CONTAINER_COUNT - 2);
+    }
+
+    @Test(timeOut = TestUtils.TEST_TIMEOUT)
+    public void testScaleCycle() throws Exception {
+        log.info("testing scale cycle");
+        setContainerCount(CONTAINER_COUNT + 2);
+        setContainerCount(CONTAINER_COUNT);
+        setContainerCount(CONTAINER_COUNT - 2);
+        setContainerCount(CONTAINER_COUNT);
+    }
+
+    /**
+     * Changes the requested container count and triggers a rebalance of the
+     * test cluster.
+     *
+     * @param newContainerCount requested number of containers
+     */
+    void setContainerCount(int newContainerCount) throws Exception {
+        log.debug(String.format("Setting container count to %d", newContainerCount));
+        clusterStatusProvider.setTargetContainerCount(TestUtils.metaResourceName, newContainerCount);
+        TestUtils.rebalanceTestCluster();
+    }
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/TestUtils.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/TestUtils.java b/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/TestUtils.java
new file mode 100644
index 0000000..c68b2ca
--- /dev/null
+++ b/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/TestUtils.java
@@ -0,0 +1,443 @@
+package org.apache.helix.autoscale;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+import java.util.Set;
+import java.util.concurrent.TimeoutException;
+
+import org.I0Itec.zkclient.IDefaultNameSpace;
+import org.I0Itec.zkclient.ZkClient;
+import org.I0Itec.zkclient.ZkServer;
+import org.apache.commons.io.FileUtils;
+import org.apache.helix.HelixAdmin;
+import org.apache.helix.HelixManager;
+import org.apache.helix.autoscale.Service;
+import org.apache.helix.autoscale.StatusProvider;
+import org.apache.helix.autoscale.StatusProviderService;
+import org.apache.helix.autoscale.TargetProvider;
+import org.apache.helix.autoscale.TargetProviderService;
+import org.apache.helix.autoscale.impl.local.LocalContainerProviderProcess;
+import org.apache.helix.autoscale.impl.shell.ShellContainerProviderProcess;
+import org.apache.helix.autoscale.impl.yarn.YarnContainerProviderProcess;
+import org.apache.helix.autoscale.impl.yarn.YarnContainerProviderProperties;
+import org.apache.helix.autoscale.provider.ProviderProperties;
+import org.apache.helix.autoscale.provider.ProviderRebalancer;
+import org.apache.helix.autoscale.provider.ProviderRebalancerSingleton;
+import org.apache.helix.controller.HelixControllerMain;
+import org.apache.helix.manager.zk.ZKHelixAdmin;
+import org.apache.helix.model.ExternalView;
+import org.apache.helix.model.IdealState;
+import org.apache.helix.model.IdealState.RebalanceMode;
+import org.apache.helix.model.StateModelDefinition;
+import org.apache.helix.tools.StateModelConfigGenerator;
+import org.apache.log4j.Logger;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.Maps;
+
+/**
+ * Utility for creating a test cluster without the bootstrapping tool. Methods
+ * for verifying the number of active instances and partitions in a cluster.
+ *
+ */
+public class TestUtils {
+
+ static final Logger log = Logger.getLogger(TestUtils.class);
+
+ public static int zkPort;
+ public static String zkAddress;
+ public static String resmanAddress;
+ public static String schedulerAddress;
+ public static String hdfsAddress;
+ public static String yarnUser;
+
+ public static final String metaClusterName = "meta-cluster";
+ public static final String managedClusterName = "managed-cluster";
+ public static final String metaResourceName = "container";
+ public static final String managedResourceName = "database";
+
+ public static final int numManagedPartitions = 10;
+ public static final int numManagedReplica = 2;
+
+ public static final long TEST_TIMEOUT = 120000;
+ public static final long REBALANCE_TIMEOUT = 60000;
+ public static final long POLL_INTERVAL = 1000;
+
+ public static final ProviderProperties providerProperties = new ProviderProperties();
+
+ public static ZkServer server = null;
+ public static HelixAdmin admin = null;
+ public static HelixManager metaControllerManager = null;
+ public static HelixManager managedControllerManager = null;
+
+ public static Collection<Service> providerServices = new ArrayList<Service>();
+ public static Collection<Service> auxServices = new ArrayList<Service>();
+
+ public static TargetProvider targetProvider = null;
+ public static StatusProvider statusProvider = null;
+
+ // Applies the default configuration (configure() without arguments) when the
+ // class is initialized. A failure is logged and rethrown as RuntimeException,
+ // which aborts class initialization.
+ static {
+ try {
+ configure();
+ } catch(Exception e) {
+ log.error("Could not setup TestUtils", e);
+ throw new RuntimeException(e);
+ }
+ }
+
+ /** Private constructor: this class is only used through its static members. */
+ private TestUtils() {
+ // left blank
+ }
+
+ /**
+  * Configures the test utilities from the class path resource
+  * "standalone.properties".
+  *
+  * @throws IOException propagated from {@link #configure(String)}
+  */
+ public static void configure() throws IOException {
+ configure("standalone.properties");
+ }
+
+ /**
+  * Configures the test utilities from a properties file on the system class
+  * path.
+  *
+  * @param resourcePath class path location of the properties file
+  * @throws IOException if the resource does not exist (reported as
+  *         {@link java.io.FileNotFoundException}) or cannot be read
+  */
+ public static void configure(String resourcePath) throws IOException {
+     log.info(String.format("Configuring Test cluster from %s", resourcePath));
+
+     // getSystemResourceAsStream() returns null for an unknown resource; fail
+     // with a descriptive error instead of a NullPointerException inside load()
+     java.io.InputStream in = ClassLoader.getSystemResourceAsStream(resourcePath);
+     if (in == null) {
+         throw new java.io.FileNotFoundException(String.format("Resource '%s' not found on class path", resourcePath));
+     }
+
+     Properties properties = new Properties();
+     try {
+         properties.load(in);
+     } finally {
+         // load() does not close the stream
+         in.close();
+     }
+     configure(properties);
+ }
+
+ public static void configure(Properties properties) {
+ log.info(String.format("Configuring from properties '%s'", properties));
+
+ zkPort = Integer.valueOf(properties.getProperty("zookeeper.port"));
+ zkAddress = properties.getProperty("zookeeper.address");
+ resmanAddress = properties.getProperty("yarn.resourcemanager");
+ schedulerAddress = properties.getProperty("yarn.scheduler");
+ hdfsAddress = properties.getProperty("yarn.hdfs");
+ yarnUser = properties.getProperty("yarn.user");
+
+ Preconditions.checkNotNull(zkPort);
+ Preconditions.checkNotNull(zkAddress);
+ Preconditions.checkNotNull(resmanAddress);
+ Preconditions.checkNotNull(schedulerAddress);
+ Preconditions.checkNotNull(hdfsAddress);
+ Preconditions.checkNotNull(yarnUser);
+
+ configureInternal();
+ }
+
+ /**
+  * Rebuilds the shared {@code providerProperties} from the current static
+  * configuration: both the managed and the meta cluster use {@code zkAddress},
+  * and a single container type "container" is registered.
+  */
+ static void configureInternal() {
+ providerProperties.clear();
+ providerProperties.setProperty(ProviderProperties.ADDRESS, zkAddress);
+ providerProperties.setProperty(ProviderProperties.CLUSTER, managedClusterName);
+ providerProperties.setProperty(ProviderProperties.METAADDRESS, zkAddress);
+ providerProperties.setProperty(ProviderProperties.METACLUSTER, metaClusterName);
+ // placeholder name; makeProviderProperties(String) sets the name on its copy
+ providerProperties.setProperty(ProviderProperties.NAME, "<unknown>");
+
+ Properties containerProperties = new Properties();
+ containerProperties.setProperty("class", "org.apache.helix.autoscale.impl.container.DummyMasterSlaveProcess");
+
+ providerProperties.addContainer("container", containerProperties);
+
+ log.info(String.format("Using provider properties '%s'", providerProperties));
+ }
+
+ /**
+  * Creates a local ZooKeeper server, keeps it in {@code server} and starts it.
+  *
+  * @throws IllegalStateException if a server instance is already registered
+  * @throws Exception if the server cannot be created or started
+  */
+ public static void startZookeeper() throws Exception {
+     log.info("Starting ZooKeeper");
+
+     final boolean alreadyRegistered = (server != null);
+     if (alreadyRegistered) {
+         throw new IllegalStateException("Zookeeper already running");
+     }
+
+     // registered before start() so that stopZookeeper() also sees an
+     // instance whose start failed
+     server = createLocalZookeeper();
+     server.start();
+ }
+
+ /**
+  * Shuts down the registered ZooKeeper server, if any, and forgets it.
+  * Calling this without a registered server only writes the log message.
+  */
+ public static void stopZookeeper() throws Exception {
+     log.info("Stopping ZooKeeper");
+
+     if (server == null) {
+         return;
+     }
+
+     server.shutdown();
+     server = null;
+ }
+
+ /**
+  * Sets up and starts the complete test cluster: creates the meta and the
+  * managed cluster, registers and starts the target and status provider,
+  * adds the managed and the meta resource, starts the given container
+  * providers and both controllers and finally waits until the requested
+  * number of containers is assigned.
+  *
+  * @param targetProvider             source of the target container count
+  * @param statusProvider             status provider handed to the rebalancer singleton
+  * @param containerProviderProcesses provider services to start
+  * @throws IllegalStateException if ZooKeeper was not started or a test
+  *         cluster is still registered
+  * @throws Exception if any setup step or the final wait fails
+  */
+ public static void startTestCluster(TargetProviderService targetProvider, StatusProviderService statusProvider, Service... containerProviderProcesses)
+ throws Exception {
+ log.debug(String.format("Starting test cluster"));
+
+ if (server == null)
+ throw new IllegalStateException("Zookeeper not running yet");
+
+ // any leftover state means stopTestCluster() was not run (or did not finish)
+ if (!auxServices.isEmpty() || !providerServices.isEmpty() || admin != null || metaControllerManager != null || managedControllerManager != null)
+ throw new IllegalStateException("TestCluster already running");
+
+ log.debug("Create admin");
+ admin = new ZKHelixAdmin(zkAddress);
+
+ log.debug("Create clusters");
+ admin.addCluster(metaClusterName, true);
+ admin.addCluster(managedClusterName, true);
+
+ log.debug("Setup config tool");
+ ProviderRebalancerSingleton.setTargetProvider(targetProvider);
+ ProviderRebalancerSingleton.setStatusProvider(statusProvider);
+
+ log.debug("Starting target and status provider");
+ TestUtils.targetProvider = startAuxService(targetProvider);
+ TestUtils.statusProvider = startAuxService(statusProvider);
+
+ // Managed Cluster
+ log.debug("Setup managed cluster");
+ admin.addStateModelDef(managedClusterName, "MasterSlave", new StateModelDefinition(StateModelConfigGenerator.generateConfigForMasterSlave()));
+ admin.addResource(managedClusterName, managedResourceName, numManagedPartitions, "MasterSlave", RebalanceMode.FULL_AUTO.toString());
+ IdealState managedIdealState = admin.getResourceIdealState(managedClusterName, managedResourceName);
+ // the managed resource is tagged with the meta resource name
+ managedIdealState.setInstanceGroupTag(metaResourceName);
+ managedIdealState.setReplicas(String.valueOf(numManagedReplica));
+ admin.setResourceIdealState(managedClusterName, managedResourceName, managedIdealState);
+
+ // Meta Cluster
+ log.debug("Setup meta cluster");
+ admin.addStateModelDef(metaClusterName, "OnlineOffline", new StateModelDefinition(StateModelConfigGenerator.generateConfigForOnlineOffline()));
+ admin.addResource(metaClusterName, metaResourceName, targetProvider.getTargetContainerCount(metaResourceName), "OnlineOffline",
+ RebalanceMode.USER_DEFINED.toString());
+
+ IdealState idealState = admin.getResourceIdealState(metaClusterName, metaResourceName);
+ idealState.setRebalancerClassName(ProviderRebalancer.class.getName());
+ idealState.setReplicas("1");
+
+ // BEGIN workaround
+ // FIXME workaround for HELIX-226
+ // pre-populates empty list and map fields for partitions <metaResourceName>_0 .. _255
+ Map<String, List<String>> listFields = Maps.newHashMap();
+ Map<String, Map<String, String>> mapFields = Maps.newHashMap();
+ for (int i = 0; i < 256; i++) {
+ String partitionName = metaResourceName + "_" + i;
+ listFields.put(partitionName, new ArrayList<String>());
+ mapFields.put(partitionName, new HashMap<String, String>());
+ }
+ idealState.getRecord().setListFields(listFields);
+ idealState.getRecord().setMapFields(mapFields);
+ // END workaround
+
+ admin.setResourceIdealState(metaClusterName, metaResourceName, idealState);
+
+ log.debug("Starting container providers");
+ for (Service service : containerProviderProcesses) {
+ startProviderService(service);
+ }
+
+ log.debug("Starting managed cluster controller");
+ managedControllerManager = HelixControllerMain.startHelixController(zkAddress, managedClusterName, "managedController", HelixControllerMain.STANDALONE);
+
+ log.debug("Starting meta cluster controller");
+ metaControllerManager = HelixControllerMain.startHelixController(zkAddress, metaClusterName, "metaController", HelixControllerMain.STANDALONE);
+
+ log.debug("Waiting for stable state");
+ waitUntilRebalancedCount(targetProvider.getTargetContainerCount(metaResourceName));
+ }
+
+ public static void stopTestCluster() throws Exception {
+ log.debug(String.format("Stopping test cluster"));
+ if (managedControllerManager != null) {
+ log.info("Disconnecting managed cluster controller");
+ managedControllerManager.disconnect();
+ }
+ if (metaControllerManager != null) {
+ log.info("Disconnecting meta cluster controller");
+ metaControllerManager.disconnect();
+ }
+ log.info("Stopping provider services");
+ if (providerServices != null) {
+ for (Service service : providerServices) {
+ service.stop();
+ }
+ providerServices.clear();
+ }
+ log.debug("Stopping auxillary services");
+ if (auxServices != null) {
+ for (Service service : auxServices) {
+ service.stop();
+ }
+ auxServices.clear();
+ }
+
+ admin = null;
+ metaControllerManager = null;
+ managedControllerManager = null;
+ }
+
+ /**
+  * Registers the service in {@code auxServices} and starts it.
+  * <p>
+  * The service is registered before {@code start()} is invoked, so it stays
+  * registered even if {@code start()} throws.
+  *
+  * @param service service to start
+  * @return the same service instance
+  */
+ public static <T extends Service> T startAuxService(T service) throws Exception {
+ auxServices.add(service);
+ service.start();
+ return service;
+ }
+
+ /**
+  * Registers the service in {@code providerServices} and starts it.
+  * <p>
+  * The service is registered before {@code start()} is invoked, so it stays
+  * registered even if {@code start()} throws.
+  *
+  * @param service provider service to start
+  * @return the same service instance
+  */
+ public static <T extends Service> T startProviderService(T service) throws Exception {
+ providerServices.add(service);
+ service.start();
+ return service;
+ }
+
+ /**
+  * Triggers a rebalance of the meta cluster by writing the ideal state of the
+  * meta resource back as read, then waits until the container count
+  * currently requested by the target provider is reached.
+  *
+  * @throws Exception if the wait fails
+  */
+ public static void rebalanceTestCluster() throws Exception {
+     log.debug(String.format("Triggering rebalance"));
+
+     // read-and-write-back of the same ideal state
+     final IdealState sameState = admin.getResourceIdealState(metaClusterName, metaResourceName);
+     admin.setResourceIdealState(metaClusterName, metaResourceName, sameState);
+
+     final int requestedContainers = targetProvider.getTargetContainerCount(metaResourceName);
+     waitUntilRebalancedCount(requestedContainers);
+ }
+
+ /**
+  * Waits, within {@code REBALANCE_TIMEOUT} milliseconds overall, until the
+  * meta resource shows {@code containerCount} partitions, the meta resource
+  * is served by as many instances as there are provider services and the
+  * managed resource shows all of its partitions.
+  * <p>
+  * A separate admin connection is opened for the checks and closed again
+  * before returning.
+  *
+  * @param containerCount expected number of containers
+  * @throws Exception if one of the waits fails
+  */
+ public static void waitUntilRebalancedCount(int containerCount) throws Exception {
+     log.debug(String.format("Waiting for rebalance with %d containers at '%s'", containerCount, zkAddress));
+
+     final HelixAdmin probe = new ZKHelixAdmin(zkAddress);
+     try {
+         final long deadline = System.currentTimeMillis() + REBALANCE_TIMEOUT;
+
+         long remaining = deadline - System.currentTimeMillis();
+         waitUntilPartitionCount(probe, metaClusterName, metaResourceName, containerCount, remaining);
+
+         final int providerCount = providerServices.size();
+         remaining = deadline - System.currentTimeMillis();
+         waitUntilInstanceCount(probe, metaClusterName, metaResourceName, providerCount, remaining);
+
+         remaining = deadline - System.currentTimeMillis();
+         waitUntilPartitionCount(probe, managedClusterName, managedResourceName, numManagedPartitions, remaining);
+
+         // FIXME workaround for Helix FULL_AUTO rebalancer not providing guarantees for cluster expansion
+         //waitUntilInstanceCount(admin, managedClusterName, managedResourceName, containerCount, (limit - System.currentTimeMillis()));
+     } finally {
+         probe.close();
+     }
+ }
+
+ /**
+  * Polls the external view until exactly {@code targetCount} instances serve
+  * the resource.
+  * <p>
+  * The condition is checked at least once, even when {@code timeout} is zero
+  * or negative, so a state that is already correct is never reported as a
+  * timeout. The pause between two checks never extends past the deadline.
+  *
+  * @param admin       connection used to read the external view
+  * @param cluster     cluster name
+  * @param resource    resource name
+  * @param targetCount expected number of instances
+  * @param timeout     maximum wait in milliseconds
+  * @throws TimeoutException if the count is not reached in time
+  * @throws Exception on read failures or interruption
+  */
+ public static void waitUntilInstanceCount(HelixAdmin admin, String cluster, String resource, int targetCount, long timeout) throws Exception {
+     log.debug(String.format("Waiting for instance count (cluster='%s', resource='%s', instanceCount=%d, timeout=%d)", cluster, resource, targetCount,
+             timeout));
+
+     final long limit = System.currentTimeMillis() + timeout;
+     int assignedCount;
+     while (true) {
+         assignedCount = getAssingedInstances(admin, cluster, resource).size();
+         log.debug(String.format("checking instance count for '%s:%s': target=%d, current=%d", cluster, resource, targetCount, assignedCount));
+
+         if (targetCount == assignedCount) {
+             return;
+         }
+
+         long remaining = limit - System.currentTimeMillis();
+         if (remaining <= 0) {
+             break;
+         }
+         Thread.sleep(Math.min(POLL_INTERVAL, remaining));
+     }
+     throw new TimeoutException(String.format("Timed out waiting for instance count (cluster='%s', resource='%s', target=%d, current=%d)", cluster,
+             resource, targetCount, assignedCount));
+ }
+
+ /**
+  * Polls the external view until exactly {@code targetCount} partitions of
+  * the resource are assigned.
+  * <p>
+  * The condition is checked at least once, even when {@code timeout} is zero
+  * or negative, so a state that is already correct is never reported as a
+  * timeout. The pause between two checks never extends past the deadline.
+  *
+  * @param admin       connection used to read the external view
+  * @param cluster     cluster name
+  * @param resource    resource name
+  * @param targetCount expected number of partitions
+  * @param timeout     maximum wait in milliseconds
+  * @throws TimeoutException if the count is not reached in time
+  * @throws Exception on read failures or interruption
+  */
+ public static void waitUntilPartitionCount(HelixAdmin admin, String cluster, String resource, int targetCount, long timeout) throws Exception {
+     log.debug(String.format("Waiting for partition count (cluster='%s', resource='%s', partitionCount=%d, timeout=%d)", cluster, resource, targetCount,
+             timeout));
+
+     final long limit = System.currentTimeMillis() + timeout;
+     int assignedCount;
+     while (true) {
+         assignedCount = getAssingedPartitions(admin, cluster, resource).size();
+         log.debug(String.format("checking partition count for '%s:%s': target=%d, current=%d", cluster, resource, targetCount, assignedCount));
+
+         if (targetCount == assignedCount) {
+             return;
+         }
+
+         long remaining = limit - System.currentTimeMillis();
+         if (remaining <= 0) {
+             break;
+         }
+         Thread.sleep(Math.min(POLL_INTERVAL, remaining));
+     }
+     throw new TimeoutException(String.format("Timed out waiting for partition count (cluster='%s', resource='%s', target=%d, current=%d)", cluster,
+             resource, targetCount, assignedCount));
+ }
+
+ /**
+  * Collects the names of all instances that hold at least one partition of
+  * the resource in state MASTER, SLAVE or ONLINE according to the external
+  * view.
+  *
+  * @param admin        connection used to read the external view
+  * @param clusterName  cluster name
+  * @param resourceName resource name
+  * @return instance names; empty if there is no external view
+  */
+ public static Set<String> getAssingedInstances(HelixAdmin admin, String clusterName, String resourceName) {
+     final Set<String> active = new HashSet<String>();
+
+     final ExternalView view = admin.getResourceExternalView(clusterName, resourceName);
+     if (view != null) {
+         for (String partition : view.getPartitionSet()) {
+             final Map<String, String> states = view.getStateMap(partition);
+             if (states != null) {
+                 for (Map.Entry<String, String> assignment : states.entrySet()) {
+                     final String state = assignment.getValue();
+                     if ("MASTER".equals(state) || "SLAVE".equals(state) || "ONLINE".equals(state)) {
+                         active.add(assignment.getKey());
+                     }
+                 }
+             }
+         }
+     }
+
+     return active;
+ }
+
+ /**
+  * Collects the names of all partitions of the resource that have at least
+  * one replica in state MASTER or ONLINE according to the external view.
+  *
+  * @param admin        connection used to read the external view
+  * @param clusterName  cluster name
+  * @param resourceName resource name
+  * @return partition names; empty if there is no external view
+  */
+ public static Set<String> getAssingedPartitions(HelixAdmin admin, String clusterName, String resourceName) {
+     final Set<String> assigned = new HashSet<String>();
+
+     final ExternalView view = admin.getResourceExternalView(clusterName, resourceName);
+     if (view != null) {
+         for (String partition : view.getPartitionSet()) {
+             final Map<String, String> states = view.getStateMap(partition);
+             if (states != null) {
+                 for (String state : states.values()) {
+                     if ("MASTER".equals(state) || "ONLINE".equals(state)) {
+                         assigned.add(partition);
+                         // one serving replica is enough for this partition
+                         break;
+                     }
+                 }
+             }
+         }
+     }
+
+     return assigned;
+ }
+
+ /**
+  * Creates (but does not start) a ZooKeeper server on {@code zkPort} whose
+  * data and log directories live below /tmp/autoscale/zk. Both directories
+  * are deleted first.
+  *
+  * @return the configured server
+  * @throws Exception if a directory cannot be deleted
+  */
+ public static ZkServer createLocalZookeeper() throws Exception {
+     final String root = "/tmp/autoscale/";
+     final String dataDir = root + "zk/dataDir";
+     final String logDir = root + "zk/logDir";
+
+     for (String staleDir : new String[] { dataDir, logDir }) {
+         FileUtils.deleteDirectory(new File(staleDir));
+     }
+
+     // no default namespace is created
+     final IDefaultNameSpace emptyNameSpace = new IDefaultNameSpace() {
+         @Override
+         public void createDefaultNameSpace(ZkClient zkClient) {
+
+         }
+     };
+
+     return new ZkServer(dataDir, logDir, emptyNameSpace, zkPort);
+ }
+
+ /**
+  * Creates a local container provider process configured with a copy of the
+  * shared provider properties and the given name. The process is not started.
+  *
+  * @param name provider name
+  * @return the configured process
+  */
+ public static LocalContainerProviderProcess makeLocalProvider(String name) throws Exception {
+     final LocalContainerProviderProcess localProcess = new LocalContainerProviderProcess();
+     final ProviderProperties namedProperties = makeProviderProperties(name);
+     localProcess.configure(namedProperties);
+     return localProcess;
+ }
+
+ /**
+  * Creates a shell container provider process configured with a copy of the
+  * shared provider properties and the given name. The process is not started.
+  *
+  * @param name provider name
+  * @return the configured process
+  */
+ public static ShellContainerProviderProcess makeShellProvider(String name) throws Exception {
+     final ShellContainerProviderProcess shellProcess = new ShellContainerProviderProcess();
+     final ProviderProperties namedProperties = makeProviderProperties(name);
+     shellProcess.configure(namedProperties);
+     return shellProcess;
+ }
+
+ /**
+  * Creates a YARN container provider process. Its properties are a copy of
+  * the shared provider properties with the given name, extended by the YARN
+  * settings held in this class (YARN data address, resource manager,
+  * scheduler, user and HDFS). The process is not started.
+  *
+  * @param name provider name
+  * @return the configured process
+  */
+ public static YarnContainerProviderProcess makeYarnProvider(String name) throws Exception {
+     final YarnContainerProviderProperties yarnProperties = new YarnContainerProviderProperties();
+     yarnProperties.putAll(makeProviderProperties(name));
+
+     // YARN data is kept at the same ZooKeeper address as the clusters
+     yarnProperties.put(YarnContainerProviderProperties.YARNDATA, zkAddress);
+     yarnProperties.put(YarnContainerProviderProperties.RESOURCEMANAGER, resmanAddress);
+     yarnProperties.put(YarnContainerProviderProperties.SCHEDULER, schedulerAddress);
+     yarnProperties.put(YarnContainerProviderProperties.USER, yarnUser);
+     yarnProperties.put(YarnContainerProviderProperties.HDFS, hdfsAddress);
+
+     final YarnContainerProviderProcess yarnProcess = new YarnContainerProviderProcess();
+     yarnProcess.configure(yarnProperties);
+     return yarnProcess;
+ }
+
+ static ProviderProperties makeProviderProperties(String name) {
+ ProviderProperties properties = new ProviderProperties();
+ properties.putAll(providerProperties);
+ properties.setProperty(ProviderProperties.NAME, name);
+ return properties;
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/TestUtilsUT.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/TestUtilsUT.java b/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/TestUtilsUT.java
new file mode 100644
index 0000000..d55d7a4
--- /dev/null
+++ b/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/TestUtilsUT.java
@@ -0,0 +1,63 @@
+package org.apache.helix.autoscale;
+
+import java.util.Collections;
+
+import org.apache.helix.autoscale.Service;
+import org.apache.helix.autoscale.StatusProviderService;
+import org.apache.helix.autoscale.TargetProviderService;
+import org.apache.helix.autoscale.impl.StaticTargetProvider;
+import org.apache.helix.autoscale.impl.local.LocalStatusProvider;
+import org.apache.log4j.Logger;
+import org.testng.annotations.Test;
+
+/**
+ * Self-test of the test cluster utilities. Spawns zookeeper and a cluster with a single provider and a single instance.
+ * Everything a test starts is stopped in a {@code finally} block, so a failing test does not leave
+ * zookeeper or the test cluster running for the tests that follow.
+ *
+ * @see TestUtils
+ */
+@Test(groups={"unit"})
+public class TestUtilsUT {
+
+ static final Logger log = Logger.getLogger(TestUtilsUT.class);
+
+ /**
+ * Starts and stops zookeeper.
+ */
+ @Test(timeOut = TestUtils.TEST_TIMEOUT)
+ public void testZookeeper() throws Exception {
+ log.info("testing zookeeper");
+ TestUtils.configure();
+ try {
+ TestUtils.startZookeeper();
+ } finally {
+ TestUtils.stopZookeeper();
+ }
+ }
+
+ /**
+ * Starts and stops a test cluster with a static target of one container.
+ */
+ @Test(timeOut = TestUtils.TEST_TIMEOUT)
+ public void testCluster() throws Exception {
+ log.info("testing cluster");
+ TestUtils.configure();
+ try {
+ TestUtils.startZookeeper();
+
+ TestUtils.startTestCluster(new StaticTargetProvider(Collections.singletonMap(TestUtils.metaResourceName, 1)),
+ new LocalStatusProvider(), TestUtils.makeLocalProvider("test"));
+ } finally {
+ stopClusterAndZookeeper();
+ }
+ }
+
+ /**
+ * Starts and stops the test cluster twice, reusing the same provider instances.
+ */
+ @Test(timeOut = TestUtils.TEST_TIMEOUT)
+ public void testClusterRepeated() throws Exception {
+ log.info("testing cluster restart");
+ TestUtils.configure();
+ try {
+ TestUtils.startZookeeper();
+
+ TargetProviderService targetProvider = new StaticTargetProvider(Collections.singletonMap(TestUtils.metaResourceName, 1));
+ Service containerProvider = TestUtils.makeLocalProvider("test");
+ StatusProviderService containerStatusProvider = new LocalStatusProvider();
+
+ TestUtils.startTestCluster(targetProvider, containerStatusProvider, containerProvider);
+ TestUtils.stopTestCluster();
+
+ // the second run is stopped by the finally block below
+ TestUtils.startTestCluster(targetProvider, containerStatusProvider, containerProvider);
+ } finally {
+ stopClusterAndZookeeper();
+ }
+ }
+
+ /**
+ * Stops the test cluster and then zookeeper; zookeeper is stopped even if stopping the cluster throws.
+ * NOTE(review): assumes the TestUtils stop methods tolerate a partially started environment, as
+ * YarnContainerProviderIT relies on by tearing down before its setup - confirm.
+ */
+ private static void stopClusterAndZookeeper() throws Exception {
+ try {
+ TestUtils.stopTestCluster();
+ } finally {
+ TestUtils.stopZookeeper();
+ }
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/YarnContainerProviderIT.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/YarnContainerProviderIT.java b/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/YarnContainerProviderIT.java
new file mode 100644
index 0000000..78a0bf8
--- /dev/null
+++ b/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/YarnContainerProviderIT.java
@@ -0,0 +1,101 @@
+package org.apache.helix.autoscale;
+
+import java.util.Collections;
+
+import org.apache.helix.autoscale.impl.StaticTargetProvider;
+import org.apache.helix.autoscale.impl.yarn.YarnContainerProviderProcess;
+import org.apache.helix.autoscale.impl.yarn.YarnContainerProviderProperties;
+import org.apache.helix.autoscale.impl.yarn.YarnStatusProvider;
+import org.apache.log4j.Logger;
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.Test;
+
+/**
+ * Yarn container provider and yarn status provider test. Scale-up and -down
+ * only, no failures. Each test method runs against a freshly started zookeeper
+ * and test cluster (see {@link #setupTest()}) and only changes the target
+ * container count followed by a rebalance.
+ *
+ * @see YarnContainerProvider
+ * @see YarnStatusProvider
+ */
+@Test(groups={"integration", "yarn"})
+public class YarnContainerProviderIT {
+
+ static final Logger log = Logger.getLogger(YarnContainerProviderIT.class);
+
+ /** Target container count the cluster is started with. */
+ static final int CONTAINER_COUNT = 4;
+
+ StaticTargetProvider clusterStatusProvider;
+ YarnContainerProviderProcess containerProvider;
+ YarnStatusProvider containerStatusProvider;
+
+ // neither assigned nor read within this class
+ YarnContainerProviderProperties properties;
+
+ /**
+ * Registers a JVM shutdown hook that tears the test environment down.
+ */
+ @BeforeClass(alwaysRun = true)
+ public void setupClass() throws Exception {
+ log.info("installing shutdown hook");
+ Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() {
+ @Override
+ public void run() {
+ try {
+ teardownTest();
+ } catch (Exception e) {
+ // best effort: the JVM is going down anyway, so report the failure instead of hiding it
+ log.warn("failed to tear down yarn test case in shutdown hook", e);
+ }
+ }
+ }));
+ }
+
+ /**
+ * Tears down whatever a previous test left behind, then starts zookeeper and
+ * a test cluster with one yarn provider and a target of {@link #CONTAINER_COUNT} containers.
+ */
+ @BeforeMethod(alwaysRun = true)
+ public void setupTest() throws Exception {
+ log.debug("setting up yarn test case");
+
+ teardownTest();
+ TestUtils.configure("distributed.properties");
+ TestUtils.startZookeeper();
+
+ containerProvider = TestUtils.makeYarnProvider("provider_0");
+ containerStatusProvider = new YarnStatusProvider(TestUtils.zkAddress);
+ clusterStatusProvider = new StaticTargetProvider(Collections.singletonMap(TestUtils.metaResourceName, CONTAINER_COUNT));
+ TestUtils.startTestCluster(clusterStatusProvider, containerStatusProvider, containerProvider);
+
+ log.debug("running yarn test case");
+ }
+
+ /**
+ * Stops the test cluster and zookeeper. Also called before setup and from the shutdown hook.
+ */
+ @AfterMethod(alwaysRun = true)
+ public void teardownTest() throws Exception {
+ log.debug("cleaning up yarn test case");
+ TestUtils.stopTestCluster();
+ TestUtils.stopZookeeper();
+ }
+
+ /** Re-applies the initial container count. */
+ @Test(timeOut = TestUtils.TEST_TIMEOUT)
+ public void testStatic() throws Exception {
+ log.info("testing static");
+ setContainerCount(CONTAINER_COUNT);
+ }
+
+ /** Raises the target by two containers. */
+ @Test(timeOut = TestUtils.TEST_TIMEOUT)
+ public void testScaleUp() throws Exception {
+ log.info("testing scale up");
+ setContainerCount(CONTAINER_COUNT + 2);
+ }
+
+ /** Lowers the target by two containers. */
+ @Test(timeOut = TestUtils.TEST_TIMEOUT)
+ public void testScaleDown() throws Exception {
+ log.info("testing scale down");
+ setContainerCount(CONTAINER_COUNT - 2);
+ }
+
+ /** Scales up, back, down and back again within one test. */
+ @Test(timeOut = TestUtils.TEST_TIMEOUT)
+ public void testScaleCycle() throws Exception {
+ log.info("testing scale cycle");
+ setContainerCount(CONTAINER_COUNT + 2);
+ setContainerCount(CONTAINER_COUNT);
+ setContainerCount(CONTAINER_COUNT - 2);
+ setContainerCount(CONTAINER_COUNT);
+ }
+
+ /**
+ * Sets the target container count of the meta resource and triggers a rebalance.
+ * NOTE(review): the tests contain no assertions of their own; presumably
+ * TestUtils.rebalanceTestCluster() verifies or waits for the new state - confirm.
+ *
+ * @param newContainerCount new target number of containers
+ */
+ void setContainerCount(int newContainerCount) throws Exception {
+ log.debug(String.format("Setting container count to %d", newContainerCount));
+ clusterStatusProvider.setTargetContainerCount(TestUtils.metaResourceName, newContainerCount);
+ TestUtils.rebalanceTestCluster();
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/test/resources/distributed.properties
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/test/resources/distributed.properties b/recipes/auto-scale/src/test/resources/distributed.properties
new file mode 100644
index 0000000..47fd8e0
--- /dev/null
+++ b/recipes/auto-scale/src/test/resources/distributed.properties
@@ -0,0 +1,13 @@
+#
+# zookeeper will be spawned locally by TestUtils
+#
+zookeeper.port=2199
+zookeeper.address=rm:2199
+
+#
+# YARN and HDFS must be set up manually
+#
+yarn.resourcemanager=rm:8032
+yarn.scheduler=rm:8030
+yarn.hdfs=hdfs://rm:9000/
+yarn.user=yarn
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/test/resources/log4j.properties
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/test/resources/log4j.properties b/recipes/auto-scale/src/test/resources/log4j.properties
new file mode 100644
index 0000000..65800cc
--- /dev/null
+++ b/recipes/auto-scale/src/test/resources/log4j.properties
@@ -0,0 +1,30 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# Set root logger level to ERROR and its only appender to C.
+log4j.rootLogger=ERROR, C
+
+# C is set to be a ConsoleAppender.
+log4j.appender.C=org.apache.log4j.ConsoleAppender
+log4j.appender.C.layout=org.apache.log4j.PatternLayout
+log4j.appender.C.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n
+
+log4j.logger.org.I0Itec=ERROR
+log4j.logger.org.apache=ERROR
+
+log4j.logger.org.apache.helix.autoscale=DEBUG
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/test/resources/standalone.properties
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/test/resources/standalone.properties b/recipes/auto-scale/src/test/resources/standalone.properties
new file mode 100644
index 0000000..d4b4e86
--- /dev/null
+++ b/recipes/auto-scale/src/test/resources/standalone.properties
@@ -0,0 +1,13 @@
+#
+# zookeeper will be spawned locally by TestUtils
+#
+zookeeper.port=2199
+zookeeper.address=localhost:2199
+
+#
+# YARN and HDFS must be set up manually
+#
+yarn.resourcemanager=localhost:8032
+yarn.scheduler=localhost:8030
+yarn.hdfs=hdfs://localhost:9000/
+yarn.user=yarn
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/README.md
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/README.md b/recipes/meta-cluster-manager/README.md
new file mode 100644
index 0000000..9a8acf4
--- /dev/null
+++ b/recipes/meta-cluster-manager/README.md
@@ -0,0 +1,82 @@
+<!---
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied. See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+Meta cluster manager
+------------------------
+Auto-scaling for helix clusters using a meta cluster. The managed cluster operates as usual, managing resources and instances via AUTO_REBALANCE. The meta cluster monitors the managed cluster and injects or removes instances based on demand.
+
+The meta cluster makes decisions about scaling up or down based on information obtained from a "ClusterStatusProvider". A custom "ProviderRebalancer" is invoked testing the health of existing participants in the managed cluster with the "ContainerStatusProvider". If participants need to be (re-)deployed the "ContainerProvider" is invoked to instantiate and inject participants in the managed cluster.
+
+ContainerProviders are the participants of the meta cluster and there are multiple different implementations of the "ContainerProvider". First, the "LocalContainerProvider" spawns VM-local participants, i.e. participants of the managed cluster are spawned in the same VM in which the container provider runs. This is mainly useful for testing. Second, the "ShellContainerProvider" spawns a separate VM process for each participant using shell commands. Third, the "YarnContainerProvider" creates processes as containers on a YARN cluster and manages their status using an external meta-data service (Zookeeper in this implementation). This implementation is fairly complex and has a number of external dependencies on a working YARN cluster and running services.
+
+Even though there are different types of providers, the notion of a "ContainerProcess" abstracts implementation specifics. A process implementation inherits from "ContainerProcess" and can be instantiated by all three types of container providers. CAUTION: since separate VM processes might be used, a VM-external method for coordination is required (e.g. Zookeeper).
+
+Configuration settings are passed throughout the application using traditional Properties objects. The "ConfigTool" contains default paths and helps to inject dependencies in the ProviderRebalancer.
+
+The application can be run and tested in three ways. First, a comprehensive suite of unit and integration tests can be run using "mvn verify". Second, the "Bootstrapper" can deploy a live managed and meta cluster based on a specification (e.g. "2by2shell.properties"). Third, the "MetaManagerDemo" deploys a test cluster and allows the user to step through a cycle of scale-up and scale-down as well as simulated container and container provider failures.
+
+
+The IdealState of the meta cluster uses the ONLINE-OFFLINE model and maps as follows in the example below:
+
+Resource: type of container, e.g. database, webserver
+Partition: container id
+Instance: responsible container provider
+
+META:
+
+database
+ database_0
+ provider_0 : ONLINE
+ database_1
+ provider_1 : ONLINE
+webserver
+ webserver_0
+ provider_0 : ONLINE
+ webserver_1
+ provider_1 : ONLINE
+ webserver_2
+ provider_0 : ONLINE
+
+
+MANAGED:
+
+dbprod (tag=database)
+ dbprod_0
+ database_0 : MASTER
+ database_1 : SLAVE
+ dbprod_1
+ database_0 : SLAVE
+ database_1 : MASTER
+ dbprod_2
+ database_0 : MASTER
+ database_1 : SLAVE
+wsprod (tag=webserver)
+ wsprod_0
+ webserver_0 : ONLINE
+ wsprod_1
+ webserver_1 : ONLINE
+ wsprod_2
+ webserver_2 : ONLINE
+ wsprod_3
+ webserver_0 : ONLINE
+ wsprod_4
+ webserver_1 : ONLINE
+ wsprod_5
+ webserver_2 : ONLINE
+
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/pom.xml
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/pom.xml b/recipes/meta-cluster-manager/pom.xml
new file mode 100644
index 0000000..ba5eb69
--- /dev/null
+++ b/recipes/meta-cluster-manager/pom.xml
@@ -0,0 +1,210 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+
+ <parent>
+ <groupId>org.apache.helix.recipes</groupId>
+ <artifactId>recipes</artifactId>
+ <version>0.6.2-incubating-SNAPSHOT</version>
+ </parent>
+
+ <artifactId>meta-cluster-manager</artifactId>
+ <packaging>jar</packaging>
+ <name>Apache Helix :: Recipes :: meta cluster manager</name>
+
+ <properties>
+ <!-- shared version of the hadoop-client, hadoop-yarn-api and hadoop-yarn-common dependencies below -->
+ <hadoop.version>0.23.9</hadoop.version>
+
+ <!-- TestNG groups passed as <groups> to maven-surefire-plugin -->
+ <ut.groups>unit</ut.groups>
+ <!-- TestNG groups passed as <groups> to maven-failsafe-plugin; the "yarn" profile replaces this value -->
+ <it.groups>local, shell</it.groups>
+ </properties>
+
+ <dependencies>
+ <dependency>
+ <groupId>org.testng</groupId>
+ <artifactId>testng</artifactId>
+ <version>6.0.1</version>
+ <scope>test</scope>
+ <exclusions>
+ <exclusion>
+ <groupId>junit</groupId>
+ <artifactId>junit</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
+ <groupId>com.google.code.gson</groupId>
+ <artifactId>gson</artifactId>
+ <version>2.2.4</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.helix</groupId>
+ <artifactId>helix-core</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>log4j</groupId>
+ <artifactId>log4j</artifactId>
+ <exclusions>
+ <exclusion>
+ <groupId>javax.mail</groupId>
+ <artifactId>mail</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>javax.jms</groupId>
+ <artifactId>jms</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>com.sun.jdmk</groupId>
+ <artifactId>jmxtools</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>com.sun.jmx</groupId>
+ <artifactId>jmxri</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
+ <groupId>com.google.guava</groupId>
+ <artifactId>guava</artifactId>
+ <version>14.0.1</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-client</artifactId>
+ <version>${hadoop.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-yarn-api</artifactId>
+ <version>${hadoop.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-yarn-common</artifactId>
+ <version>${hadoop.version}</version>
+ </dependency>
+ </dependencies>
+
+ <build>
+ <pluginManagement>
+ <plugins>
+ <plugin>
+ <groupId>org.codehaus.mojo</groupId>
+ <artifactId>appassembler-maven-plugin</artifactId>
+ <configuration>
+ <configurationDirectory>conf</configurationDirectory>
+ <copyConfigurationDirectory>true</copyConfigurationDirectory>
+ <includeConfigurationDirectoryInClasspath>true</includeConfigurationDirectoryInClasspath>
+ <assembleDirectory>${project.build.directory}/metamanager-pkg</assembleDirectory>
+ <extraJvmArguments>-Xms512m -Xmx512m</extraJvmArguments>
+ <platforms>
+ <platform>unix</platform>
+ </platforms>
+ </configuration>
+ <executions>
+ <execution>
+ <phase>package</phase>
+ <goals>
+ <goal>assemble</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.rat</groupId>
+ <artifactId>apache-rat-plugin</artifactId>
+ <configuration>
+ <excludes combine.children="append">
+ </excludes>
+ </configuration>
+ </plugin>
+ </plugins>
+ </pluginManagement>
+
+ <plugins>
+ <plugin>
+ <groupId>org.codehaus.mojo</groupId>
+ <artifactId>appassembler-maven-plugin</artifactId>
+ <configuration>
+ <programs>
+ <program>
+ <mainClass>org.apache.helix.metamanager.bootstrapper.Boot</mainClass>
+ <name>boot</name>
+ </program>
+ <program>
+ <mainClass>org.apache.helix.metamanager.impl.shell.ShellContainerProcess</mainClass>
+ <name>shell-container-process</name>
+ </program>
+ <program>
+ <mainClass>org.apache.helix.metamanager.impl.yarn.YarnMasterProcess</mainClass>
+ <name>yarn-master-process</name>
+ </program>
+ <program>
+ <mainClass>org.apache.helix.metamanager.impl.yarn.YarnContainerProcess</mainClass>
+ <name>yarn-container-process</name>
+ </program>
+ <program>
+ <mainClass>org.apache.helix.metamanager.ZookeeperSetter</mainClass>
+ <name>zookeeper-setter</name>
+ </program>
+ </programs>
+ </configuration>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-assembly-plugin</artifactId>
+ <executions>
+ <execution>
+ <phase>package</phase>
+ <goals>
+ <goal>single</goal>
+ </goals>
+ </execution>
+ </executions>
+ <configuration>
+ <finalName>metamanager</finalName>
+ <descriptor>src/main/assembly/assembly.xml</descriptor>
+ </configuration>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-surefire-plugin</artifactId>
+ <configuration>
+ <groups>${ut.groups}</groups>
+ <excludedGroups>integration</excludedGroups>
+ <suiteXmlFiles>
+ <suiteXmlFile>src/test/config/testng.xml</suiteXmlFile>
+ </suiteXmlFiles>
+ </configuration>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-failsafe-plugin</artifactId>
+ <configuration>
+ <groups>${it.groups}</groups>
+ <excludedGroups>unit</excludedGroups>
+ <suiteXmlFiles>
+ <suiteXmlFile>src/test/config/testng.xml</suiteXmlFile>
+ </suiteXmlFiles>
+ </configuration>
+ <executions>
+ <execution>
+ <goals>
+ <goal>integration-test</goal>
+ <goal>verify</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
+ </plugins>
+ </build>
+
+ <profiles>
+ <!-- Replaces the default it.groups value (local, shell) with the yarn group,
+ so that maven-failsafe-plugin is configured with <groups>yarn</groups> -->
+ <profile>
+ <id>yarn</id>
+ <properties>
+ <it.groups>yarn</it.groups>
+ </properties>
+ </profile>
+ </profiles>
+</project>