You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@helix.apache.org by ki...@apache.org on 2013/09/20 20:30:19 UTC

[10/15] Adding Helix-task-framework and Yarn integration modules

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/provider/ProviderStateModel.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/provider/ProviderStateModel.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/provider/ProviderStateModel.java
new file mode 100644
index 0000000..814387f
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/provider/ProviderStateModel.java
@@ -0,0 +1,144 @@
+package org.apache.helix.autoscale.provider;
+
+import org.apache.helix.NotificationContext;
+import org.apache.helix.autoscale.ClusterAdmin;
+import org.apache.helix.autoscale.ContainerProvider;
+import org.apache.helix.model.Message;
+import org.apache.helix.participant.statemachine.StateModel;
+import org.apache.helix.participant.statemachine.StateModelInfo;
+import org.apache.helix.participant.statemachine.Transition;
+import org.apache.log4j.Logger;
+
+/**
+ * Helix state model implementation for {@link ContainerProvider}s. Updates
+ * configuration of managed Helix cluster and spawns and destroys container
+ * instances.
+ * <p>
+ * The partition name of a transition message is used as the container id; on
+ * acquisition the resource name is used as the container type.
+ */
+@StateModelInfo(initialState = "OFFLINE", states = { "OFFLINE", "ONLINE" })
+public class ProviderStateModel extends StateModel {
+
+    static final Logger log = Logger.getLogger(ProviderStateModel.class);
+
+    ContainerProvider   provider;
+    ClusterAdmin        admin;
+
+    /**
+     * @param provider used to create and destroy containers
+     * @param admin used to add and remove instances and to trigger rebalancing
+     */
+    public ProviderStateModel(ContainerProvider provider, ClusterAdmin admin) {
+        this.provider = provider;
+        this.admin = admin;
+    }
+
+    /**
+     * OFFLINE to ONLINE: removes any leftover container/instance with the same
+     * id, registers the instance, creates the container and then triggers a
+     * best-effort rebalance.
+     *
+     * @throws Exception if adding the instance or creating the container fails
+     */
+    @Transition(from = "OFFLINE", to = "ONLINE")
+    public void acquire(Message m, NotificationContext context) throws Exception {
+        String containerType = m.getResourceName();
+        String containerId = m.getPartitionName();
+        String instanceId = context.getManager().getInstanceName();
+
+        log.trace(String.format("%s:%s transitioning from OFFLINE to ONLINE", containerId, instanceId));
+
+        bestEffortRemove(containerId);
+
+        // add instance to cluster
+        admin.addInstance(containerId, containerType);
+
+        // create container
+        provider.create(containerId, containerType);
+
+        bestEffortRebalance();
+
+        log.info(String.format("%s acquired container '%s' (type='%s')", instanceId, containerId, containerType));
+    }
+
+    /**
+     * ONLINE to OFFLINE: destroys the container and removes the instance on a
+     * best-effort basis, then triggers a best-effort rebalance.
+     */
+    @Transition(from = "ONLINE", to = "OFFLINE")
+    public void release(Message m, NotificationContext context) {
+        String containerId = m.getPartitionName();
+        String instanceId = context.getManager().getInstanceName();
+
+        log.trace(String.format("%s:%s transitioning from ONLINE to OFFLINE", containerId, instanceId));
+
+        bestEffortRemove(containerId);
+
+        bestEffortRebalance();
+
+        log.info(String.format("%s destroyed container '%s'", instanceId, containerId));
+    }
+
+    /**
+     * ERROR to OFFLINE: performs the same cleanup as {@link #release}.
+     */
+    @Transition(from = "ERROR", to = "OFFLINE")
+    public void recover(Message m, NotificationContext context) {
+        String containerId = m.getPartitionName();
+        String instanceId = context.getManager().getInstanceName();
+
+        log.trace(String.format("%s:%s transitioning from ERROR to OFFLINE", containerId, instanceId));
+
+        release(m, context);
+    }
+
+    /**
+     * OFFLINE to DROPPED: only logs the transition.
+     */
+    @Transition(from = "OFFLINE", to = "DROPPED")
+    public void drop(Message m, NotificationContext context) {
+        String containerId = m.getPartitionName();
+        String instanceId = context.getManager().getInstanceName();
+
+        log.trace(String.format("%s:%s transitioning from OFFLINE to DROPPED", containerId, instanceId));
+    }
+
+    /**
+     * Destroys the container and removes the instance, each independently.
+     * Failures typically mean the container or instance does not exist, so
+     * they are only logged (with their cause) and never propagated.
+     */
+    private void bestEffortRemove(String containerId) {
+        log.debug(String.format("Best effort removal of container '%s'", containerId));
+
+        try {
+            provider.destroy(containerId);
+            log.debug(String.format("Container '%s' destroyed", containerId));
+        } catch (Exception e) {
+            // most likely the container does not exist; keep the cause in case it is something else
+            log.debug(String.format("Container '%s' could not be destroyed (it may not exist)", containerId), e);
+        }
+
+        try {
+            admin.removeInstance(containerId);
+            log.debug(String.format("Instance '%s' removed", containerId));
+        } catch (Exception e) {
+            // most likely the instance does not exist; keep the cause in case it is something else
+            log.debug(String.format("Instance '%s' could not be removed (it may not exist)", containerId), e);
+        }
+    }
+
+    /**
+     * Triggers a rebalance. A failure is logged with its cause but does not
+     * fail the state transition.
+     */
+    private void bestEffortRebalance() {
+        try {
+            admin.rebalance();
+        } catch (Exception e) {
+            log.warn(String.format("rebalancing cluster failed (error='%s')", e.getMessage()), e);
+        }
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/provider/ProviderStateModelFactory.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/provider/ProviderStateModelFactory.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/provider/ProviderStateModelFactory.java
new file mode 100644
index 0000000..2613336
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/provider/ProviderStateModelFactory.java
@@ -0,0 +1,34 @@
+package org.apache.helix.autoscale.provider;
+
+import org.apache.helix.autoscale.ClusterAdmin;
+import org.apache.helix.autoscale.ContainerProvider;
+import org.apache.helix.participant.statemachine.StateModelFactory;
+
+/**
+ * Creates {@link ProviderStateModel} instances. Every model produced by this
+ * factory is handed the same {@link ContainerProvider} and {@link ClusterAdmin}
+ * that were passed to the factory's constructor.
+ */
+class ProviderStateModelFactory extends StateModelFactory<ProviderStateModel> {
+
+    final ContainerProvider provider;
+    final ClusterAdmin      admin;
+
+    /**
+     * @param provider container provider passed on to each created state model
+     * @param admin cluster admin passed on to each created state model
+     */
+    public ProviderStateModelFactory(ContainerProvider provider, ClusterAdmin admin) {
+        this.admin = admin;
+        this.provider = provider;
+    }
+
+    /**
+     * Builds a new state model; the partition name is not used.
+     */
+    @Override
+    public ProviderStateModel createNewStateModel(String partitionName) {
+        final ProviderStateModel model = new ProviderStateModel(provider, admin);
+        return model;
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/resources/Boot2By2Local.properties
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/resources/Boot2By2Local.properties b/recipes/auto-scale/src/main/resources/Boot2By2Local.properties
new file mode 100644
index 0000000..13fb4ff
--- /dev/null
+++ b/recipes/auto-scale/src/main/resources/Boot2By2Local.properties
@@ -0,0 +1,87 @@
+#
+# Zookeeper (optional)
+#
+zookeeper.datadir=/tmp/autoscale/zk/data
+zookeeper.logdir=/tmp/autoscale/zk/log
+zookeeper.port=2199
+
+#
+# Cluster
+#
+cluster.name=cluster
+cluster.address=localhost:2199
+
+#
+# Resource
+#
+resource.0.name=wsprod
+resource.0.cluster=cluster
+resource.0.address=localhost:2199
+resource.0.container=webserver
+resource.0.model=MasterSlave
+resource.0.partitions=15
+resource.0.replica=1
+
+resource.1.name=dbprod
+resource.1.cluster=cluster
+resource.1.address=localhost:2199
+resource.1.container=database
+resource.1.model=MasterSlave
+resource.1.partitions=8
+resource.1.replica=3
+
+#
+# Controller
+#
+controller.name=controller
+controller.cluster=cluster
+controller.address=localhost:2199
+controller.autorefresh=5000
+
+#
+# Metacluster
+#
+metacluster.name=meta
+metacluster.address=localhost:2199
+metacluster.managedcluster=cluster
+metacluster.managedaddress=localhost:2199
+
+#
+# Metaresource
+#
+metaresource.0.name=webserver
+metaresource.0.metacluster=meta
+metaresource.0.metaaddress=localhost:2199
+metaresource.0.class=org.apache.helix.autoscale.impl.container.DummyMasterSlaveProcess
+
+metaresource.1.name=database
+metaresource.1.metacluster=meta
+metaresource.1.metaaddress=localhost:2199
+metaresource.1.class=org.apache.helix.autoscale.impl.container.DummyMasterSlaveProcess
+
+#
+# Metaprovider
+#
+metaprovider.0.name=provider0
+metaprovider.0.metacluster=meta
+metaprovider.0.metaaddress=localhost:2199
+metaprovider.0.class=org.apache.helix.autoscale.impl.local.LocalContainerProviderProcess
+
+metaprovider.1.name=provider1
+metaprovider.1.metacluster=meta
+metaprovider.1.metaaddress=localhost:2199
+metaprovider.1.class=org.apache.helix.autoscale.impl.local.LocalContainerProviderProcess
+
+#
+# Metacontroller
+#
+metacontroller.name=metacontroller
+metacontroller.metacluster=meta
+metacontroller.metaaddress=localhost:2199
+metacontroller.autorefresh=5000
+
+metacontroller.status.class=org.apache.helix.autoscale.impl.local.LocalStatusProvider
+
+metacontroller.target.class=org.apache.helix.autoscale.impl.StaticTargetProvider
+metacontroller.target.webserver=5
+metacontroller.target.database=3

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/resources/Boot2By2Shell.properties
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/resources/Boot2By2Shell.properties b/recipes/auto-scale/src/main/resources/Boot2By2Shell.properties
new file mode 100644
index 0000000..079771b
--- /dev/null
+++ b/recipes/auto-scale/src/main/resources/Boot2By2Shell.properties
@@ -0,0 +1,87 @@
+#
+# Zookeeper (optional)
+#
+zookeeper.datadir=/tmp/autoscale/zk/data
+zookeeper.logdir=/tmp/autoscale/zk/log
+zookeeper.port=2199
+
+#
+# Cluster
+#
+cluster.name=cluster
+cluster.address=localhost:2199
+
+#
+# Resource
+#
+resource.0.name=wsprod
+resource.0.cluster=cluster
+resource.0.address=localhost:2199
+resource.0.container=webserver
+resource.0.model=MasterSlave
+resource.0.partitions=15
+resource.0.replica=1
+
+resource.1.name=dbprod
+resource.1.cluster=cluster
+resource.1.address=localhost:2199
+resource.1.container=database
+resource.1.model=MasterSlave
+resource.1.partitions=8
+resource.1.replica=3
+
+#
+# Controller
+#
+controller.name=controller
+controller.cluster=cluster
+controller.address=localhost:2199
+controller.autorefresh=5000
+
+#
+# Metacluster
+#
+metacluster.name=meta
+metacluster.address=localhost:2199
+metacluster.managedcluster=cluster
+metacluster.managedaddress=localhost:2199
+
+#
+# Metaresource
+#
+metaresource.0.name=webserver
+metaresource.0.metacluster=meta
+metaresource.0.metaaddress=localhost:2199
+metaresource.0.class=org.apache.helix.autoscale.impl.container.DummyMasterSlaveProcess
+
+metaresource.1.name=database
+metaresource.1.metacluster=meta
+metaresource.1.metaaddress=localhost:2199
+metaresource.1.class=org.apache.helix.autoscale.impl.container.DummyMasterSlaveProcess
+
+#
+# Metaprovider
+#
+metaprovider.0.name=provider0
+metaprovider.0.metacluster=meta
+metaprovider.0.metaaddress=localhost:2199
+metaprovider.0.class=org.apache.helix.autoscale.impl.shell.ShellContainerProviderProcess
+
+metaprovider.1.name=provider1
+metaprovider.1.metacluster=meta
+metaprovider.1.metaaddress=localhost:2199
+metaprovider.1.class=org.apache.helix.autoscale.impl.shell.ShellContainerProviderProcess
+
+#
+# Metacontroller
+#
+metacontroller.name=metacontroller
+metacontroller.metacluster=meta
+metacontroller.metaaddress=localhost:2199
+metacontroller.autorefresh=5000
+
+metacontroller.status.class=org.apache.helix.autoscale.impl.shell.ShellStatusProvider
+
+metacontroller.target.class=org.apache.helix.autoscale.impl.StaticTargetProvider
+metacontroller.target.webserver=5
+metacontroller.target.database=3

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/resources/Boot2By2Yarn.properties
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/resources/Boot2By2Yarn.properties b/recipes/auto-scale/src/main/resources/Boot2By2Yarn.properties
new file mode 100644
index 0000000..e447711
--- /dev/null
+++ b/recipes/auto-scale/src/main/resources/Boot2By2Yarn.properties
@@ -0,0 +1,98 @@
+#
+# Zookeeper (optional)
+#
+zookeeper.datadir=/tmp/autoscale/zk/data
+zookeeper.logdir=/tmp/autoscale/zk/log
+zookeeper.port=2199
+
+#
+# Cluster
+#
+cluster.name=cluster
+cluster.address=rm:2199
+
+#
+# Resource
+#
+resource.0.name=wsprod
+resource.0.cluster=cluster
+resource.0.address=rm:2199
+resource.0.container=webserver
+resource.0.model=MasterSlave
+resource.0.partitions=15
+resource.0.replica=1
+
+resource.1.name=dbprod
+resource.1.cluster=cluster
+resource.1.address=rm:2199
+resource.1.container=database
+resource.1.model=MasterSlave
+resource.1.partitions=8
+resource.1.replica=3
+
+#
+# Controller
+#
+controller.name=controller
+controller.cluster=cluster
+controller.address=rm:2199
+controller.autorefresh=5000
+
+#
+# Metacluster
+#
+metacluster.name=meta
+metacluster.address=rm:2199
+metacluster.managedcluster=cluster
+metacluster.managedaddress=rm:2199
+
+#
+# Metaresource
+#
+metaresource.0.name=webserver
+metaresource.0.metacluster=meta
+metaresource.0.metaaddress=rm:2199
+metaresource.0.class=org.apache.helix.autoscale.impl.container.DummyMasterSlaveProcess
+
+metaresource.1.name=database
+metaresource.1.metacluster=meta
+metaresource.1.metaaddress=rm:2199
+metaresource.1.class=org.apache.helix.autoscale.impl.container.DummyMasterSlaveProcess
+
+#
+# Metaprovider
+#
+metaprovider.0.name=provider0
+metaprovider.0.metacluster=meta
+metaprovider.0.metaaddress=rm:2199
+metaprovider.0.class=org.apache.helix.autoscale.impl.yarn.YarnContainerProviderProcess
+metaprovider.0.yarndata=rm:2199
+metaprovider.0.resourcemananger=rm:8032
+metaprovider.0.scheduler=rm:8030
+metaprovider.0.user=yarn
+metaprovider.0.hdfs=hdfs://rm:9000/
+
+metaprovider.1.name=provider1
+metaprovider.1.metacluster=meta
+metaprovider.1.metaaddress=rm:2199
+metaprovider.1.class=org.apache.helix.autoscale.impl.yarn.YarnContainerProviderProcess
+metaprovider.1.yarndata=rm:2199
+metaprovider.1.resourcemananger=rm:8032
+metaprovider.1.scheduler=rm:8030
+metaprovider.1.user=yarn
+metaprovider.1.hdfs=hdfs://rm:9000/
+
+#
+# Metacontroller
+#
+metacontroller.name=metacontroller
+metacontroller.metacluster=meta
+metacontroller.metaaddress=rm:2199
+metacontroller.autorefresh=5000
+
+metacontroller.status.class=org.apache.helix.autoscale.impl.yarn.YarnStatusProvider
+metacontroller.status.yarndata=rm:2199
+
+metacontroller.target.class=org.apache.helix.autoscale.impl.StaticTargetProvider
+metacontroller.target.webserver=5
+metacontroller.target.database=3

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/resources/BootLocal.properties
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/resources/BootLocal.properties b/recipes/auto-scale/src/main/resources/BootLocal.properties
new file mode 100644
index 0000000..15905fc
--- /dev/null
+++ b/recipes/auto-scale/src/main/resources/BootLocal.properties
@@ -0,0 +1,68 @@
+#
+# Zookeeper (optional)
+#
+zookeeper.datadir=/tmp/autoscale/zk/data
+zookeeper.logdir=/tmp/autoscale/zk/log
+zookeeper.port=2199
+
+#
+# Cluster
+#
+cluster.name=cluster
+cluster.address=localhost:2199
+
+#
+# Resource
+#
+resource.name=resource
+resource.cluster=cluster
+resource.address=localhost:2199
+resource.container=container
+resource.model=MasterSlave
+resource.partitions=10
+resource.replica=3
+
+#
+# Controller
+#
+controller.name=controller
+controller.cluster=cluster
+controller.address=localhost:2199
+controller.autorefresh=5000
+
+#
+# Metacluster
+#
+metacluster.name=meta
+metacluster.address=localhost:2199
+metacluster.managedcluster=cluster
+metacluster.managedaddress=localhost:2199
+
+#
+# Metaresource
+#
+metaresource.name=container
+metaresource.metacluster=meta
+metaresource.metaaddress=localhost:2199
+metaresource.class=org.apache.helix.autoscale.impl.container.DummyMasterSlaveProcess
+
+#
+# Metaprovider
+#
+metaprovider.name=provider
+metaprovider.metacluster=meta
+metaprovider.metaaddress=localhost:2199
+metaprovider.class=org.apache.helix.autoscale.impl.local.LocalContainerProviderProcess
+
+#
+# Metacontroller
+#
+metacontroller.name=metacontroller
+metacontroller.metacluster=meta
+metacontroller.metaaddress=localhost:2199
+metacontroller.autorefresh=5000
+
+metacontroller.status.class=org.apache.helix.autoscale.impl.local.LocalStatusProvider
+
+metacontroller.target.class=org.apache.helix.autoscale.impl.StaticTargetProvider
+metacontroller.target.container=7

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/resources/RedisYarnSample.properties
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/resources/RedisYarnSample.properties b/recipes/auto-scale/src/main/resources/RedisYarnSample.properties
new file mode 100644
index 0000000..eb58fd2
--- /dev/null
+++ b/recipes/auto-scale/src/main/resources/RedisYarnSample.properties
@@ -0,0 +1,103 @@
+###############################################################################
+# Managed cluster configuration
+###############################################################################
+
+cluster.name                      = rediscluster
+cluster.address                   = helix.zookeeper.intra
+
+#
+# Helix cluster resources
+# (as usual)
+#
+# Note: comments are kept on their own lines because java.util.Properties
+# treats a '#' after a value as part of that value.
+#
+resource.0.name                   = redis
+resource.0.cluster                = rediscluster
+resource.0.address                = helix.zookeeper.intra
+# see meta cluster
+resource.0.container              = rediscontainer
+resource.0.model                  = OnlineOffline
+# are auto-assigned
+resource.0.partitions             = 1024
+# no replication needed
+resource.0.replica                = 1
+
+#
+# Helix cluster controllers
+# (as usual)
+#
+controller.name                   = controller
+controller.cluster                = rediscluster
+controller.address                = helix.zookeeper.intra
+
+###############################################################################
+# Meta cluster configuration
+###############################################################################
+
+metacluster.name                  = meta
+metacluster.address               = helix.zookeeper.intra
+metacluster.managedcluster        = rediscluster
+metacluster.managedaddress        = helix.zookeeper.intra
+
+#
+# Container Types
+# (Base configuration for instances spawned by providers)
+#
+metaresource.0.name               = rediscontainer
+metaresource.0.class              = org.apache.helix.autoscale.impl.container.RedisServerProcess
+metaresource.0.metacluster        = meta
+metaresource.0.metaaddress        = helix.zookeeper.intra
+metaresource.0.address            = apps.zookeeper.intra
+# instance id is added
+metaresource.0.baseport           = 17000
+
+#
+# Container Instance Providers
+# (Endpoints of container deployment frameworks, e.g. Apache YARN)
+#
+metaprovider.0.name               = provider
+metaprovider.0.class              = org.apache.helix.autoscale.impl.yarn.YarnContainerProviderProcess
+metaprovider.0.metacluster        = meta
+metaprovider.0.metaaddress        = helix.zookeeper.intra
+metaprovider.0.yarndata           = yarn.zookeeper.intra
+metaprovider.0.resourcemananger   = yarn-rm.intra:8032
+metaprovider.0.scheduler          = yarn-rm.intra:8030
+metaprovider.0.hdfs               = hdfs://yarn-hdfs.intra:9000/
+metaprovider.0.user               = yarnuser
+
+#
+# Helix meta cluster controller
+# (Monitors system state and adapts config of Container Instance Providers)
+#
+metacontroller.name               = metacontroller
+metacontroller.metacluster        = meta
+metacontroller.metaaddress        = helix.zookeeper.intra
+# status refresh interval
+metacontroller.autorefresh        = 10000
+
+#
+# Container Status Provider
+# (Provides low-level data on container instance health)
+#
+metacontroller.status.class       = org.apache.helix.autoscale.impl.yarn.YarnStatusProvider
+metacontroller.status.yarndata    = yarn.zookeeper.intra
+
+#
+# Performance Target Model
+# (Provides target number of container instances)
+#
+metacontroller.target.class       = org.apache.helix.autoscale.impl.RedisTargetProvider
+metacontroller.target.address     = apps.zookeeper.intra
+# interval for Tps probes
+metacontroller.target.interval    = 10000
+# timeout of probe
+metacontroller.target.timeout     = 9000
+# target "GET" Tps
+metacontroller.target.get         = 1000000
+# min container count
+metacontroller.target.min         = 1
+# max container count
+metacontroller.target.max         = 23
+# exponential average
+metacontroller.target.alpha       = 0.1
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/resources/log4j.properties
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/resources/log4j.properties b/recipes/auto-scale/src/main/resources/log4j.properties
new file mode 100644
index 0000000..7f29be2
--- /dev/null
+++ b/recipes/auto-scale/src/main/resources/log4j.properties
@@ -0,0 +1,30 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# Set root logger level to ERROR and its only appender to C.
+log4j.rootLogger=ERROR, C
+
+# C is set to be a ConsoleAppender.
+log4j.appender.C=org.apache.log4j.ConsoleAppender
+log4j.appender.C.layout=org.apache.log4j.PatternLayout
+log4j.appender.C.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n
+
+log4j.logger.org.I0Itec=ERROR
+log4j.logger.org.apache=ERROR
+
+log4j.logger.org.apache.helix.autoscale=INFO

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/test/config/testng.xml
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/test/config/testng.xml b/recipes/auto-scale/src/test/config/testng.xml
new file mode 100644
index 0000000..f710791
--- /dev/null
+++ b/recipes/auto-scale/src/test/config/testng.xml
@@ -0,0 +1,27 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+<!DOCTYPE suite SYSTEM "http://testng.org/testng-1.0.dtd">
+<suite name="Suite" parallel="none">
+  <test name="Test" preserve-order="true">
+    <packages>
+      <package name="org.apache.helix.autoscale.*"/>
+    </packages>
+  </test>
+</suite>

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/BootstrapperIT.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/BootstrapperIT.java b/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/BootstrapperIT.java
new file mode 100644
index 0000000..5dd7820
--- /dev/null
+++ b/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/BootstrapperIT.java
@@ -0,0 +1,170 @@
+package org.apache.helix.autoscale;
+
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Collection;
+import java.util.Properties;
+
+import org.apache.helix.HelixAdmin;
+import org.apache.helix.autoscale.Service;
+import org.apache.helix.autoscale.bootstrapper.Boot;
+import org.apache.helix.autoscale.bootstrapper.ClusterService;
+import org.apache.helix.autoscale.bootstrapper.ControllerService;
+import org.apache.helix.autoscale.bootstrapper.MetaClusterService;
+import org.apache.helix.autoscale.bootstrapper.MetaControllerService;
+import org.apache.helix.autoscale.bootstrapper.MetaProviderService;
+import org.apache.helix.autoscale.bootstrapper.MetaResourceService;
+import org.apache.helix.autoscale.bootstrapper.ResourceService;
+import org.apache.helix.autoscale.bootstrapper.ZookeeperService;
+import org.apache.helix.manager.zk.ZKHelixAdmin;
+import org.apache.log4j.Logger;
+import org.testng.Assert;
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.Test;
+
+/**
+ * Bootstrapping tool test. Reads cluster configuration from *.properties files,
+ * spawns services and verifies number of active partitions and containers
+ * 
+ * @see Boot
+ */
+@Test(groups = { "integration", "boot" })
+public class BootstrapperIT {
+
+    static final Logger log = Logger.getLogger(BootstrapperIT.class);
+
+    Boot                boot;
+    HelixAdmin          admin;
+
+    /**
+     * Closes the admin connection and stops the bootstrapper after each test.
+     * The bootstrapper is stopped even if closing the admin connection fails,
+     * so that already-started services are not left running.
+     */
+    @AfterMethod(alwaysRun = true)
+    public void teardown() throws Exception {
+        log.debug("tearing down bootstrap test");
+        try {
+            if (admin != null) {
+                admin.close();
+            }
+        } finally {
+            admin = null;
+            if (boot != null) {
+                try {
+                    boot.stop();
+                } finally {
+                    boot = null;
+                }
+            }
+        }
+    }
+
+    @Test(timeOut = TestUtils.TEST_TIMEOUT, groups = { "local" })
+    public void bootstrapLocalTest() throws Exception {
+        boot = new Boot();
+        boot.configure(getProperties("BootLocal.properties"));
+        boot.start();
+
+        Assert.assertTrue(containsInstanceOf(boot.getServcies(), ZookeeperService.class));
+        Assert.assertTrue(containsInstanceOf(boot.getServcies(), ClusterService.class));
+        Assert.assertTrue(containsInstanceOf(boot.getServcies(), ResourceService.class));
+        Assert.assertTrue(containsInstanceOf(boot.getServcies(), ControllerService.class));
+        Assert.assertTrue(containsInstanceOf(boot.getServcies(), MetaClusterService.class));
+        Assert.assertTrue(containsInstanceOf(boot.getServcies(), MetaResourceService.class));
+        Assert.assertTrue(containsInstanceOf(boot.getServcies(), MetaProviderService.class));
+        Assert.assertTrue(containsInstanceOf(boot.getServcies(), MetaControllerService.class));
+
+        final long limit = System.currentTimeMillis() + TestUtils.REBALANCE_TIMEOUT;
+
+        admin = new ZKHelixAdmin("localhost:2199");
+        waitUntil(admin, "meta", "container", 1, 7, (limit - System.currentTimeMillis()));
+        waitUntil(admin, "cluster", "resource", 7, 10, (limit - System.currentTimeMillis()));
+
+    }
+
+    @Test(timeOut = TestUtils.TEST_TIMEOUT, groups = { "local" })
+    public void bootstrap2By2LocalTest() throws Exception {
+        boot = new Boot();
+        boot.configure(getProperties("Boot2By2Local.properties"));
+        boot.start();
+
+        verify2By2Setup();
+    }
+
+    @Test(timeOut = TestUtils.TEST_TIMEOUT, groups = { "shell" })
+    public void bootstrap2By2ShellTest() throws Exception {
+        boot = new Boot();
+        boot.configure(getProperties("Boot2By2Shell.properties"));
+        boot.start();
+
+        verify2By2Setup();
+    }
+
+    @Test(timeOut = TestUtils.TEST_TIMEOUT, groups = { "yarn" })
+    public void bootstrap2By2YarnTest() throws Exception {
+        boot = new Boot();
+        boot.configure(getProperties("Boot2By2Yarn.properties"));
+        boot.start();
+
+        verify2By2Setup();
+    }
+
+    /** Waits until the resources of the Boot2By2*.properties setups report the expected counts. */
+    void verify2By2Setup() throws Exception {
+        final long limit = System.currentTimeMillis() + TestUtils.REBALANCE_TIMEOUT;
+        final String address = "localhost:2199";
+
+        log.debug(String.format("connecting to zookeeper at '%s'", address));
+
+        admin = new ZKHelixAdmin(address);
+        waitUntil(admin, "meta", "database", 2, 3, (limit - System.currentTimeMillis()));
+        waitUntil(admin, "meta", "webserver", 2, 5, (limit - System.currentTimeMillis()));
+        waitUntil(admin, "cluster", "dbprod", 3, 8, (limit - System.currentTimeMillis()));
+        waitUntil(admin, "cluster", "wsprod", 5, 15, (limit - System.currentTimeMillis()));
+    }
+
+    /**
+     * Waits for the instance count and then the partition count of a resource,
+     * sharing a single overall timeout between both waits.
+     */
+    static void waitUntil(HelixAdmin admin, String cluster, String resource, int instanceCount, int partitionCount, long timeout) throws Exception {
+        final long limit = System.currentTimeMillis() + timeout;
+        TestUtils.waitUntilInstanceCount(admin, cluster, resource, instanceCount, (limit - System.currentTimeMillis()));
+        TestUtils.waitUntilPartitionCount(admin, cluster, resource, partitionCount, (limit - System.currentTimeMillis()));
+    }
+
+    /**
+     * Loads a properties file from the system class path.
+     *
+     * @param resourcePath class path location of the properties file
+     * @return the loaded properties
+     * @throws FileNotFoundException if the resource does not exist
+     * @throws IOException if the resource cannot be read
+     */
+    static Properties getProperties(String resourcePath) throws IOException {
+        InputStream in = ClassLoader.getSystemResourceAsStream(resourcePath);
+        if (in == null) {
+            throw new FileNotFoundException(String.format("resource '%s' not found on class path", resourcePath));
+        }
+        try {
+            Properties properties = new Properties();
+            properties.load(in);
+            return properties;
+        } finally {
+            in.close();
+        }
+    }
+
+    /** Returns true if any of the services is an instance of the given class. */
+    static boolean containsInstanceOf(Collection<Service> services, Class<?> clazz) {
+        for (Service service : services) {
+            if (clazz.isAssignableFrom(service.getClass())) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/FailoverIT.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/FailoverIT.java b/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/FailoverIT.java
new file mode 100644
index 0000000..429146a
--- /dev/null
+++ b/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/FailoverIT.java
@@ -0,0 +1,195 @@
+package org.apache.helix.autoscale;
+
+import java.util.Collections;
+import java.util.Iterator;
+
+import org.apache.helix.autoscale.Service;
+import org.apache.helix.autoscale.impl.StaticTargetProvider;
+import org.apache.helix.autoscale.impl.local.LocalContainerProviderProcess;
+import org.apache.helix.autoscale.impl.local.LocalContainerSingleton;
+import org.apache.helix.autoscale.impl.local.LocalStatusProvider;
+import org.apache.helix.autoscale.impl.shell.ShellContainerProviderProcess;
+import org.apache.helix.autoscale.impl.shell.ShellContainerSingleton;
+import org.apache.helix.autoscale.impl.shell.ShellStatusProvider;
+import org.apache.helix.autoscale.impl.yarn.YarnContainerProviderProcess;
+import org.apache.helix.autoscale.impl.yarn.YarnStatusProvider;
+import org.apache.helix.autoscale.impl.yarn.ZookeeperYarnDataProvider;
+import org.apache.helix.autoscale.provider.ProviderRebalancer;
+import org.apache.log4j.Logger;
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.Test;
+
+/**
+ * Fault-recovery test for individual containers and whole providers. Missing
+ * containers should be replaced by the meta cluster Rebalancer using remaining
+ * active providers.
+ * 
+ * @see ProviderRebalancer
+ */
+@Test(groups = { "integration", "failure" })
+public class FailoverIT {
+
+    static final Logger  log             = Logger.getLogger(FailoverIT.class);
+
+    /** Target number of containers handed to the static target provider. */
+    static final int     CONTAINER_COUNT = 7;
+
+    StaticTargetProvider targetProvider;
+    YarnStatusProvider   yarnStatusProvider;
+
+    /**
+     * Installs a JVM shutdown hook that runs {@link #teardownTest()} so the
+     * test cluster is also torn down when the JVM exits.
+     */
+    @BeforeClass(alwaysRun = true)
+    public void setupClass() {
+        log.info("installing shutdown hook");
+        Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() {
+            @Override
+            public void run() {
+                try {
+                    teardownTest();
+                } catch (Exception e) {
+                    // best effort only: the JVM is shutting down, so the failure
+                    // is reported instead of being silently dropped
+                    log.warn("teardown in shutdown hook failed", e);
+                }
+            }
+        }));
+    }
+
+    /**
+     * Tears down leftovers of a previous test and creates a fresh target
+     * provider requesting {@link #CONTAINER_COUNT} containers.
+     */
+    @BeforeMethod(alwaysRun = true)
+    public void setupTest() throws Exception {
+        teardownTest();
+        targetProvider = new StaticTargetProvider(Collections.singletonMap(TestUtils.metaResourceName, CONTAINER_COUNT));
+    }
+
+    /**
+     * Stops the test cluster, the yarn status provider (if one was started)
+     * and zookeeper. Later steps run even if an earlier one throws, so a failed
+     * cluster shutdown does not leave zookeeper running for the next test.
+     */
+    @AfterMethod(alwaysRun = true)
+    public void teardownTest() throws Exception {
+        try {
+            TestUtils.stopTestCluster();
+        } finally {
+            try {
+                if (yarnStatusProvider != null) {
+                    yarnStatusProvider.stop();
+                    yarnStatusProvider = null;
+                }
+            } finally {
+                TestUtils.stopZookeeper();
+            }
+        }
+    }
+
+    /** Starts three local providers and kills individual local containers. */
+    @Test(timeOut = TestUtils.TEST_TIMEOUT, groups = { "local" })
+    public void testLocalContainerFailover() throws Exception {
+        log.info("testing local container failover");
+        TestUtils.configure();
+        TestUtils.startZookeeper();
+        TestUtils.startTestCluster(targetProvider, new LocalStatusProvider(), makeLocalProviders(3));
+        killLocalContainers();
+    }
+
+    /** Starts three local providers and stops one of them. */
+    @Test(timeOut = TestUtils.TEST_TIMEOUT, groups = { "local" })
+    public void testLocalProviderFailover() throws Exception {
+        log.info("testing local provider failover");
+        TestUtils.configure();
+        TestUtils.startZookeeper();
+        TestUtils.startTestCluster(targetProvider, new LocalStatusProvider(), makeLocalProviders(3));
+        killProvider();
+    }
+
+    /** Starts three shell providers and kills individual shell containers. */
+    @Test(timeOut = TestUtils.TEST_TIMEOUT, groups = { "shell" })
+    public void testShellContainerFailover() throws Exception {
+        log.info("testing shell container failover");
+        TestUtils.configure();
+        TestUtils.startZookeeper();
+        TestUtils.startTestCluster(targetProvider, new ShellStatusProvider(), makeShellProviders(3));
+        killShellContainers();
+    }
+
+    /** Starts three shell providers and stops one of them. */
+    @Test(timeOut = TestUtils.TEST_TIMEOUT, groups = { "shell" })
+    public void testShellProviderFailover() throws Exception {
+        log.info("testing shell provider failover");
+        TestUtils.configure();
+        TestUtils.startZookeeper();
+        TestUtils.startTestCluster(targetProvider, new ShellStatusProvider(), makeShellProviders(3));
+        killProvider();
+    }
+
+    /**
+     * Starts three yarn providers using the "distributed.properties"
+     * configuration and deletes individual yarn containers.
+     */
+    @Test(timeOut = TestUtils.TEST_TIMEOUT, groups = { "yarn" })
+    public void testYarnContainerFailover() throws Exception {
+        log.info("testing yarn container failover");
+        TestUtils.configure("distributed.properties");
+        TestUtils.startZookeeper();
+        yarnStatusProvider = new YarnStatusProvider(TestUtils.zkAddress);
+        yarnStatusProvider.start();
+        TestUtils.startTestCluster(targetProvider, yarnStatusProvider, makeYarnProviders(3));
+        killYarnContainers();
+    }
+
+    /**
+     * Starts three yarn providers using the "distributed.properties"
+     * configuration and stops one of them.
+     */
+    @Test(timeOut = TestUtils.TEST_TIMEOUT, groups = { "yarn" })
+    public void testYarnProviderFailover() throws Exception {
+        log.info("testing yarn provider failover");
+        TestUtils.configure("distributed.properties");
+        TestUtils.startZookeeper();
+        yarnStatusProvider = new YarnStatusProvider(TestUtils.zkAddress);
+        yarnStatusProvider.start();
+        TestUtils.startTestCluster(targetProvider, yarnStatusProvider, makeYarnProviders(3));
+        killProvider();
+    }
+
+    /**
+     * Kills three local container processes, triggers a rebalance and waits
+     * until the cluster is back at {@link #CONTAINER_COUNT} containers.
+     */
+    void killLocalContainers() throws Exception {
+        LocalContainerSingleton.killProcess("container_2");
+        LocalContainerSingleton.killProcess("container_4");
+        LocalContainerSingleton.killProcess("container_6");
+        // NOTE(review): presumably a grace period for the failure to become
+        // visible before rebalancing - confirm
+        Thread.sleep(3000);
+        TestUtils.rebalanceTestCluster();
+        TestUtils.waitUntilRebalancedCount(CONTAINER_COUNT);
+    }
+
+    /**
+     * Kills three shell container processes, triggers a rebalance and waits
+     * until the cluster is back at {@link #CONTAINER_COUNT} containers.
+     */
+    void killShellContainers() throws Exception {
+        ShellContainerSingleton.killProcess("container_2");
+        ShellContainerSingleton.killProcess("container_4");
+        ShellContainerSingleton.killProcess("container_6");
+        // NOTE(review): presumably a grace period for the failure to become
+        // visible before rebalancing - confirm
+        Thread.sleep(3000);
+        TestUtils.rebalanceTestCluster();
+        TestUtils.waitUntilRebalancedCount(CONTAINER_COUNT);
+    }
+
+    /**
+     * Deletes three containers through a temporary yarn data provider,
+     * triggers a rebalance and waits until the cluster is back at
+     * {@link #CONTAINER_COUNT} containers.
+     */
+    void killYarnContainers() throws Exception {
+        ZookeeperYarnDataProvider yarnDataService = new ZookeeperYarnDataProvider(TestUtils.zkAddress);
+        yarnDataService.start();
+        try {
+            yarnDataService.delete("container_2");
+            yarnDataService.delete("container_4");
+            yarnDataService.delete("container_6");
+        } finally {
+            // always release the data provider, even if a delete fails
+            yarnDataService.stop();
+        }
+        // NOTE(review): presumably a grace period for the failure to become
+        // visible before rebalancing - confirm
+        Thread.sleep(3000);
+        TestUtils.rebalanceTestCluster();
+        TestUtils.waitUntilRebalancedCount(CONTAINER_COUNT);
+    }
+
+    /**
+     * Stops the first registered provider service and removes it from
+     * {@link TestUtils#providerServices}, then triggers a rebalance and waits
+     * until the cluster is back at {@link #CONTAINER_COUNT} containers.
+     */
+    static void killProvider() throws Exception {
+        Iterator<Service> itService = TestUtils.providerServices.iterator();
+        itService.next().stop();
+        itService.remove();
+
+        TestUtils.rebalanceTestCluster();
+        TestUtils.waitUntilRebalancedCount(CONTAINER_COUNT);
+    }
+
+    /** Creates {@code count} local providers named "provider_0", "provider_1", ... */
+    LocalContainerProviderProcess[] makeLocalProviders(int count) throws Exception {
+        LocalContainerProviderProcess[] localProviders = new LocalContainerProviderProcess[count];
+        for (int i = 0; i < count; i++) {
+            localProviders[i] = TestUtils.makeLocalProvider("provider_" + i);
+        }
+        return localProviders;
+    }
+
+    /** Creates {@code count} shell providers named "provider_0", "provider_1", ... */
+    ShellContainerProviderProcess[] makeShellProviders(int count) throws Exception {
+        ShellContainerProviderProcess[] shellProviders = new ShellContainerProviderProcess[count];
+        for (int i = 0; i < count; i++) {
+            shellProviders[i] = TestUtils.makeShellProvider("provider_" + i);
+        }
+        return shellProviders;
+    }
+
+    /** Creates {@code count} yarn providers named "provider_0", "provider_1", ... */
+    YarnContainerProviderProcess[] makeYarnProviders(int count) throws Exception {
+        YarnContainerProviderProcess[] yarnProviders = new YarnContainerProviderProcess[count];
+        for (int i = 0; i < count; i++) {
+            yarnProviders[i] = TestUtils.makeYarnProvider("provider_" + i);
+        }
+        return yarnProviders;
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/LocalContainerProviderIT.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/LocalContainerProviderIT.java b/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/LocalContainerProviderIT.java
new file mode 100644
index 0000000..94ea5ac
--- /dev/null
+++ b/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/LocalContainerProviderIT.java
@@ -0,0 +1,80 @@
+package org.apache.helix.autoscale;
+
+import java.util.Collections;
+
+import org.apache.helix.autoscale.impl.StaticTargetProvider;
+import org.apache.helix.autoscale.impl.local.LocalContainerProviderProcess;
+import org.apache.helix.autoscale.impl.local.LocalStatusProvider;
+import org.apache.log4j.Logger;
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.Test;
+
+/**
+ * Local container provider and local status provider test. Scale-up and -down
+ * only, no failures.
+ * 
+ * @see LocalContainerProvider
+ * @see LocalStatusProvider
+ */
+@Test(groups = { "integration", "local" })
+public class LocalContainerProviderIT {
+
+    static final Logger           log             = Logger.getLogger(LocalContainerProviderIT.class);
+
+    /** Initial number of containers requested from the provider. */
+    static final int              CONTAINER_COUNT = 4;
+
+    StaticTargetProvider          clusterStatusProvider;
+    LocalContainerProviderProcess containerProvider;
+    LocalStatusProvider           containerStatusProvider;
+
+    /**
+     * Tears down leftovers of a previous test, then starts zookeeper and a
+     * test cluster with a single local provider and
+     * {@link #CONTAINER_COUNT} target containers.
+     */
+    @BeforeMethod(alwaysRun = true)
+    public void setupTest() throws Exception {
+        teardownTest();
+        TestUtils.configure();
+        TestUtils.startZookeeper();
+        containerProvider = TestUtils.makeLocalProvider("provider_0");
+        clusterStatusProvider = new StaticTargetProvider(Collections.singletonMap(TestUtils.metaResourceName, CONTAINER_COUNT));
+        containerStatusProvider = new LocalStatusProvider();
+        TestUtils.startTestCluster(clusterStatusProvider, containerStatusProvider, containerProvider);
+    }
+
+    /**
+     * Stops the test cluster and zookeeper. Zookeeper is stopped even if the
+     * cluster shutdown throws, so the next test does not find it still running.
+     */
+    @AfterMethod(alwaysRun = true)
+    public void teardownTest() throws Exception {
+        try {
+            TestUtils.stopTestCluster();
+        } finally {
+            TestUtils.stopZookeeper();
+        }
+    }
+
+    /** Rebalances with an unchanged container count. */
+    @Test(timeOut = TestUtils.TEST_TIMEOUT)
+    public void testStatic() throws Exception {
+        log.info("testing static");
+        setContainerCount(CONTAINER_COUNT);
+    }
+
+    /** Rebalances after raising the container count by two. */
+    @Test(timeOut = TestUtils.TEST_TIMEOUT)
+    public void testScaleUp() throws Exception {
+        log.info("testing scale up");
+        setContainerCount(CONTAINER_COUNT + 2);
+    }
+
+    /** Rebalances after lowering the container count by two. */
+    @Test(timeOut = TestUtils.TEST_TIMEOUT)
+    public void testScaleDown() throws Exception {
+        log.info("testing scale down");
+        setContainerCount(CONTAINER_COUNT - 2);
+    }
+
+    /** Scales up, back, down and back again within a single test. */
+    @Test(timeOut = TestUtils.TEST_TIMEOUT)
+    public void testScaleCycle() throws Exception {
+        log.info("testing scale cycle");
+        setContainerCount(CONTAINER_COUNT + 2);
+        setContainerCount(CONTAINER_COUNT);
+        setContainerCount(CONTAINER_COUNT - 2);
+        setContainerCount(CONTAINER_COUNT);
+    }
+
+    /**
+     * Updates the target container count of the meta resource and triggers a
+     * rebalance of the test cluster.
+     *
+     * @param newContainerCount new target number of containers
+     */
+    void setContainerCount(int newContainerCount) throws Exception {
+        log.debug(String.format("Setting container count to %d", newContainerCount));
+        clusterStatusProvider.setTargetContainerCount(TestUtils.metaResourceName, newContainerCount);
+        TestUtils.rebalanceTestCluster();
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/ShellContainerProviderIT.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/ShellContainerProviderIT.java b/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/ShellContainerProviderIT.java
new file mode 100644
index 0000000..dce4429
--- /dev/null
+++ b/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/ShellContainerProviderIT.java
@@ -0,0 +1,95 @@
+package org.apache.helix.autoscale;
+
+import java.util.Collections;
+
+import org.apache.helix.autoscale.impl.StaticTargetProvider;
+import org.apache.helix.autoscale.impl.shell.ShellContainerProviderProcess;
+import org.apache.helix.autoscale.impl.shell.ShellStatusProvider;
+import org.apache.log4j.Logger;
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.Test;
+
+/**
+ * Shell container provider and shell status provider test. Scale-up and -down
+ * only, no failures.
+ * 
+ * @see ShellContainerProvider
+ * @see ShellStatusProvider
+ */
+@Test(groups = { "integration", "shell" })
+public class ShellContainerProviderIT {
+
+    static final Logger           log               = Logger.getLogger(ShellContainerProviderIT.class);
+
+    // NOTE(review): the two timeouts below are not referenced within this
+    // class; the tests use TestUtils.TEST_TIMEOUT instead
+    static final long             TEST_TIMEOUT      = 20000;
+    static final long             REBALANCE_TIMEOUT = 10000;
+
+    /** Initial number of containers requested from the provider. */
+    static final int              CONTAINER_COUNT   = 4;
+
+    StaticTargetProvider          clusterStatusProvider;
+    ShellContainerProviderProcess containerProvider;
+    ShellStatusProvider           containerStatusProvider;
+
+    /**
+     * Installs a JVM shutdown hook that runs {@link #teardownTest()} so the
+     * test cluster is also torn down when the JVM exits.
+     */
+    @BeforeClass(alwaysRun = true)
+    public void setupClass() {
+        log.info("installing shutdown hook");
+        Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() {
+            @Override
+            public void run() {
+                try {
+                    teardownTest();
+                } catch (Exception e) {
+                    // best effort only: the JVM is shutting down, so the failure
+                    // is reported instead of being silently dropped
+                    log.warn("teardown in shutdown hook failed", e);
+                }
+            }
+        }));
+    }
+
+    /**
+     * Tears down leftovers of a previous test, then starts zookeeper and a
+     * test cluster with a single shell provider and
+     * {@link #CONTAINER_COUNT} target containers.
+     */
+    @BeforeMethod(alwaysRun = true)
+    public void setupTest() throws Exception {
+        teardownTest();
+        TestUtils.configure();
+        TestUtils.startZookeeper();
+        containerProvider = TestUtils.makeShellProvider("provider_0");
+        clusterStatusProvider = new StaticTargetProvider(Collections.singletonMap(TestUtils.metaResourceName, CONTAINER_COUNT));
+        containerStatusProvider = new ShellStatusProvider();
+        TestUtils.startTestCluster(clusterStatusProvider, containerStatusProvider, containerProvider);
+    }
+
+    /**
+     * Stops the test cluster and zookeeper. Zookeeper is stopped even if the
+     * cluster shutdown throws, so the next test does not find it still running.
+     */
+    @AfterMethod(alwaysRun = true)
+    public void teardownTest() throws Exception {
+        try {
+            TestUtils.stopTestCluster();
+        } finally {
+            TestUtils.stopZookeeper();
+        }
+    }
+
+    /** Rebalances with an unchanged container count. */
+    @Test(timeOut = TestUtils.TEST_TIMEOUT)
+    public void testStatic() throws Exception {
+        log.info("testing static");
+        setContainerCount(CONTAINER_COUNT);
+    }
+
+    /** Rebalances after raising the container count by two. */
+    @Test(timeOut = TestUtils.TEST_TIMEOUT)
+    public void testScaleUp() throws Exception {
+        log.info("testing scale up");
+        setContainerCount(CONTAINER_COUNT + 2);
+    }
+
+    /** Rebalances after lowering the container count by two. */
+    @Test(timeOut = TestUtils.TEST_TIMEOUT)
+    public void testScaleDown() throws Exception {
+        log.info("testing scale down");
+        setContainerCount(CONTAINER_COUNT - 2);
+    }
+
+    /** Scales up, back, down and back again within a single test. */
+    @Test(timeOut = TestUtils.TEST_TIMEOUT)
+    public void testScaleCycle() throws Exception {
+        log.info("testing scale cycle");
+        setContainerCount(CONTAINER_COUNT + 2);
+        setContainerCount(CONTAINER_COUNT);
+        setContainerCount(CONTAINER_COUNT - 2);
+        setContainerCount(CONTAINER_COUNT);
+    }
+
+    /**
+     * Updates the target container count of the meta resource and triggers a
+     * rebalance of the test cluster.
+     *
+     * @param newContainerCount new target number of containers
+     */
+    void setContainerCount(int newContainerCount) throws Exception {
+        log.debug(String.format("Setting container count to %d", newContainerCount));
+        clusterStatusProvider.setTargetContainerCount(TestUtils.metaResourceName, newContainerCount);
+        TestUtils.rebalanceTestCluster();
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/TestUtils.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/TestUtils.java b/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/TestUtils.java
new file mode 100644
index 0000000..c68b2ca
--- /dev/null
+++ b/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/TestUtils.java
@@ -0,0 +1,443 @@
+package org.apache.helix.autoscale;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+import java.util.Set;
+import java.util.concurrent.TimeoutException;
+
+import org.I0Itec.zkclient.IDefaultNameSpace;
+import org.I0Itec.zkclient.ZkClient;
+import org.I0Itec.zkclient.ZkServer;
+import org.apache.commons.io.FileUtils;
+import org.apache.helix.HelixAdmin;
+import org.apache.helix.HelixManager;
+import org.apache.helix.autoscale.Service;
+import org.apache.helix.autoscale.StatusProvider;
+import org.apache.helix.autoscale.StatusProviderService;
+import org.apache.helix.autoscale.TargetProvider;
+import org.apache.helix.autoscale.TargetProviderService;
+import org.apache.helix.autoscale.impl.local.LocalContainerProviderProcess;
+import org.apache.helix.autoscale.impl.shell.ShellContainerProviderProcess;
+import org.apache.helix.autoscale.impl.yarn.YarnContainerProviderProcess;
+import org.apache.helix.autoscale.impl.yarn.YarnContainerProviderProperties;
+import org.apache.helix.autoscale.provider.ProviderProperties;
+import org.apache.helix.autoscale.provider.ProviderRebalancer;
+import org.apache.helix.autoscale.provider.ProviderRebalancerSingleton;
+import org.apache.helix.controller.HelixControllerMain;
+import org.apache.helix.manager.zk.ZKHelixAdmin;
+import org.apache.helix.model.ExternalView;
+import org.apache.helix.model.IdealState;
+import org.apache.helix.model.IdealState.RebalanceMode;
+import org.apache.helix.model.StateModelDefinition;
+import org.apache.helix.tools.StateModelConfigGenerator;
+import org.apache.log4j.Logger;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.Maps;
+
+/**
+ * Utility for creating a test cluster without the bootstrapping tool. Methods
+ * for verifying the number of active instances and partitions in a cluster.
+ * 
+ */
+public class TestUtils {
+
+    static final Logger                    log                      = Logger.getLogger(TestUtils.class);
+
+    public static int                      zkPort;
+    public static String                   zkAddress;
+    public static String                   resmanAddress;
+    public static String                   schedulerAddress;
+    public static String                   hdfsAddress;
+    public static String                   yarnUser;
+
+    public static final String             metaClusterName          = "meta-cluster";
+    public static final String             managedClusterName       = "managed-cluster";
+    public static final String             metaResourceName         = "container";
+    public static final String             managedResourceName      = "database";
+
+    public static final int                numManagedPartitions     = 10;
+    public static final int                numManagedReplica        = 2;
+
+    public static final long               TEST_TIMEOUT             = 120000;
+    public static final long               REBALANCE_TIMEOUT        = 60000;
+    public static final long               POLL_INTERVAL            = 1000;
+
+    public static final ProviderProperties providerProperties       = new ProviderProperties();
+
+    public static ZkServer                 server                   = null;
+    public static HelixAdmin               admin                    = null;
+    public static HelixManager             metaControllerManager    = null;
+    public static HelixManager             managedControllerManager = null;
+
+    public static Collection<Service>      providerServices         = new ArrayList<Service>();
+    public static Collection<Service>      auxServices              = new ArrayList<Service>();
+
+    public static TargetProvider           targetProvider           = null;
+    public static StatusProvider           statusProvider           = null;
+
+    // Applies the default configuration (see configure()) as soon as the class
+    // is loaded. A failure is logged and rethrown unchecked, which aborts class
+    // initialization.
+    static {
+        try {
+            configure();
+        } catch(Exception e) {
+            log.error("Could not setup TestUtils", e);
+            throw new RuntimeException(e);
+        }
+    }
+    
+    /** Not instantiable: this class only exposes static members. */
+    private TestUtils() {
+        // left blank
+    }
+    
+    /**
+     * Configures the test utilities from the default class path resource
+     * "standalone.properties".
+     *
+     * @throws IOException propagated from {@link #configure(String)}
+     */
+    public static void configure() throws IOException {
+        configure("standalone.properties");
+    }
+    
+    /**
+     * Configures the test utilities from a properties file on the system class
+     * path.
+     *
+     * @param resourcePath class path location of the properties file
+     * @throws IOException if the resource cannot be found or read
+     */
+    public static void configure(String resourcePath) throws IOException {
+        log.info(String.format("Configuring Test cluster from %s", resourcePath));
+
+        // getSystemResourceAsStream() returns null for a missing resource, which
+        // would otherwise surface as a NullPointerException in Properties.load()
+        java.io.InputStream stream = ClassLoader.getSystemResourceAsStream(resourcePath);
+        if (stream == null)
+            throw new IOException(String.format("Could not find resource '%s' on class path", resourcePath));
+
+        Properties properties = new Properties();
+        try {
+            properties.load(stream);
+        } finally {
+            stream.close();
+        }
+        configure(properties);
+    }
+
+    /**
+     * Configures the test utilities from the given properties. Required keys
+     * are "zookeeper.port", "zookeeper.address", "yarn.resourcemanager",
+     * "yarn.scheduler", "yarn.hdfs" and "yarn.user".
+     * <p>
+     * All values are validated before any static field is modified, so a
+     * rejected configuration does not leave a partially applied state behind.
+     *
+     * @param properties configuration values
+     * @throws NullPointerException  if a required property is missing
+     * @throws NumberFormatException if "zookeeper.port" is not an integer
+     */
+    public static void configure(Properties properties) {
+        log.info(String.format("Configuring from properties '%s'", properties));
+
+        // a null check on the parsed int could never fail (autoboxing), so the
+        // raw property values are checked instead
+        final int port = Integer.parseInt(requireProperty(properties, "zookeeper.port"));
+        final String address = requireProperty(properties, "zookeeper.address");
+        final String resman = requireProperty(properties, "yarn.resourcemanager");
+        final String scheduler = requireProperty(properties, "yarn.scheduler");
+        final String hdfs = requireProperty(properties, "yarn.hdfs");
+        final String user = requireProperty(properties, "yarn.user");
+
+        zkPort = port;
+        zkAddress = address;
+        resmanAddress = resman;
+        schedulerAddress = scheduler;
+        hdfsAddress = hdfs;
+        yarnUser = user;
+
+        configureInternal();
+    }
+
+    /** Returns the value for key, or fails with a message naming the missing key. */
+    private static String requireProperty(Properties properties, String key) {
+        return Preconditions.checkNotNull(properties.getProperty(key), "missing required property '%s'", key);
+    }
+    
+    /**
+     * Rebuilds the shared {@link #providerProperties} from the current static
+     * configuration. The managed and the meta cluster both use
+     * {@link #zkAddress}, and a single container type named "container" is
+     * registered.
+     */
+    static void configureInternal() {
+        providerProperties.clear();
+        providerProperties.setProperty(ProviderProperties.ADDRESS, zkAddress);
+        providerProperties.setProperty(ProviderProperties.CLUSTER, managedClusterName);
+        providerProperties.setProperty(ProviderProperties.METAADDRESS, zkAddress);
+        providerProperties.setProperty(ProviderProperties.METACLUSTER, metaClusterName);
+        // placeholder name
+        // NOTE(review): presumably replaced per provider elsewhere - confirm
+        providerProperties.setProperty(ProviderProperties.NAME, "<unknown>");
+    
+        Properties containerProperties = new Properties();
+        containerProperties.setProperty("class", "org.apache.helix.autoscale.impl.container.DummyMasterSlaveProcess");
+    
+        providerProperties.addContainer("container", containerProperties);
+    
+        log.info(String.format("Using provider properties '%s'", providerProperties));
+    }
+
+    /**
+     * Creates and starts the local zookeeper server used by the tests.
+     *
+     * @throws IllegalStateException if a server instance is already registered
+     * @throws Exception if the server cannot be created or started
+     */
+    public static void startZookeeper() throws Exception {
+        log.info("Starting ZooKeeper");
+
+        final boolean alreadyRunning = (server != null);
+        if (alreadyRunning) {
+            throw new IllegalStateException("Zookeeper already running");
+        }
+
+        // the reference is published before start() so that stopZookeeper()
+        // can still shut the server down if start() fails
+        server = createLocalZookeeper();
+        server.start();
+    }
+
+    /**
+     * Shuts down the local zookeeper server. Does nothing besides logging if no
+     * server is registered.
+     *
+     * @throws Exception if the shutdown fails
+     */
+    public static void stopZookeeper() throws Exception {
+        log.info("Stopping ZooKeeper");
+
+        if (server == null) {
+            return;
+        }
+
+        server.shutdown();
+        server = null;
+    }
+
+    /**
+     * Sets up and starts the complete test environment: the managed cluster,
+     * the meta cluster, the target and status providers, the given container
+     * providers and one standalone controller per cluster. Returns once
+     * {@link #waitUntilRebalancedCount(int)} succeeds for the initial target
+     * container count.
+     *
+     * @param targetProvider             source of the target container count
+     * @param statusProvider             status provider handed to the rebalancer singleton
+     * @param containerProviderProcesses container providers to start
+     * @throws IllegalStateException if zookeeper was not started or a test
+     *                               cluster is already running
+     * @throws Exception if any setup step or the final wait fails
+     */
+    public static void startTestCluster(TargetProviderService targetProvider, StatusProviderService statusProvider, Service... containerProviderProcesses)
+            throws Exception {
+        log.debug(String.format("Starting test cluster"));
+
+        if (server == null)
+            throw new IllegalStateException("Zookeeper not running yet");
+
+        // any leftover service, admin or controller means stopTestCluster() was
+        // not called (or did not complete) since the last start
+        if (!auxServices.isEmpty() || !providerServices.isEmpty() || admin != null || metaControllerManager != null || managedControllerManager != null)
+            throw new IllegalStateException("TestCluster already running");
+
+        log.debug("Create admin");
+        admin = new ZKHelixAdmin(zkAddress);
+
+        log.debug("Create clusters");
+        admin.addCluster(metaClusterName, true);
+        admin.addCluster(managedClusterName, true);
+
+        log.debug("Setup config tool");
+        ProviderRebalancerSingleton.setTargetProvider(targetProvider);
+        ProviderRebalancerSingleton.setStatusProvider(statusProvider);
+
+        log.debug("Starting target and status provider");
+        TestUtils.targetProvider = startAuxService(targetProvider);
+        TestUtils.statusProvider = startAuxService(statusProvider);
+
+        // Managed Cluster
+        // MasterSlave resource in FULL_AUTO mode, restricted to instances
+        // tagged with the meta resource name
+        log.debug("Setup managed cluster");
+        admin.addStateModelDef(managedClusterName, "MasterSlave", new StateModelDefinition(StateModelConfigGenerator.generateConfigForMasterSlave()));
+        admin.addResource(managedClusterName, managedResourceName, numManagedPartitions, "MasterSlave", RebalanceMode.FULL_AUTO.toString());
+        IdealState managedIdealState = admin.getResourceIdealState(managedClusterName, managedResourceName);
+        managedIdealState.setInstanceGroupTag(metaResourceName);
+        managedIdealState.setReplicas(String.valueOf(numManagedReplica));
+        admin.setResourceIdealState(managedClusterName, managedResourceName, managedIdealState);
+
+        // Meta Cluster
+        // OnlineOffline resource with one partition per target container,
+        // assigned by the user-defined ProviderRebalancer
+        log.debug("Setup meta cluster");
+        admin.addStateModelDef(metaClusterName, "OnlineOffline", new StateModelDefinition(StateModelConfigGenerator.generateConfigForOnlineOffline()));
+        admin.addResource(metaClusterName, metaResourceName, targetProvider.getTargetContainerCount(metaResourceName), "OnlineOffline",
+                RebalanceMode.USER_DEFINED.toString());
+
+        IdealState idealState = admin.getResourceIdealState(metaClusterName, metaResourceName);
+        idealState.setRebalancerClassName(ProviderRebalancer.class.getName());
+        idealState.setReplicas("1");
+
+        // BEGIN workaround
+        // FIXME workaround for HELIX-226
+        // pre-populates empty list and map fields for 256 partition names
+        Map<String, List<String>> listFields = Maps.newHashMap();
+        Map<String, Map<String, String>> mapFields = Maps.newHashMap();
+        for (int i = 0; i < 256; i++) {
+            String partitionName = metaResourceName + "_" + i;
+            listFields.put(partitionName, new ArrayList<String>());
+            mapFields.put(partitionName, new HashMap<String, String>());
+        }
+        idealState.getRecord().setListFields(listFields);
+        idealState.getRecord().setMapFields(mapFields);
+        // END workaround
+
+        admin.setResourceIdealState(metaClusterName, metaResourceName, idealState);
+
+        log.debug("Starting container providers");
+        for (Service service : containerProviderProcesses) {
+            startProviderService(service);
+        }
+
+        log.debug("Starting managed cluster controller");
+        managedControllerManager = HelixControllerMain.startHelixController(zkAddress, managedClusterName, "managedController", HelixControllerMain.STANDALONE);
+
+        log.debug("Starting meta cluster controller");
+        metaControllerManager = HelixControllerMain.startHelixController(zkAddress, metaClusterName, "metaController", HelixControllerMain.STANDALONE);
+
+        log.debug("Waiting for stable state");
+        waitUntilRebalancedCount(targetProvider.getTargetContainerCount(metaResourceName));
+    }
+
+    public static void stopTestCluster() throws Exception {
+        log.debug(String.format("Stopping test cluster"));
+        if (managedControllerManager != null) {
+            log.info("Disconnecting managed cluster controller");
+            managedControllerManager.disconnect();
+        }
+        if (metaControllerManager != null) {
+            log.info("Disconnecting meta cluster controller");
+            metaControllerManager.disconnect();
+        }
+        log.info("Stopping provider services");
+        if (providerServices != null) {
+            for (Service service : providerServices) {
+                service.stop();
+            }
+            providerServices.clear();
+        }
+        log.debug("Stopping auxillary services");
+        if (auxServices != null) {
+            for (Service service : auxServices) {
+                service.stop();
+            }
+            auxServices.clear();
+        }
+
+        admin = null;
+        metaControllerManager = null;
+        managedControllerManager = null;
+    }
+
+    /**
+     * Registers the service in {@link #auxServices} and starts it.
+     *
+     * @param service service to start
+     * @return the same service instance
+     * @throws Exception if the service fails to start
+     */
+    public static <T extends Service> T startAuxService(T service) throws Exception {
+        // registered before start() so that stopTestCluster() also stops a
+        // service whose start() failed
+        auxServices.add(service);
+        service.start();
+        return service;
+    }
+
+    /**
+     * Registers the service in {@link #providerServices} and starts it.
+     *
+     * @param service container provider service to start
+     * @return the same service instance
+     * @throws Exception if the service fails to start
+     */
+    public static <T extends Service> T startProviderService(T service) throws Exception {
+        // registered before start() so that stopTestCluster() also stops a
+        // service whose start() failed
+        providerServices.add(service);
+        service.start();
+        return service;
+    }
+
+    /**
+     * Triggers a rebalance of the meta cluster and waits until the current
+     * target container count is reached.
+     *
+     * @throws Exception if the ideal state cannot be accessed or the wait fails
+     */
+    public static void rebalanceTestCluster() throws Exception {
+        log.debug(String.format("Triggering rebalance"));
+
+        // writes the ideal state back unmodified
+        // NOTE(review): presumably the write itself notifies the controller - confirm
+        admin.setResourceIdealState(metaClusterName, metaResourceName, admin.getResourceIdealState(metaClusterName, metaResourceName));
+
+        waitUntilRebalancedCount(targetProvider.getTargetContainerCount(TestUtils.metaResourceName));
+    }
+
+    /**
+     * Waits until the meta resource has the given number of assigned
+     * partitions, the meta resource is served by as many instances as there
+     * are registered provider services, and the managed resource has all of
+     * its {@link #numManagedPartitions} partitions assigned. All three waits
+     * share a budget of {@link #REBALANCE_TIMEOUT} milliseconds.
+     *
+     * @param containerCount expected number of containers
+     * @throws Exception if a wait fails or times out
+     */
+    public static void waitUntilRebalancedCount(int containerCount) throws Exception {
+        log.debug(String.format("Waiting for rebalance with %d containers at '%s'", containerCount, zkAddress));
+
+        // dedicated connection (shadows the static field on purpose), always
+        // closed below
+        HelixAdmin admin = new ZKHelixAdmin(zkAddress);
+
+        try {
+            long limit = System.currentTimeMillis() + REBALANCE_TIMEOUT;
+            waitUntilPartitionCount(admin, metaClusterName, metaResourceName, containerCount, (limit - System.currentTimeMillis()));
+            waitUntilInstanceCount(admin, metaClusterName, metaResourceName, providerServices.size(), (limit - System.currentTimeMillis()));
+            waitUntilPartitionCount(admin, managedClusterName, managedResourceName, numManagedPartitions, (limit - System.currentTimeMillis()));
+
+            // FIXME workaround for Helix FULL_AUTO rebalancer not providing guarantees for cluster expansion
+            //waitUntilInstanceCount(admin, managedClusterName, managedResourceName, containerCount, (limit - System.currentTimeMillis()));
+        } finally {
+            admin.close();
+        }
+    }
+
+    /**
+     * Polls the external view of a resource until the number of assigned
+     * instances equals the target count.
+     * <p>
+     * The count is checked at least once, even if the timeout is already used
+     * up, and once more when the deadline is reached.
+     *
+     * @param admin       Helix admin used for polling
+     * @param cluster     cluster name
+     * @param resource    resource name
+     * @param targetCount expected number of assigned instances
+     * @param timeout     time budget in milliseconds
+     * @throws TimeoutException if the target count is not reached in time
+     * @throws Exception if polling fails or the thread is interrupted
+     */
+    public static void waitUntilInstanceCount(HelixAdmin admin, String cluster, String resource, int targetCount, long timeout) throws Exception {
+        log.debug(String.format("Waiting for instance count (cluster='%s', resource='%s', instanceCount=%d, timeout=%d)", cluster, resource, targetCount,
+                timeout));
+
+        final long limit = System.currentTimeMillis() + timeout;
+        while (true) {
+            int assignedCount = getAssingedInstances(admin, cluster, resource).size();
+            log.debug(String.format("checking instance count for '%s:%s': target=%d, current=%d", cluster, resource, targetCount, assignedCount));
+
+            if (targetCount == assignedCount) {
+                return;
+            }
+
+            long remaining = limit - System.currentTimeMillis();
+            if (remaining <= 0) {
+                throw new TimeoutException(String.format("Timed out waiting for instance count of '%s:%s': target=%d, current=%d", cluster, resource,
+                        targetCount, assignedCount));
+            }
+
+            // never sleep past the deadline
+            Thread.sleep(Math.min(POLL_INTERVAL, remaining));
+        }
+    }
+
+    /**
+     * Polls the external view of a resource until the number of assigned
+     * partitions equals the target count.
+     * <p>
+     * The count is checked at least once, even if the timeout is already used
+     * up, and once more when the deadline is reached.
+     *
+     * @param admin       Helix admin used for polling
+     * @param cluster     cluster name
+     * @param resource    resource name
+     * @param targetCount expected number of assigned partitions
+     * @param timeout     time budget in milliseconds
+     * @throws TimeoutException if the target count is not reached in time
+     * @throws Exception if polling fails or the thread is interrupted
+     */
+    public static void waitUntilPartitionCount(HelixAdmin admin, String cluster, String resource, int targetCount, long timeout) throws Exception {
+        log.debug(String.format("Waiting for partition count (cluster='%s', resource='%s', partitionCount=%d, timeout=%d)", cluster, resource, targetCount,
+                timeout));
+
+        final long limit = System.currentTimeMillis() + timeout;
+        while (true) {
+            int assignedCount = getAssingedPartitions(admin, cluster, resource).size();
+            log.debug(String.format("checking partition count for '%s:%s': target=%d, current=%d", cluster, resource, targetCount, assignedCount));
+
+            if (targetCount == assignedCount) {
+                return;
+            }
+
+            long remaining = limit - System.currentTimeMillis();
+            if (remaining <= 0) {
+                throw new TimeoutException(String.format("Timed out waiting for partition count of '%s:%s': target=%d, current=%d", cluster, resource,
+                        targetCount, assignedCount));
+            }
+
+            // never sleep past the deadline
+            Thread.sleep(Math.min(POLL_INTERVAL, remaining));
+        }
+    }
+
+    /**
+     * Collects the names of all instances that hold at least one partition of
+     * the resource in state MASTER, SLAVE or ONLINE, according to the
+     * resource's external view.
+     *
+     * @param admin        Helix admin used to read the external view
+     * @param clusterName  cluster name
+     * @param resourceName resource name
+     * @return instance names; empty if there is no external view
+     */
+    public static Set<String> getAssingedInstances(HelixAdmin admin, String clusterName, String resourceName) {
+        final Set<String> result = new HashSet<String>();
+        final ExternalView view = admin.getResourceExternalView(clusterName, resourceName);
+
+        if (view != null) {
+            for (String partition : view.getPartitionSet()) {
+                final Map<String, String> states = view.getStateMap(partition);
+                if (states != null) {
+                    for (Map.Entry<String, String> assignment : states.entrySet()) {
+                        final String state = assignment.getValue();
+                        final boolean serving = "MASTER".equals(state) || "SLAVE".equals(state) || "ONLINE".equals(state);
+                        if (serving) {
+                            result.add(assignment.getKey());
+                        }
+                    }
+                }
+            }
+        }
+
+        return result;
+    }
+
+    /**
+     * Collects the names of all partitions of the resource for which at least
+     * one instance is in state MASTER or ONLINE, as reported by the resource's
+     * external view.
+     *
+     * @param admin        Helix admin used to read the external view
+     * @param clusterName  name of the cluster owning the resource
+     * @param resourceName name of the resource to inspect
+     * @return set of partition names; empty if the resource has no external view
+     */
+    public static Set<String> getAssingedPartitions(HelixAdmin admin, String clusterName, String resourceName) {
+        final Set<String> partitions = new HashSet<String>();
+
+        final ExternalView view = admin.getResourceExternalView(clusterName, resourceName);
+        if (view == null) {
+            return partitions;
+        }
+
+        for (String partition : view.getPartitionSet()) {
+            final Map<String, String> states = view.getStateMap(partition);
+            // a single MASTER or ONLINE replica is enough to count the partition
+            if (states != null && (states.containsValue("MASTER") || states.containsValue("ONLINE"))) {
+                partitions.add(partition);
+            }
+        }
+
+        return partitions;
+    }
+
+    /**
+     * Creates a {@link ZkServer} that keeps its data and log directories under
+     * {@code /tmp/autoscale/zk} and uses the configured {@code zkPort}. Both
+     * directories are deleted before the server object is created. No start
+     * call is made here.
+     *
+     * @return the newly constructed zookeeper server
+     * @throws Exception if deleting a directory fails
+     */
+    public static ZkServer createLocalZookeeper() throws Exception {
+        final String zkRoot = "/tmp/autoscale/";
+        final String zkData = zkRoot + "zk/dataDir";
+        final String zkLog = zkRoot + "zk/logDir";
+
+        // wipe the directories, data first, then log
+        for (String dir : new String[] { zkData, zkLog }) {
+            FileUtils.deleteDirectory(new File(dir));
+        }
+
+        final IDefaultNameSpace emptyNameSpace = new IDefaultNameSpace() {
+            @Override
+            public void createDefaultNameSpace(ZkClient zkClient) {
+                // intentionally empty: no default nodes are created
+            }
+        };
+        return new ZkServer(zkData, zkLog, emptyNameSpace, zkPort);
+    }
+
+    /**
+     * Creates a local container provider process and configures it with the
+     * shared provider properties under the given name.
+     *
+     * @param name provider name stored in the properties
+     * @return the configured process
+     */
+    public static LocalContainerProviderProcess makeLocalProvider(String name) throws Exception {
+        final LocalContainerProviderProcess provider = new LocalContainerProviderProcess();
+        final ProviderProperties config = makeProviderProperties(name);
+        provider.configure(config);
+        return provider;
+    }
+
+    /**
+     * Creates a shell container provider process and configures it with the
+     * shared provider properties under the given name.
+     *
+     * @param name provider name stored in the properties
+     * @return the configured process
+     */
+    public static ShellContainerProviderProcess makeShellProvider(String name) throws Exception {
+        final ShellContainerProviderProcess provider = new ShellContainerProviderProcess();
+        final ProviderProperties config = makeProviderProperties(name);
+        provider.configure(config);
+        return provider;
+    }
+
+    /**
+     * Creates a YARN container provider process. Its configuration consists of
+     * the shared provider properties for the given name plus the YARN settings
+     * held by this class (resource manager, scheduler, user and HDFS address).
+     * The YARN data entry is set to the zookeeper address.
+     *
+     * @param name provider name stored in the properties
+     * @return the configured process
+     */
+    public static YarnContainerProviderProcess makeYarnProvider(String name) throws Exception {
+        final YarnContainerProviderProperties yarnConfig = new YarnContainerProviderProperties();
+
+        // generic provider settings first, YARN-specific entries afterwards
+        yarnConfig.putAll(makeProviderProperties(name));
+        yarnConfig.put(YarnContainerProviderProperties.YARNDATA, zkAddress);
+        yarnConfig.put(YarnContainerProviderProperties.RESOURCEMANAGER, resmanAddress);
+        yarnConfig.put(YarnContainerProviderProperties.SCHEDULER, schedulerAddress);
+        yarnConfig.put(YarnContainerProviderProperties.USER, yarnUser);
+        yarnConfig.put(YarnContainerProviderProperties.HDFS, hdfsAddress);
+
+        final YarnContainerProviderProcess provider = new YarnContainerProviderProcess();
+        provider.configure(yarnConfig);
+        return provider;
+    }
+
+    static ProviderProperties makeProviderProperties(String name) {
+        ProviderProperties properties = new ProviderProperties();
+        properties.putAll(providerProperties);
+        properties.setProperty(ProviderProperties.NAME, name);
+        return properties;
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/TestUtilsUT.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/TestUtilsUT.java b/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/TestUtilsUT.java
new file mode 100644
index 0000000..d55d7a4
--- /dev/null
+++ b/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/TestUtilsUT.java
@@ -0,0 +1,63 @@
+package org.apache.helix.autoscale;
+
+import java.util.Collections;
+
+import org.apache.helix.autoscale.Service;
+import org.apache.helix.autoscale.StatusProviderService;
+import org.apache.helix.autoscale.TargetProviderService;
+import org.apache.helix.autoscale.impl.StaticTargetProvider;
+import org.apache.helix.autoscale.impl.local.LocalStatusProvider;
+import org.apache.log4j.Logger;
+import org.testng.annotations.Test;
+
+/**
+ * Self-test of the test cluster utilities. Spawns zookeeper and a cluster with
+ * a single provider and a single instance.
+ *
+ * @see TestUtils
+ */
+@Test(groups={"unit"})
+public class TestUtilsUT {
+
+    static final Logger log = Logger.getLogger(TestUtilsUT.class);
+
+    /** Starts and stops the local zookeeper. */
+    @Test(timeOut = TestUtils.TEST_TIMEOUT)
+    public void testZookeeper() throws Exception {
+        log.info("testing zookeeper");
+        TestUtils.configure();
+        try {
+            TestUtils.startZookeeper();
+        } finally {
+            TestUtils.stopZookeeper();
+        }
+    }
+
+    /** Starts and stops a test cluster once on top of a local zookeeper. */
+    @Test(timeOut = TestUtils.TEST_TIMEOUT)
+    public void testCluster() throws Exception {
+        log.info("testing cluster");
+        TestUtils.configure();
+        try {
+            TestUtils.startZookeeper();
+            runClusterCycle(new StaticTargetProvider(Collections.singletonMap(TestUtils.metaResourceName, 1)),
+                    new LocalStatusProvider(), TestUtils.makeLocalProvider("test"));
+        } finally {
+            TestUtils.stopZookeeper();
+        }
+    }
+
+    /** Starts and stops a test cluster twice, reusing the same services. */
+    @Test(timeOut = TestUtils.TEST_TIMEOUT)
+    public void testClusterRepeated() throws Exception {
+        log.info("testing cluster restart");
+        TestUtils.configure();
+        try {
+            TestUtils.startZookeeper();
+            TargetProviderService targetProvider = new StaticTargetProvider(Collections.singletonMap(TestUtils.metaResourceName, 1));
+            Service containerProvider = TestUtils.makeLocalProvider("test");
+            StatusProviderService containerStatusProvider = new LocalStatusProvider();
+
+            runClusterCycle(targetProvider, containerStatusProvider, containerProvider);
+            runClusterCycle(targetProvider, containerStatusProvider, containerProvider);
+        } finally {
+            TestUtils.stopZookeeper();
+        }
+    }
+
+    /**
+     * Starts the test cluster and always stops it again, so a failed start
+     * does not leave cluster components behind for subsequent tests.
+     * NOTE(review): relies on stopTestCluster() tolerating a cluster that is
+     * not fully started - confirm against TestUtils.
+     */
+    private static void runClusterCycle(TargetProviderService targetProvider, StatusProviderService statusProvider, Service containerProvider)
+            throws Exception {
+        try {
+            TestUtils.startTestCluster(targetProvider, statusProvider, containerProvider);
+        } finally {
+            TestUtils.stopTestCluster();
+        }
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/YarnContainerProviderIT.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/YarnContainerProviderIT.java b/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/YarnContainerProviderIT.java
new file mode 100644
index 0000000..78a0bf8
--- /dev/null
+++ b/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/YarnContainerProviderIT.java
@@ -0,0 +1,101 @@
+package org.apache.helix.autoscale;
+
+import java.util.Collections;
+
+import org.apache.helix.autoscale.impl.StaticTargetProvider;
+import org.apache.helix.autoscale.impl.yarn.YarnContainerProviderProcess;
+import org.apache.helix.autoscale.impl.yarn.YarnContainerProviderProperties;
+import org.apache.helix.autoscale.impl.yarn.YarnStatusProvider;
+import org.apache.log4j.Logger;
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.Test;
+
+/**
+ * Yarn container provider and yarn status provider test. Scale-up and -down
+ * only, no failures.
+ *
+ * @see YarnContainerProvider
+ * @see YarnStatusProvider
+ */
+@Test(groups={"integration", "yarn"})
+public class YarnContainerProviderIT {
+
+    static final Logger             log             = Logger.getLogger(YarnContainerProviderIT.class);
+
+    /** Container count the cluster is started with before each test. */
+    static final int                CONTAINER_COUNT = 4;
+
+    StaticTargetProvider            clusterStatusProvider;
+    YarnContainerProviderProcess    containerProvider;
+    YarnStatusProvider              containerStatusProvider;
+
+    // not referenced anywhere within this class
+    YarnContainerProviderProperties properties;
+
+    /**
+     * Registers a JVM shutdown hook that tears the test environment down, so
+     * cluster and zookeeper are stopped even if the test run is aborted.
+     */
+    @BeforeClass(alwaysRun = true)
+    public void setupClass() throws Exception {
+        log.info("installing shutdown hook");
+        Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() {
+            @Override
+            public void run() {
+                try {
+                    teardownTest();
+                } catch (Exception e) {
+                    // best effort: the JVM is going down, so only report the failure
+                    log.warn("failed to clean up yarn test case during shutdown", e);
+                }
+            }
+        }));
+    }
+
+    /**
+     * Cleans up leftovers of a previous run, then starts zookeeper and a test
+     * cluster with one yarn provider targeting {@link #CONTAINER_COUNT} containers.
+     */
+    @BeforeMethod(alwaysRun = true)
+    public void setupTest() throws Exception {
+        log.debug("setting up yarn test case");
+
+        teardownTest();
+        TestUtils.configure("distributed.properties");
+        TestUtils.startZookeeper();
+
+        containerProvider = TestUtils.makeYarnProvider("provider_0");
+        containerStatusProvider = new YarnStatusProvider(TestUtils.zkAddress);
+        clusterStatusProvider = new StaticTargetProvider(Collections.singletonMap(TestUtils.metaResourceName, CONTAINER_COUNT));
+        TestUtils.startTestCluster(clusterStatusProvider, containerStatusProvider, containerProvider);
+
+        log.debug("running yarn test case");
+    }
+
+    /**
+     * Stops the test cluster and zookeeper. Zookeeper is stopped even if
+     * stopping the cluster fails.
+     */
+    @AfterMethod(alwaysRun = true)
+    public void teardownTest() throws Exception {
+        log.debug("cleaning up yarn test case");
+        try {
+            TestUtils.stopTestCluster();
+        } finally {
+            TestUtils.stopZookeeper();
+        }
+    }
+
+    @Test(timeOut = TestUtils.TEST_TIMEOUT)
+    public void testStatic() throws Exception {
+        log.info("testing static");
+        setContainerCount(CONTAINER_COUNT);
+    }
+
+    @Test(timeOut = TestUtils.TEST_TIMEOUT)
+    public void testScaleUp() throws Exception {
+        log.info("testing scale up");
+        setContainerCount(CONTAINER_COUNT + 2);
+    }
+
+    @Test(timeOut = TestUtils.TEST_TIMEOUT)
+    public void testScaleDown() throws Exception {
+        log.info("testing scale down");
+        setContainerCount(CONTAINER_COUNT - 2);
+    }
+
+    @Test(timeOut = TestUtils.TEST_TIMEOUT)
+    public void testScaleCycle() throws Exception {
+        log.info("testing scale cycle");
+        setContainerCount(CONTAINER_COUNT + 2);
+        setContainerCount(CONTAINER_COUNT);
+        setContainerCount(CONTAINER_COUNT - 2);
+        setContainerCount(CONTAINER_COUNT);
+    }
+
+    /**
+     * Sets the target container count of the meta resource and triggers a
+     * rebalance of the test cluster.
+     *
+     * @param newContainerCount target number of containers
+     */
+    void setContainerCount(int newContainerCount) throws Exception {
+        log.debug(String.format("Setting container count to %d", newContainerCount));
+        clusterStatusProvider.setTargetContainerCount(TestUtils.metaResourceName, newContainerCount);
+        TestUtils.rebalanceTestCluster();
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/test/resources/distributed.properties
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/test/resources/distributed.properties b/recipes/auto-scale/src/test/resources/distributed.properties
new file mode 100644
index 0000000..47fd8e0
--- /dev/null
+++ b/recipes/auto-scale/src/test/resources/distributed.properties
@@ -0,0 +1,13 @@
+#
+# zookeeper will be spawned locally by TestUtils
+#
+zookeeper.port=2199
+zookeeper.address=rm:2199
+
+#
+# YARN and HDFS must be set up manually
+#
+yarn.resourcemanager=rm:8032
+yarn.scheduler=rm:8030
+yarn.hdfs=hdfs://rm:9000/
+yarn.user=yarn
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/test/resources/log4j.properties
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/test/resources/log4j.properties b/recipes/auto-scale/src/test/resources/log4j.properties
new file mode 100644
index 0000000..65800cc
--- /dev/null
+++ b/recipes/auto-scale/src/test/resources/log4j.properties
@@ -0,0 +1,30 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# Set root logger level to ERROR and its only appender to C.
+log4j.rootLogger=ERROR, C
+
+# C is set to be a ConsoleAppender.
+log4j.appender.C=org.apache.log4j.ConsoleAppender
+log4j.appender.C.layout=org.apache.log4j.PatternLayout
+log4j.appender.C.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n
+
+log4j.logger.org.I0Itec=ERROR
+log4j.logger.org.apache=ERROR
+
+log4j.logger.org.apache.helix.autoscale=DEBUG

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/test/resources/standalone.properties
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/test/resources/standalone.properties b/recipes/auto-scale/src/test/resources/standalone.properties
new file mode 100644
index 0000000..d4b4e86
--- /dev/null
+++ b/recipes/auto-scale/src/test/resources/standalone.properties
@@ -0,0 +1,13 @@
+#
+# zookeeper will be spawned locally by TestUtils
+#
+zookeeper.port=2199
+zookeeper.address=localhost:2199
+
+#
+# YARN and HDFS must be set up manually
+#
+yarn.resourcemanager=localhost:8032
+yarn.scheduler=localhost:8030
+yarn.hdfs=hdfs://localhost:9000/
+yarn.user=yarn
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/README.md
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/README.md b/recipes/meta-cluster-manager/README.md
new file mode 100644
index 0000000..9a8acf4
--- /dev/null
+++ b/recipes/meta-cluster-manager/README.md
@@ -0,0 +1,82 @@
+<!---
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+Meta cluster manager
+------------------------
+Auto-scaling for helix clusters using a meta cluster. The managed cluster operates as usual, managing resources and instances via AUTO_REBALANCE. The meta cluster monitors the managed cluster and injects or removes instances based on demand.
+
+The meta cluster makes decisions about scaling up or down based on information obtained from a "ClusterStatusProvider". A custom "ProviderRebalancer" is invoked to test the health of existing participants in the managed cluster using the "ContainerStatusProvider". If participants need to be (re-)deployed, the "ContainerProvider" is invoked to instantiate and inject participants into the managed cluster.
+
+ContainerProviders are the participants of the meta cluster, and there are multiple different implementations of the "ContainerProvider". First, the "LocalContainerProvider" spawns VM-local participants, i.e. participants of the managed cluster are spawned in the same VM as the container provider. This is mainly useful for testing. Second, the "ShellContainerProvider" spawns a separate VM process for each participant using shell commands. Third, the "YarnContainerProvider" creates processes as containers on a YARN cluster and manages their status using an external meta-data service (Zookeeper in this implementation). This implementation is fairly complex and has a number of external dependencies on a working YARN cluster and running services.
+
+Even though there are different types of providers, the notion of a "ContainerProcess" abstracts implementation specifics. A process implementation inherits from "ContainerProcess" and can be instantiated by all three types of container providers. CAUTION: since separate VM processes might be used, a VM-external method for coordination is required (e.g. Zookeeper).
+
+Configuration settings are passed throughout the application using traditional Properties objects. The "ConfigTool" contains default paths and helps to inject dependencies in the ProviderRebalancer.
+
+The application can be run and tested in three ways. First, a comprehensive suite of unit and integration tests can be run using "mvn verify". Second, the "Bootstrapper" can deploy a live managed and meta cluster based on a specification (e.g. "2by2shell.properties"). Third, the "MetaManagerDemo" deploys a test cluster and allows the user to step through a cycle of scale-up and scale-down as well as simulated container and container provider failures.
+
+
+The IdealState of the meta cluster uses the ONLINE-OFFLINE model and maps as follows in the example below:
+
+Resource: type of container, e.g. database, webserver
+Partition: container id
+Instance: responsible container provider
+
+META:
+
+database
+  database_0
+    provider_0 : ONLINE
+  database_1
+    provider_1 : ONLINE
+webserver
+  webserver_0
+    provider_0 : ONLINE
+  webserver_1
+    provider_1 : ONLINE
+  webserver_2
+    provider_0 : ONLINE
+
+      
+MANAGED:
+
+dbprod (tag=database)
+  dbprod_0
+    database_0 : MASTER
+    database_1 : SLAVE
+  dbprod_1
+    database_0 : SLAVE
+    database_1 : MASTER
+  dbprod_2
+    database_0 : MASTER
+    database_1 : SLAVE
+wsprod (tag=webserver)
+  wsprod_0
+    webserver_0 : ONLINE
+  wsprod_1
+    webserver_1 : ONLINE
+  wsprod_2
+    webserver_2 : ONLINE
+  wsprod_3
+    webserver_0 : ONLINE
+  wsprod_4
+    webserver_1 : ONLINE
+  wsprod_5
+    webserver_2 : ONLINE
+    
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/pom.xml
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/pom.xml b/recipes/meta-cluster-manager/pom.xml
new file mode 100644
index 0000000..ba5eb69
--- /dev/null
+++ b/recipes/meta-cluster-manager/pom.xml
@@ -0,0 +1,210 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+
+  <parent>
+    <groupId>org.apache.helix.recipes</groupId>
+    <artifactId>recipes</artifactId>
+    <version>0.6.2-incubating-SNAPSHOT</version>
+  </parent>
+
+  <artifactId>meta-cluster-manager</artifactId>
+  <packaging>jar</packaging>
+  <name>Apache Helix :: Recipes :: meta cluster manager</name>
+  
+  <properties>
+    <!-- version shared by the hadoop-client, hadoop-yarn-api and hadoop-yarn-common dependencies -->
+    <hadoop.version>0.23.9</hadoop.version>
+
+    <!-- TestNG groups: ut.groups is passed to surefire, it.groups to failsafe; the "yarn" profile overrides it.groups -->
+    <ut.groups>unit</ut.groups>
+    <it.groups>local, shell</it.groups>
+  </properties>
+
+  <dependencies>
+    <dependency>
+      <groupId>org.testng</groupId>
+      <artifactId>testng</artifactId>
+      <version>6.0.1</version>
+      <scope>test</scope>
+      <exclusions>
+        <exclusion>
+          <groupId>junit</groupId>
+          <artifactId>junit</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+    <dependency>
+      <groupId>com.google.code.gson</groupId>
+      <artifactId>gson</artifactId>
+      <version>2.2.4</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.helix</groupId>
+      <artifactId>helix-core</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>log4j</groupId>
+      <artifactId>log4j</artifactId>
+      <exclusions>
+        <exclusion>
+          <groupId>javax.mail</groupId>
+          <artifactId>mail</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>javax.jms</groupId>
+          <artifactId>jms</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>com.sun.jdmk</groupId>
+          <artifactId>jmxtools</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>com.sun.jmx</groupId>
+          <artifactId>jmxri</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+    <dependency>
+      <groupId>com.google.guava</groupId>
+      <artifactId>guava</artifactId>
+      <version>14.0.1</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-client</artifactId>
+      <version>${hadoop.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-yarn-api</artifactId>
+      <version>${hadoop.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-yarn-common</artifactId>
+      <version>${hadoop.version}</version>
+    </dependency>
+  </dependencies>
+  
+  <build>
+    <pluginManagement>
+      <plugins>
+        <plugin>
+          <groupId>org.codehaus.mojo</groupId>
+          <artifactId>appassembler-maven-plugin</artifactId>
+          <configuration>
+            <configurationDirectory>conf</configurationDirectory>
+            <copyConfigurationDirectory>true</copyConfigurationDirectory>
+            <includeConfigurationDirectoryInClasspath>true</includeConfigurationDirectoryInClasspath>
+            <assembleDirectory>${project.build.directory}/metamanager-pkg</assembleDirectory>
+            <extraJvmArguments>-Xms512m -Xmx512m</extraJvmArguments>
+            <platforms>
+              <platform>unix</platform>
+            </platforms>
+          </configuration>
+          <executions>
+            <execution>
+              <phase>package</phase>
+              <goals>
+                <goal>assemble</goal>
+              </goals>
+            </execution>
+          </executions>
+        </plugin>
+        <plugin>
+          <groupId>org.apache.rat</groupId>
+          <artifactId>apache-rat-plugin</artifactId>
+            <configuration>
+              <excludes combine.children="append">
+              </excludes>
+            </configuration>
+        </plugin>
+      </plugins>
+    </pluginManagement>
+    
+    <plugins>
+      <plugin>
+        <groupId>org.codehaus.mojo</groupId>
+        <artifactId>appassembler-maven-plugin</artifactId>
+        <configuration>
+          <programs>
+            <program>
+              <mainClass>org.apache.helix.metamanager.bootstrapper.Boot</mainClass>
+              <name>boot</name>
+            </program>
+            <program>
+              <mainClass>org.apache.helix.metamanager.impl.shell.ShellContainerProcess</mainClass>
+              <name>shell-container-process</name>
+            </program>
+            <program>
+              <mainClass>org.apache.helix.metamanager.impl.yarn.YarnMasterProcess</mainClass>
+              <name>yarn-master-process</name>
+            </program>
+            <program>
+              <mainClass>org.apache.helix.metamanager.impl.yarn.YarnContainerProcess</mainClass>
+              <name>yarn-container-process</name>
+            </program>
+            <program>
+              <mainClass>org.apache.helix.metamanager.ZookeeperSetter</mainClass>
+              <name>zookeeper-setter</name>
+            </program>
+          </programs>
+        </configuration>
+      </plugin>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-assembly-plugin</artifactId>
+        <executions>
+          <execution>
+            <phase>package</phase>
+            <goals>
+              <goal>single</goal>
+            </goals>
+          </execution>
+        </executions>
+        <configuration>
+          <finalName>metamanager</finalName>
+          <descriptor>src/main/assembly/assembly.xml</descriptor>
+        </configuration>
+      </plugin>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-surefire-plugin</artifactId>
+        <configuration>
+          <groups>${ut.groups}</groups>
+          <excludedGroups>integration</excludedGroups>
+          <suiteXmlFiles>
+            <suiteXmlFile>src/test/config/testng.xml</suiteXmlFile>
+          </suiteXmlFiles>
+        </configuration>
+      </plugin>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-failsafe-plugin</artifactId>
+        <configuration>
+          <groups>${it.groups}</groups>
+          <excludedGroups>unit</excludedGroups>
+          <suiteXmlFiles>
+            <suiteXmlFile>src/test/config/testng.xml</suiteXmlFile>
+          </suiteXmlFiles>
+        </configuration>
+        <executions>
+          <execution>
+            <goals>
+              <goal>integration-test</goal>
+              <goal>verify</goal>
+            </goals>
+          </execution>
+        </executions>
+      </plugin>
+    </plugins>
+  </build>
+  
+  <profiles>
+    <!-- when active, failsafe runs the "yarn" TestNG group instead of the default it.groups value -->
+    <profile>
+      <id>yarn</id>
+      <properties>
+        <it.groups>yarn</it.groups>
+      </properties>
+    </profile>
+  </profiles>
+</project>