You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@helix.apache.org by ki...@apache.org on 2013/09/20 20:30:10 UTC

[01/15] Adding Helix-task-framework and Yarn integration modules

Updated Branches:
  refs/heads/helix-yarn [created] e38aa54b0


http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/integration/FailoverIT.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/integration/FailoverIT.java b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/integration/FailoverIT.java
new file mode 100644
index 0000000..17a6047
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/integration/FailoverIT.java
@@ -0,0 +1,172 @@
+package org.apache.helix.metamanager.integration;
+
+import java.util.Collections;
+import java.util.Iterator;
+
+import org.apache.helix.metamanager.Service;
+import org.apache.helix.metamanager.TestUtils;
+import org.apache.helix.metamanager.impl.StaticTargetProvider;
+import org.apache.helix.metamanager.impl.local.LocalContainerProviderProcess;
+import org.apache.helix.metamanager.impl.local.LocalContainerSingleton;
+import org.apache.helix.metamanager.impl.local.LocalStatusProvider;
+import org.apache.helix.metamanager.impl.shell.ShellContainerProviderProcess;
+import org.apache.helix.metamanager.impl.shell.ShellContainerSingleton;
+import org.apache.helix.metamanager.impl.shell.ShellStatusProvider;
+import org.apache.helix.metamanager.impl.yarn.YarnContainerProviderProcess;
+import org.apache.helix.metamanager.impl.yarn.YarnStatusProvider;
+import org.apache.helix.metamanager.impl.yarn.ZookeeperMetadataProvider;
+import org.apache.log4j.Logger;
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.Test;
+
+public class FailoverIT {
+	
+	static final Logger log = Logger.getLogger(FailoverIT.class);
+	
+	static final int CONTAINER_COUNT = 7;
+
+	StaticTargetProvider targetProvider;
+	YarnStatusProvider yarnStatusProvider;
+	
+	@BeforeClass
+	public void setupClass() {
+		log.info("installing shutdown hook");
+		Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() {
+			@Override
+			public void run() {
+				try { teardownTest(); } catch(Exception ignore) {};
+			}
+		}));
+	}
+	
+	@BeforeMethod
+	public void setupTest() throws Exception {
+		teardownTest();
+		TestUtils.startZookeeper();
+		targetProvider = new StaticTargetProvider(Collections.singletonMap(TestUtils.metaResourceName, CONTAINER_COUNT));
+	}
+	
+	@AfterMethod
+	public void teardownTest() throws Exception {
+		TestUtils.stopTestCluster();
+		
+		if(yarnStatusProvider != null) {
+		    yarnStatusProvider.stop();
+		    yarnStatusProvider = null;
+		}
+		
+		TestUtils.stopZookeeper();
+	}
+	
+    @Test(timeOut = TestUtils.TEST_TIMEOUT)
+	public void testLocalContainerFailover() throws Exception {
+		log.info("testing local container failover");
+		TestUtils.startTestCluster(targetProvider, new LocalStatusProvider(), makeLocalProviders(3));
+		killLocalContainers();
+	}
+	
+    @Test(timeOut = TestUtils.TEST_TIMEOUT)
+	public void testLocalProviderFailover() throws Exception {
+		log.info("testing local provider failover");
+		TestUtils.startTestCluster(targetProvider, new LocalStatusProvider(), makeLocalProviders(3));
+		killProvider();
+	}
+	
+    @Test(timeOut = TestUtils.TEST_TIMEOUT)
+	public void testShellContainerFailover() throws Exception {
+		log.info("testing shell container failover");
+		TestUtils.startTestCluster(targetProvider, new ShellStatusProvider(), makeShellProviders(3));
+		killShellContainers();
+	}
+	
+    @Test(timeOut = TestUtils.TEST_TIMEOUT)
+	public void testShellProviderFailover() throws Exception {
+		log.info("testing shell provider failover");
+		TestUtils.startTestCluster(targetProvider, new ShellStatusProvider(), makeShellProviders(3));
+		killProvider();
+	}
+	
+    @Test(timeOut = TestUtils.TEST_TIMEOUT)
+	public void testYarnContainerFailover() throws Exception {
+		log.info("testing yarn container failover");
+		yarnStatusProvider = new YarnStatusProvider(TestUtils.zkAddress);
+		yarnStatusProvider.start();
+		TestUtils.startTestCluster(targetProvider, yarnStatusProvider, makeYarnProviders(3));
+		killYarnContainers();
+	}
+	
+    @Test(timeOut = TestUtils.TEST_TIMEOUT)
+	public void testYarnProviderFailover() throws Exception {
+		log.info("testing yarn provider failover");
+		yarnStatusProvider = new YarnStatusProvider(TestUtils.zkAddress);
+		yarnStatusProvider.start();
+		TestUtils.startTestCluster(targetProvider, yarnStatusProvider, makeYarnProviders(3));
+		killProvider();
+	}
+	
+	void killLocalContainers() throws Exception {
+	    LocalContainerSingleton.killProcess("container_2");
+        LocalContainerSingleton.killProcess("container_4");
+        LocalContainerSingleton.killProcess("container_6");
+        Thread.sleep(3000);
+		TestUtils.rebalanceTestCluster();
+		TestUtils.waitUntilRebalancedCount(CONTAINER_COUNT);
+	}
+	
+    void killShellContainers() throws Exception {
+        ShellContainerSingleton.killProcess("container_2");
+        ShellContainerSingleton.killProcess("container_4");
+        ShellContainerSingleton.killProcess("container_6");
+        Thread.sleep(3000);
+        TestUtils.rebalanceTestCluster();
+        TestUtils.waitUntilRebalancedCount(CONTAINER_COUNT);
+    }
+    
+    void killYarnContainers() throws Exception {
+        ZookeeperMetadataProvider metadata = new ZookeeperMetadataProvider(TestUtils.zkAddress);
+        metadata.start();
+        metadata.delete("container_2");
+        metadata.delete("container_4");
+        metadata.delete("container_6");
+        metadata.stop();
+        Thread.sleep(3000);
+        TestUtils.rebalanceTestCluster();
+        TestUtils.waitUntilRebalancedCount(CONTAINER_COUNT);
+    }
+    
+	static void killProvider() throws Exception {
+		Iterator<Service> itService = TestUtils.providerServices.iterator();
+		itService.next().stop();
+		itService.remove();
+		
+		TestUtils.rebalanceTestCluster();
+		TestUtils.waitUntilRebalancedCount(CONTAINER_COUNT);
+	}
+	
+	LocalContainerProviderProcess[] makeLocalProviders(int count) throws Exception {
+	    LocalContainerProviderProcess[] localProviders = new LocalContainerProviderProcess[count];
+		for(int i=0; i<count; i++) {
+		    localProviders[i] = TestUtils.makeLocalProvider("provider_" + i);
+		}
+		return localProviders;
+	}
+	
+	ShellContainerProviderProcess[] makeShellProviders(int count) throws Exception {
+	    ShellContainerProviderProcess[] shellProviders = new ShellContainerProviderProcess[count];
+		for(int i=0; i<count; i++) {
+		    shellProviders[i] = TestUtils.makeShellProvider("provider_" + i);
+		}
+		return shellProviders;
+	}
+	
+	YarnContainerProviderProcess[] makeYarnProviders(int count) throws Exception {
+	    YarnContainerProviderProcess[] yarnProviders = new YarnContainerProviderProcess[count];
+		for(int i=0; i<count; i++) {
+		    yarnProviders[i] = TestUtils.makeYarnProvider("provider_" + i);
+		}
+		return yarnProviders;
+	}
+	
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/integration/LocalContainerProviderIT.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/integration/LocalContainerProviderIT.java b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/integration/LocalContainerProviderIT.java
new file mode 100644
index 0000000..6f9b6df
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/integration/LocalContainerProviderIT.java
@@ -0,0 +1,72 @@
+package org.apache.helix.metamanager.integration;
+
+import java.util.Collections;
+
+import org.apache.helix.metamanager.TestUtils;
+import org.apache.helix.metamanager.impl.StaticTargetProvider;
+import org.apache.helix.metamanager.impl.local.LocalContainerProviderProcess;
+import org.apache.helix.metamanager.impl.local.LocalStatusProvider;
+import org.apache.log4j.Logger;
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.Test;
+
+public class LocalContainerProviderIT {
+	
+	static final Logger log = Logger.getLogger(LocalContainerProviderIT.class);
+	
+	static final int CONTAINER_COUNT = 4;
+
+	StaticTargetProvider clusterStatusProvider;
+	LocalContainerProviderProcess containerProvider;
+	LocalStatusProvider containerStatusProvider;
+	
+	@BeforeMethod
+	public void setupTest() throws Exception {
+		teardownTest();
+		TestUtils.startZookeeper();
+		containerProvider = TestUtils.makeLocalProvider("provider_0");
+		clusterStatusProvider = new StaticTargetProvider(Collections.singletonMap(TestUtils.metaResourceName, CONTAINER_COUNT));
+		containerStatusProvider = new LocalStatusProvider();
+		TestUtils.startTestCluster(clusterStatusProvider, containerStatusProvider, containerProvider);
+	}
+	
+	@AfterMethod
+	public void teardownTest() throws Exception {
+		TestUtils.stopTestCluster();
+		TestUtils.stopZookeeper();
+	}
+	
+	@Test(timeOut = TestUtils.TEST_TIMEOUT)
+	public void testStatic() throws Exception {
+		log.info("testing static");
+		setContainerCount(CONTAINER_COUNT);
+	}
+	
+	@Test(timeOut = TestUtils.TEST_TIMEOUT)
+	public void testScaleUp() throws Exception {
+		log.info("testing scale up");
+		setContainerCount(CONTAINER_COUNT + 2);
+	}
+	
+	@Test(timeOut = TestUtils.TEST_TIMEOUT)
+	public void testScaleDown() throws Exception {
+		log.info("testing scale down");
+		setContainerCount(CONTAINER_COUNT - 2);
+	}
+	
+	@Test(timeOut = TestUtils.TEST_TIMEOUT)
+	public void testScaleCycle() throws Exception {
+		log.info("testing scale cycle");
+		setContainerCount(CONTAINER_COUNT + 2);
+		setContainerCount(CONTAINER_COUNT);
+		setContainerCount(CONTAINER_COUNT - 2);
+		setContainerCount(CONTAINER_COUNT);
+	}
+	
+	void setContainerCount(int newContainerCount) throws Exception {
+		log.debug(String.format("Setting container count to %d", newContainerCount));
+		clusterStatusProvider.setTargetContainerCount(TestUtils.metaResourceName, newContainerCount);
+		TestUtils.rebalanceTestCluster();
+	}
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/integration/MultipleProviderFailoverIT.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/integration/MultipleProviderFailoverIT.java b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/integration/MultipleProviderFailoverIT.java
new file mode 100644
index 0000000..1c7edc7
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/integration/MultipleProviderFailoverIT.java
@@ -0,0 +1,148 @@
+package org.apache.helix.metamanager.integration;
+
+import java.util.Collections;
+
+import org.apache.helix.metamanager.StaticStatusProvider;
+import org.apache.helix.metamanager.TestUtils;
+import org.apache.helix.metamanager.impl.local.LocalContainerProvider;
+import org.apache.helix.metamanager.impl.local.LocalContainerSingleton;
+import org.apache.helix.metamanager.impl.local.LocalContainerStatusProvider;
+import org.apache.helix.metamanager.impl.shell.ShellContainerProvider;
+import org.apache.helix.metamanager.impl.shell.ShellContainerSingleton;
+import org.apache.helix.metamanager.impl.shell.ShellContainerStatusProvider;
+import org.apache.helix.metamanager.impl.yarn.YarnContainerProvider;
+import org.apache.helix.metamanager.impl.yarn.YarnContainerStatusProvider;
+import org.apache.log4j.Logger;
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.Test;
+
+public class MultipleProviderFailoverIT {
+	
+	static final Logger log = Logger.getLogger(MultipleProviderFailoverIT.class);
+	
+	static final long TEST_TIMEOUT = 60000;
+	static final long REBALANCE_TIMEOUT = 30000;
+
+	static final int CONTAINER_COUNT = 7;
+
+	StaticStatusProvider clusterStatusProvider;
+	
+	YarnContainerStatusProvider yarnStatusProvider;
+	
+	@BeforeClass
+	public void setupClass() {
+		log.info("installing shutdown hook");
+		Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() {
+			@Override
+			public void run() {
+				try { teardownTest(); } catch(Exception ignore) {};
+			}
+		}));
+	}
+	
+	@BeforeMethod
+	public void setupTest() throws Exception {
+		teardownTest();
+		TestUtils.startZookeeper();
+		clusterStatusProvider = new StaticStatusProvider(Collections.singletonMap(TestUtils.metaResourceName, CONTAINER_COUNT));
+	}
+	
+	@AfterMethod
+	public void teardownTest() throws Exception {
+		TestUtils.stopTestCluster();
+		LocalContainerSingleton.reset();
+		ShellContainerSingleton.reset();
+		if(yarnStatusProvider != null) {
+			yarnStatusProvider.stopService();
+			yarnStatusProvider = null;
+		}
+		TestUtils.stopZookeeper();
+	}
+	
+	@Test(timeOut = TEST_TIMEOUT)
+	public void testLocalContainerFailover() throws Exception {
+		log.info("testing local container failover");
+		TestUtils.startTestCluster(clusterStatusProvider, new LocalContainerStatusProvider(), makeLocalProviders(3));
+		killContainers();
+	}
+	
+	@Test(timeOut = TEST_TIMEOUT)
+	public void testLocalProviderFailover() throws Exception {
+		log.info("testing local provider failover");
+		TestUtils.startTestCluster(clusterStatusProvider, new LocalContainerStatusProvider(), makeLocalProviders(3));
+		killProvider();
+	}
+	
+	@Test(timeOut = TEST_TIMEOUT)
+	public void testShellContainerFailover() throws Exception {
+		log.info("testing shell container failover");
+		TestUtils.startTestCluster(clusterStatusProvider, new ShellContainerStatusProvider(), makeShellProviders(3));
+		killContainers();
+	}
+	
+	@Test(timeOut = TEST_TIMEOUT)
+	public void testShellProviderFailover() throws Exception {
+		log.info("testing shell provider failover");
+		TestUtils.startTestCluster(clusterStatusProvider, new ShellContainerStatusProvider(), makeShellProviders(3));
+		killProvider();
+	}
+	
+	@Test(timeOut = TEST_TIMEOUT)
+	public void testYarnContainerFailover() throws Exception {
+		log.info("testing yarn container failover");
+		yarnStatusProvider = new YarnContainerStatusProvider(TestUtils.zkAddress);
+		yarnStatusProvider.startService();
+		TestUtils.startTestCluster(clusterStatusProvider, yarnStatusProvider, makeYarnProviders(3));
+		killContainers();
+	}
+	
+	@Test(timeOut = TEST_TIMEOUT)
+	public void testYarnProviderFailover() throws Exception {
+		log.info("testing yarn provider failover");
+		yarnStatusProvider = new YarnContainerStatusProvider(TestUtils.zkAddress);
+		yarnStatusProvider.startService();
+		TestUtils.startTestCluster(clusterStatusProvider, yarnStatusProvider, makeYarnProviders(3));
+		killProvider();
+	}
+	
+	static void killContainers() throws Exception {
+		TestUtils.containerProviders.get(1).destroy("container_2");
+		TestUtils.containerProviders.get(1).destroy("container_4");
+		TestUtils.containerProviders.get(1).destroy("container_6");
+		TestUtils.rebalanceTestCluster();
+		TestUtils.waitUntilRebalancedCount(CONTAINER_COUNT, REBALANCE_TIMEOUT);
+	}
+	
+	static void killProvider() throws Exception {
+		TestUtils.managerProcesses.get(1).stop();
+		TestUtils.rebalanceTestCluster();
+		TestUtils.waitUntilRebalancedCount(CONTAINER_COUNT, REBALANCE_TIMEOUT);
+	}
+	
+	static LocalContainerProvider[] makeLocalProviders(int count) {
+		LocalContainerProvider[] providers = new LocalContainerProvider[count];
+		for(int i=0; i<count; i++) {
+			providers[i] = TestUtils.makeLocalProvider("provider_" + i);
+		}
+		return providers;
+	}
+	
+	static ShellContainerProvider[] makeShellProviders(int count) {
+		ShellContainerProvider[] providers = new ShellContainerProvider[count];
+		for(int i=0; i<count; i++) {
+			providers[i] = TestUtils.makeShellProvider("provider_" + i);
+		}
+		return providers;
+	}
+	
+	YarnContainerProvider[] makeYarnProviders(int count) throws Exception {
+		YarnContainerProvider[] providers = new YarnContainerProvider[count];
+		for(int i=0; i<count; i++) {
+			providers[i] = TestUtils.makeYarnProvider("provider_" + i);
+		}
+		return providers;
+	}
+	
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/integration/ShellContainerProviderIT.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/integration/ShellContainerProviderIT.java b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/integration/ShellContainerProviderIT.java
new file mode 100644
index 0000000..19d37a1
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/integration/ShellContainerProviderIT.java
@@ -0,0 +1,87 @@
+package org.apache.helix.metamanager.integration;
+
+import java.util.Collections;
+
+import org.apache.helix.metamanager.TestUtils;
+import org.apache.helix.metamanager.impl.StaticTargetProvider;
+import org.apache.helix.metamanager.impl.shell.ShellContainerProviderProcess;
+import org.apache.helix.metamanager.impl.shell.ShellStatusProvider;
+import org.apache.log4j.Logger;
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.Test;
+
+public class ShellContainerProviderIT {
+	
+	static final Logger log = Logger.getLogger(ShellContainerProviderIT.class);
+	
+	static final long TEST_TIMEOUT = 20000;
+	static final long REBALANCE_TIMEOUT = 10000;
+
+	static final int CONTAINER_COUNT = 4;
+
+	StaticTargetProvider clusterStatusProvider;
+	ShellContainerProviderProcess containerProvider;
+	ShellStatusProvider containerStatusProvider;
+	
+	@BeforeClass
+	public void setupClass() {
+		log.info("installing shutdown hook");
+		Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() {
+			@Override
+			public void run() {
+				try { teardownTest(); } catch(Exception ignore) {};
+			}
+		}));
+	}
+	
+	@BeforeMethod
+	public void setupTest() throws Exception {
+		teardownTest();
+		TestUtils.startZookeeper();
+		containerProvider = TestUtils.makeShellProvider("provider_0");
+		clusterStatusProvider = new StaticTargetProvider(Collections.singletonMap(TestUtils.metaResourceName, CONTAINER_COUNT));
+		containerStatusProvider = new ShellStatusProvider();
+		TestUtils.startTestCluster(clusterStatusProvider, containerStatusProvider, containerProvider);
+	}
+	
+	@AfterMethod
+	public void teardownTest() throws Exception {
+		TestUtils.stopTestCluster();
+		TestUtils.stopZookeeper();
+	}
+	
+	@Test(timeOut = TestUtils.TEST_TIMEOUT)
+	public void testStatic() throws Exception {
+		log.info("testing static");
+		setContainerCount(CONTAINER_COUNT);
+	}
+	
+	@Test(timeOut = TestUtils.TEST_TIMEOUT)
+	public void testScaleUp() throws Exception {
+		log.info("testing scale up");
+		setContainerCount(CONTAINER_COUNT + 2);
+	}
+	
+	@Test(timeOut = TestUtils.TEST_TIMEOUT)
+	public void testScaleDown() throws Exception {
+		log.info("testing scale down");
+		setContainerCount(CONTAINER_COUNT - 2);
+	}
+	
+	@Test(timeOut = TestUtils.TEST_TIMEOUT)
+	public void testScaleCycle() throws Exception {
+		log.info("testing scale cycle");
+		setContainerCount(CONTAINER_COUNT + 2);
+		setContainerCount(CONTAINER_COUNT);
+		setContainerCount(CONTAINER_COUNT - 2);
+		setContainerCount(CONTAINER_COUNT);
+	}
+	
+	void setContainerCount(int newContainerCount) throws Exception {
+		log.debug(String.format("Setting container count to %d", newContainerCount));
+		clusterStatusProvider.setTargetContainerCount(TestUtils.metaResourceName, newContainerCount);
+		TestUtils.rebalanceTestCluster();
+	}
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/integration/YarnContainerProviderIT.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/integration/YarnContainerProviderIT.java b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/integration/YarnContainerProviderIT.java
new file mode 100644
index 0000000..16a9ad6
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/integration/YarnContainerProviderIT.java
@@ -0,0 +1,93 @@
+package org.apache.helix.metamanager.integration;
+
+import java.util.Collections;
+
+import org.apache.helix.metamanager.TestUtils;
+import org.apache.helix.metamanager.impl.StaticTargetProvider;
+import org.apache.helix.metamanager.impl.yarn.YarnContainerProviderProcess;
+import org.apache.helix.metamanager.impl.yarn.YarnContainerProviderProperties;
+import org.apache.helix.metamanager.impl.yarn.YarnStatusProvider;
+import org.apache.log4j.Logger;
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.Test;
+
+public class YarnContainerProviderIT {
+	
+	static final Logger log = Logger.getLogger(YarnContainerProviderIT.class);
+	
+	static final int CONTAINER_COUNT = 4;
+
+	StaticTargetProvider clusterStatusProvider;
+	YarnContainerProviderProcess containerProvider;
+	YarnStatusProvider containerStatusProvider;
+	
+	YarnContainerProviderProperties properties;
+	
+	@BeforeClass
+	public void setupClass() throws Exception {
+		log.info("installing shutdown hook");
+		Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() {
+			@Override
+			public void run() {
+				try { teardownTest(); } catch(Exception ignore) {};
+			}
+		}));
+	}
+	
+	@BeforeMethod
+	public void setupTest() throws Exception {
+		log.debug("setting up yarn test case");
+		
+		teardownTest();
+		TestUtils.startZookeeper();
+		
+		containerProvider = TestUtils.makeYarnProvider("provider_0");
+		containerStatusProvider = new YarnStatusProvider(TestUtils.zkAddress);
+		clusterStatusProvider = new StaticTargetProvider(Collections.singletonMap(TestUtils.metaResourceName, CONTAINER_COUNT));
+		TestUtils.startTestCluster(clusterStatusProvider, containerStatusProvider, containerProvider);
+		
+		log.debug("running yarn test case");
+	}
+	
+	@AfterMethod
+	public void teardownTest() throws Exception {
+		log.debug("cleaning up yarn test case");
+		TestUtils.stopTestCluster();
+		TestUtils.stopZookeeper();
+	}
+	
+	@Test(timeOut = TestUtils.TEST_TIMEOUT)
+	public void testStatic() throws Exception {
+		log.info("testing static");
+		setContainerCount(CONTAINER_COUNT);
+	}
+	
+	@Test(timeOut = TestUtils.TEST_TIMEOUT)
+	public void testScaleUp() throws Exception {
+		log.info("testing scale up");
+		setContainerCount(CONTAINER_COUNT + 2);
+	}
+	
+	@Test(timeOut = TestUtils.TEST_TIMEOUT)
+	public void testScaleDown() throws Exception {
+		log.info("testing scale down");
+		setContainerCount(CONTAINER_COUNT - 2);
+	}
+	
+	@Test(timeOut = TestUtils.TEST_TIMEOUT)
+	public void testScaleCycle() throws Exception {
+		log.info("testing scale cycle");
+		setContainerCount(CONTAINER_COUNT + 2);
+		setContainerCount(CONTAINER_COUNT);
+		setContainerCount(CONTAINER_COUNT - 2);
+		setContainerCount(CONTAINER_COUNT);
+	}
+	
+	void setContainerCount(int newContainerCount) throws Exception {
+		log.debug(String.format("Setting container count to %d", newContainerCount));
+		clusterStatusProvider.setTargetContainerCount(TestUtils.metaResourceName, newContainerCount);
+		TestUtils.rebalanceTestCluster();
+	}
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/unit/TestUtilsTestUT.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/unit/TestUtilsTestUT.java b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/unit/TestUtilsTestUT.java
new file mode 100644
index 0000000..9570b54
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/unit/TestUtilsTestUT.java
@@ -0,0 +1,62 @@
+package org.apache.helix.metamanager.unit;
+
+import java.util.Collections;
+
+import org.apache.helix.metamanager.ClusterStatusProvider;
+import org.apache.helix.metamanager.ContainerProvider;
+import org.apache.helix.metamanager.ContainerStatusProvider;
+import org.apache.helix.metamanager.StaticStatusProvider;
+import org.apache.helix.metamanager.TestUtils;
+import org.apache.helix.metamanager.impl.local.LocalContainerSingleton;
+import org.apache.helix.metamanager.impl.local.LocalContainerStatusProvider;
+import org.apache.log4j.Logger;
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.Test;
+
+public class TestUtilsTestUT {
+
+	static final Logger log = Logger.getLogger(TestUtilsTestUT.class);
+	
+	@AfterMethod
+	public void teardownTest() throws Exception {
+		LocalContainerSingleton.reset();
+	}
+	
+	@Test
+	public void testZookeeper() throws Exception {
+		log.info("testing zookeeper");
+		TestUtils.startZookeeper();
+		TestUtils.stopZookeeper();
+	}
+
+	@Test
+	public void testCluster() throws Exception {
+		log.info("testing cluster");
+		TestUtils.startZookeeper();
+		
+		TestUtils.startTestCluster(new StaticStatusProvider(Collections.singletonMap(TestUtils.metaResourceName, 1)),
+				new LocalContainerStatusProvider(), TestUtils.makeLocalProvider("test"));
+		TestUtils.stopTestCluster();
+		
+		TestUtils.stopZookeeper();
+	}
+
+	@Test
+	public void testClusterRepeated() throws Exception {
+		log.info("testing cluster restart");
+		TestUtils.startZookeeper();
+		
+		ClusterStatusProvider statusProvider = new StaticStatusProvider(Collections.singletonMap(TestUtils.metaResourceName, 1));
+		ContainerProvider containerProvider = TestUtils.makeLocalProvider("test");
+		ContainerStatusProvider containerStatusProvider = new LocalContainerStatusProvider();
+		
+		TestUtils.startTestCluster(statusProvider, containerStatusProvider, containerProvider);
+		TestUtils.stopTestCluster();
+
+		TestUtils.startTestCluster(statusProvider, containerStatusProvider, containerProvider);
+		TestUtils.stopTestCluster();
+
+		TestUtils.stopZookeeper();
+	}
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/unit/TestUtilsUT.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/unit/TestUtilsUT.java b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/unit/TestUtilsUT.java
new file mode 100644
index 0000000..a4d5dd3
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/unit/TestUtilsUT.java
@@ -0,0 +1,55 @@
+package org.apache.helix.metamanager.unit;
+
+import java.util.Collections;
+
+import org.apache.helix.metamanager.Service;
+import org.apache.helix.metamanager.StatusProviderService;
+import org.apache.helix.metamanager.TargetProviderService;
+import org.apache.helix.metamanager.TestUtils;
+import org.apache.helix.metamanager.impl.StaticTargetProvider;
+import org.apache.helix.metamanager.impl.local.LocalStatusProvider;
+import org.apache.log4j.Logger;
+import org.testng.annotations.Test;
+
+public class TestUtilsUT {
+
+	static final Logger log = Logger.getLogger(TestUtilsUT.class);
+	
+	@Test
+	public void testZookeeper() throws Exception {
+		log.info("testing zookeeper");
+		TestUtils.startZookeeper();
+		TestUtils.stopZookeeper();
+	}
+
+	@Test
+	public void testCluster() throws Exception {
+		log.info("testing cluster");
+		TestUtils.startZookeeper();
+		
+		TestUtils.startTestCluster(new StaticTargetProvider(Collections.singletonMap(TestUtils.metaResourceName, 1)),
+		        new LocalStatusProvider(), TestUtils.makeLocalProvider("test"));
+		TestUtils.stopTestCluster();
+		
+		TestUtils.stopZookeeper();
+	}
+
+	@Test
+	public void testClusterRepeated() throws Exception {
+		log.info("testing cluster restart");
+		TestUtils.startZookeeper();
+		
+		TargetProviderService statusProvider = new StaticTargetProvider(Collections.singletonMap(TestUtils.metaResourceName, 1));
+		Service containerProvider = TestUtils.makeLocalProvider("test");
+		StatusProviderService containerStatusProvider = new LocalStatusProvider();
+		
+		TestUtils.startTestCluster(statusProvider, containerStatusProvider, containerProvider);
+		TestUtils.stopTestCluster();
+
+		TestUtils.startTestCluster(statusProvider, containerStatusProvider, containerProvider);
+		TestUtils.stopTestCluster();
+
+		TestUtils.stopZookeeper();
+	}
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/test/resources/distributed.properties
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/test/resources/distributed.properties b/recipes/meta-cluster-manager/src/test/resources/distributed.properties
new file mode 100644
index 0000000..47fd8e0
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/test/resources/distributed.properties
@@ -0,0 +1,13 @@
+#
+# zookeeper will be spawned locally by TestUtils
+#
+zookeeper.port=2199
+zookeeper.address=rm:2199
+
+#
+# YARN and HDFS must be set up manually
+#
+yarn.resourcemanager=rm:8032
+yarn.scheduler=rm:8030
+yarn.hdfs=hdfs://rm:9000/
+yarn.user=yarn
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/test/resources/log4j.properties
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/test/resources/log4j.properties b/recipes/meta-cluster-manager/src/test/resources/log4j.properties
new file mode 100644
index 0000000..57bc008
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/test/resources/log4j.properties
@@ -0,0 +1,30 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# Set root logger level to ERROR and its only appender to C.
+log4j.rootLogger=ERROR, C
+
+# C is set to be a ConsoleAppender.
+log4j.appender.C=org.apache.log4j.ConsoleAppender
+log4j.appender.C.layout=org.apache.log4j.PatternLayout
+log4j.appender.C.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n
+
+log4j.logger.org.I0Itec=ERROR
+log4j.logger.org.apache=ERROR
+
+log4j.logger.org.apache.helix.metamanager=DEBUG

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/test/resources/standalone.properties
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/test/resources/standalone.properties b/recipes/meta-cluster-manager/src/test/resources/standalone.properties
new file mode 100644
index 0000000..d4b4e86
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/test/resources/standalone.properties
@@ -0,0 +1,13 @@
+#
+# zookeeper will be spawned locally by TestUtils
+#
+zookeeper.port=2199
+zookeeper.address=localhost:2199
+
+#
+# YARN and HDFS must be set up manually
+#
+yarn.resourcemanager=localhost:8032
+yarn.scheduler=localhost:8030
+yarn.hdfs=hdfs://localhost:9000/
+yarn.user=yarn
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/pom.xml
----------------------------------------------------------------------
diff --git a/recipes/pom.xml b/recipes/pom.xml
index 3667650..5c13e6b 100644
--- a/recipes/pom.xml
+++ b/recipes/pom.xml
@@ -30,6 +30,7 @@ under the License.
   <name>Apache Helix :: Recipes</name>
 
   <modules>
+    <module>auto-scale</module>
     <module>rabbitmq-consumer-group</module>
     <module>rsync-replicated-file-system</module>
     <module>distributed-lock-manager</module>


[13/15] Adding Helix-task-framework and Yarn integration modules

Posted by ki...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/helix-core/src/test/java/org/apache/helix/integration/task/TestTaskRebalancer.java
----------------------------------------------------------------------
diff --git a/helix-core/src/test/java/org/apache/helix/integration/task/TestTaskRebalancer.java b/helix-core/src/test/java/org/apache/helix/integration/task/TestTaskRebalancer.java
new file mode 100644
index 0000000..dec884b
--- /dev/null
+++ b/helix-core/src/test/java/org/apache/helix/integration/task/TestTaskRebalancer.java
@@ -0,0 +1,330 @@
+/*
+ * $Id$
+ */
+package org.apache.helix.integration.task;
+
+
+import com.google.common.base.Joiner;
+import com.google.common.collect.ImmutableList;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.Map;
+
+import org.apache.helix.*;
+import org.apache.helix.controller.HelixControllerMain;
+import org.apache.helix.integration.ZkIntegrationTestBase;
+import org.apache.helix.task.*;
+import org.apache.helix.tools.ClusterSetup;
+import org.apache.helix.tools.ClusterStateVerifier;
+import org.testng.Assert;
+import org.testng.annotations.AfterClass;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.Test;
+
+
+/**
+ * @author Abe <as...@linkedin.com>
+ * @version $Revision$
+ */
+public class TestTaskRebalancer extends ZkIntegrationTestBase
+{
+  private static final int NUM_NODES = 5;
+  private static final int START_PORT = 12918;
+  private static final String MASTER_SLAVE_STATE_MODEL = "MasterSlave";
+  private static final int NUM_PARTITIONS = 20;
+  private static final int NUM_REPLICAS = 3;
+  private final String CLUSTER_NAME = CLUSTER_PREFIX + "_" + getShortClassName();
+  private final Map<String, TestHelper.StartCMResult> _startCMResultMap = new HashMap<String, TestHelper.StartCMResult>();
+  private HelixManager _manager;
+  private TaskDriver _driver;
+
+  @BeforeClass
+  public void beforeClass()
+      throws Exception
+  {
+    String namespace = "/" + CLUSTER_NAME;
+    if (_gZkClient.exists(namespace))
+    {
+      _gZkClient.deleteRecursive(namespace);
+    }
+
+    ClusterSetup setupTool = new ClusterSetup(ZK_ADDR);
+    setupTool.addCluster(CLUSTER_NAME, true);
+    for (int i = 0; i < NUM_NODES; i++)
+    {
+      String storageNodeName = PARTICIPANT_PREFIX + "_" + (START_PORT + i);
+      setupTool.addInstanceToCluster(CLUSTER_NAME, storageNodeName);
+    }
+
+    // Set up target db
+    setupTool.addResourceToCluster(CLUSTER_NAME, WorkflowGenerator.DEFAULT_TGT_DB, NUM_PARTITIONS, MASTER_SLAVE_STATE_MODEL);
+    setupTool.rebalanceStorageCluster(CLUSTER_NAME, WorkflowGenerator.DEFAULT_TGT_DB, NUM_REPLICAS);
+
+    Map<String, TaskFactory> taskFactoryReg = new HashMap<String, TaskFactory>();
+    taskFactoryReg.put("Reindex", new TaskFactory()
+    {
+      @Override
+      public Task createNewTask(String config)
+      {
+        return new ReindexTask(config);
+      }
+    });
+
+    // start dummy participants
+    for (int i = 0; i < NUM_NODES; i++)
+    {
+      String instanceName = PARTICIPANT_PREFIX + "_" + (START_PORT + i);
+      TestHelper.StartCMResult result = TestUtil.startDummyProcess(ZK_ADDR, CLUSTER_NAME, instanceName, taskFactoryReg);
+      _startCMResultMap.put(instanceName, result);
+    }
+
+    // start controller
+    String controllerName = CONTROLLER_PREFIX + "_0";
+    TestHelper.StartCMResult startResult = TestHelper.startController(CLUSTER_NAME,
+                                                                      controllerName,
+                                                                      ZK_ADDR,
+                                                                      HelixControllerMain.STANDALONE);
+    _startCMResultMap.put(controllerName, startResult);
+
+    // create cluster manager
+    _manager = HelixManagerFactory.getZKHelixManager(CLUSTER_NAME, "Admin", InstanceType.ADMINISTRATOR, ZK_ADDR);
+    _manager.connect();
+    _driver = new TaskDriver(_manager);
+
+    boolean result = ClusterStateVerifier.verifyByZkCallback(new ClusterStateVerifier.MasterNbInExtViewVerifier(ZK_ADDR,
+                                                                                                                CLUSTER_NAME));
+    Assert.assertTrue(result);
+
+    result = ClusterStateVerifier.verifyByZkCallback(new ClusterStateVerifier.BestPossAndExtViewZkVerifier(ZK_ADDR,
+                                                                                                           CLUSTER_NAME));
+    Assert.assertTrue(result);
+  }
+
+  @AfterClass
+  public void afterClass()
+      throws Exception
+  {
+    /**
+     * shutdown order: 1) disconnect the controller 2) disconnect participants
+     */
+
+    TestHelper.StartCMResult result;
+    Iterator<Map.Entry<String, TestHelper.StartCMResult>> it = _startCMResultMap.entrySet().iterator();
+    while (it.hasNext())
+    {
+      String instanceName = it.next().getKey();
+      if (instanceName.startsWith(CONTROLLER_PREFIX))
+      {
+        result = _startCMResultMap.get(instanceName);
+        result._manager.disconnect();
+        result._thread.interrupt();
+        it.remove();
+      }
+    }
+
+    Thread.sleep(100);
+    it = _startCMResultMap.entrySet().iterator();
+    while (it.hasNext())
+    {
+      String instanceName = it.next().getKey();
+      result = _startCMResultMap.get(instanceName);
+      result._manager.disconnect();
+      result._thread.interrupt();
+      it.remove();
+    }
+
+    _manager.disconnect();
+  }
+
+  @Test
+  public void basic()
+      throws Exception
+  {
+    basic(100);
+  }
+
+  @Test
+  public void zeroTaskCompletionTime()
+      throws Exception
+  {
+    basic(0);
+  }
+
+  @Test
+  public void testExpiry() throws Exception
+  {
+    String taskName = "Expiry";
+    long expiry = 1000;
+    Workflow flow = WorkflowGenerator.generateDefaultSingleTaskWorkflowBuilderWithExtraConfigs(taskName,
+            TaskConfig.COMMAND_CONFIG, String.valueOf(100)).setExpiry(expiry).build();
+
+    _driver.start(flow);
+    TestUtil.pollForWorkflowState(_manager, taskName, TaskState.IN_PROGRESS);
+
+    // Running workflow should have config and context viewable through accessor
+    HelixDataAccessor accessor = _manager.getHelixDataAccessor();
+    PropertyKey workflowCfgKey = accessor.keyBuilder().resourceConfig(taskName);
+    String workflowPropStoreKey = Joiner.on("/").join(TaskConstants.REBALANCER_CONTEXT_ROOT, taskName);
+
+    // Ensure context and config exist
+    Assert.assertTrue(_manager.getHelixPropertyStore().exists(workflowPropStoreKey, AccessOption.PERSISTENT));
+    Assert.assertNotSame(accessor.getProperty(workflowCfgKey), null);
+
+    // Wait for task to finish and expire
+    TestUtil.pollForWorkflowState(_manager, taskName, TaskState.COMPLETED);
+    Thread.sleep(expiry);
+    _driver.invokeRebalance();
+    Thread.sleep(expiry);
+
+    // Ensure workflow config and context were cleaned up by now
+    Assert.assertFalse(_manager.getHelixPropertyStore().exists(workflowPropStoreKey, AccessOption.PERSISTENT));
+    Assert.assertEquals(accessor.getProperty(workflowCfgKey), null);
+  }
+
+  private void basic(long taskCompletionTime)
+      throws Exception
+  {
+    // We use a different resource name in each test method as a work around for a helix participant bug where it does
+    // not clear locally cached state when a resource partition is dropped. Once that is fixed we should change these
+    // tests to use the same resource name and implement a beforeMethod that deletes the task resource.
+    final String taskResource = "basic" + taskCompletionTime;
+    Workflow flow = WorkflowGenerator.generateDefaultSingleTaskWorkflowBuilderWithExtraConfigs(taskResource,
+            TaskConfig.COMMAND_CONFIG, String.valueOf(taskCompletionTime)).build();
+    _driver.start(flow);
+
+    // Wait for task completion
+    TestUtil.pollForWorkflowState(_manager, taskResource, TaskState.COMPLETED);
+
+    // Ensure all partitions are completed individually
+    TaskContext ctx = TaskUtil.getTaskContext(_manager, TaskUtil.getNamespacedTaskName(taskResource));
+    for (int i = 0; i < NUM_PARTITIONS; i++)
+    {
+      Assert.assertEquals(ctx.getPartitionState(i), TaskPartitionState.COMPLETED);
+      Assert.assertEquals(ctx.getPartitionNumAttempts(i), 1);
+    }
+  }
+
+  @Test
+  public void partitionSet()
+      throws Exception
+  {
+    final String taskResource = "partitionSet";
+    ImmutableList<Integer> targetPartitions = ImmutableList.of(1, 2, 3, 5, 8, 13);
+
+    // construct and submit our basic workflow
+    Workflow flow = WorkflowGenerator.generateDefaultSingleTaskWorkflowBuilderWithExtraConfigs(taskResource,
+            TaskConfig.COMMAND_CONFIG, String.valueOf(100),
+            TaskConfig.MAX_ATTEMPTS_PER_PARTITION, String.valueOf(1),
+            TaskConfig.TARGET_PARTITIONS, Joiner.on(",").join(targetPartitions)).build();
+    _driver.start(flow);
+
+    // wait for task completeness/timeout
+    TestUtil.pollForWorkflowState(_manager, taskResource, TaskState.COMPLETED);
+
+    // see if resulting context completed successfully for our partition set
+    String namespacedName = TaskUtil.getNamespacedTaskName(taskResource);
+
+    TaskContext ctx = TaskUtil.getTaskContext(_manager, namespacedName);
+    WorkflowContext workflowContext = TaskUtil.getWorkflowContext(_manager, taskResource);
+    Assert.assertNotNull(ctx);
+    Assert.assertNotNull(workflowContext);
+    Assert.assertEquals(workflowContext.getTaskState(namespacedName), TaskState.COMPLETED);
+    for (int i : targetPartitions)
+    {
+      Assert.assertEquals(ctx.getPartitionState(i), TaskPartitionState.COMPLETED);
+      Assert.assertEquals(ctx.getPartitionNumAttempts(i), 1);
+    }
+  }
+
+  @Test
+  public void testRepeatedWorkflow() throws Exception
+  {
+    String workflowName = "SomeWorkflow";
+    Workflow flow = WorkflowGenerator.generateDefaultRepeatedTaskWorkflowBuilder(workflowName).build();
+    new TaskDriver(_manager).start(flow);
+
+    // Wait until the task completes
+    TestUtil.pollForWorkflowState(_manager, workflowName, TaskState.COMPLETED);
+
+    // Assert completion for all tasks within two minutes
+    for(String task : flow.getTaskConfigs().keySet())
+    {
+      TestUtil.pollForTaskState(_manager, workflowName, task, TaskState.COMPLETED);
+    }
+  }
+
+  @Test
+  public void timeouts()
+      throws Exception
+  {
+    final String taskResource = "timeouts";
+    Workflow flow = WorkflowGenerator.generateDefaultSingleTaskWorkflowBuilderWithExtraConfigs(taskResource,
+            TaskConfig.MAX_ATTEMPTS_PER_PARTITION, String.valueOf(2),
+            TaskConfig.TIMEOUT_PER_PARTITION, String.valueOf(100)).build();
+    _driver.start(flow);
+
+    // Wait until the task reports failure.
+    TestUtil.pollForWorkflowState(_manager, taskResource, TaskState.FAILED);
+
+    // Check that all partitions timed out up to maxAttempts
+    TaskContext ctx = TaskUtil.getTaskContext(_manager, TaskUtil.getNamespacedTaskName(taskResource));
+    int maxAttempts = 0;
+    for (int i = 0; i < NUM_PARTITIONS; i++)
+    {
+      TaskPartitionState state = ctx.getPartitionState(i);
+      if (state != null)
+      {
+        Assert.assertEquals(state, TaskPartitionState.TIMED_OUT);
+        maxAttempts = Math.max(maxAttempts, ctx.getPartitionNumAttempts(i));
+      }
+    }
+    Assert.assertEquals(maxAttempts, 2);
+  }
+
+  private static class ReindexTask implements Task
+  {
+    private final long _delay;
+    private volatile boolean _canceled;
+
+    public ReindexTask(String cfg)
+    {
+      _delay = Long.parseLong(cfg);
+    }
+
+    @Override
+    public TaskResult run()
+    {
+      long expiry = System.currentTimeMillis() + _delay;
+      long timeLeft;
+      while (System.currentTimeMillis() < expiry)
+      {
+        if (_canceled)
+        {
+          timeLeft = expiry - System.currentTimeMillis();
+          return new TaskResult(TaskResult.Status.CANCELED, String.valueOf(timeLeft < 0 ? 0 : timeLeft));
+        }
+        sleep(50);
+      }
+      timeLeft = expiry - System.currentTimeMillis();
+      return new TaskResult(TaskResult.Status.COMPLETED, String.valueOf(timeLeft < 0 ? 0 : timeLeft));
+    }
+
+    @Override
+    public void cancel()
+    {
+      _canceled = true;
+    }
+
+    private static void sleep(long d)
+    {
+      try
+      {
+        Thread.sleep(d);
+      }
+      catch (InterruptedException e)
+      {
+        e.printStackTrace();
+      }
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/helix-core/src/test/java/org/apache/helix/integration/task/TestTaskRebalancerStopResume.java
----------------------------------------------------------------------
diff --git a/helix-core/src/test/java/org/apache/helix/integration/task/TestTaskRebalancerStopResume.java b/helix-core/src/test/java/org/apache/helix/integration/task/TestTaskRebalancerStopResume.java
new file mode 100644
index 0000000..4c17397
--- /dev/null
+++ b/helix-core/src/test/java/org/apache/helix/integration/task/TestTaskRebalancerStopResume.java
@@ -0,0 +1,231 @@
+/*
+ * $Id$
+ */
+package org.apache.helix.integration.task;
+
+
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.Map;
+import org.apache.helix.HelixManager;
+import org.apache.helix.HelixManagerFactory;
+import org.apache.helix.InstanceType;
+import org.apache.helix.TestHelper;
+import org.apache.helix.controller.HelixControllerMain;
+import org.apache.helix.integration.ZkIntegrationTestBase;
+import org.apache.helix.integration.ZkStandAloneCMTestBase;
+import org.apache.helix.task.*;
+import org.apache.helix.tools.ClusterSetup;
+import org.apache.helix.tools.ClusterStateVerifier;
+import org.apache.log4j.Logger;
+import org.testng.Assert;
+import org.testng.annotations.AfterClass;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.Test;
+
+
+/**
+ * @author Abe <as...@linkedin.com>
+ * @version $Revision$
+ */
+public class TestTaskRebalancerStopResume extends ZkIntegrationTestBase
+{
+  private static final Logger LOG = Logger.getLogger(ZkStandAloneCMTestBase.class);
+  private static final int NUM_NODES = 5;
+  private static final int START_PORT = 12918;
+  private static final String MASTER_SLAVE_STATE_MODEL = "MasterSlave";
+  private static final String TGT_DB = "TestDB";
+  private static final String TASK_RESOURCE = "SomeTask";
+  private static final int NUM_PARTITIONS = 20;
+  private static final int NUM_REPLICAS = 3;
+  private final String CLUSTER_NAME = CLUSTER_PREFIX + "_" + getShortClassName();
+  private final Map<String, TestHelper.StartCMResult> _startCMResultMap = new HashMap<String, TestHelper.StartCMResult>();
+  private HelixManager _manager;
+  private TaskDriver _driver;
+
+  @BeforeClass
+  public void beforeClass()
+      throws Exception
+  {
+    String namespace = "/" + CLUSTER_NAME;
+    if (_gZkClient.exists(namespace))
+    {
+      _gZkClient.deleteRecursive(namespace);
+    }
+
+    ClusterSetup setupTool = new ClusterSetup(ZK_ADDR);
+    setupTool.addCluster(CLUSTER_NAME, true);
+    for (int i = 0; i < NUM_NODES; i++)
+    {
+      String storageNodeName = PARTICIPANT_PREFIX + "_" + (START_PORT + i);
+      setupTool.addInstanceToCluster(CLUSTER_NAME, storageNodeName);
+    }
+
+    // Set up target db
+    setupTool.addResourceToCluster(CLUSTER_NAME, TGT_DB, NUM_PARTITIONS, MASTER_SLAVE_STATE_MODEL);
+    setupTool.rebalanceStorageCluster(CLUSTER_NAME, TGT_DB, NUM_REPLICAS);
+
+    Map<String, TaskFactory> taskFactoryReg = new HashMap<String, TaskFactory>();
+    taskFactoryReg.put("Reindex", new TaskFactory()
+    {
+      @Override
+      public Task createNewTask(String config)
+      {
+        return new ReindexTask(config);
+      }
+    });
+
+    // start dummy participants
+    for (int i = 0; i < NUM_NODES; i++)
+    {
+      String instanceName = PARTICIPANT_PREFIX + "_" + (START_PORT + i);
+      TestHelper.StartCMResult result = TestUtil.startDummyProcess(ZK_ADDR, CLUSTER_NAME, instanceName, taskFactoryReg);
+      _startCMResultMap.put(instanceName, result);
+    }
+
+    // start controller
+    String controllerName = CONTROLLER_PREFIX + "_0";
+    TestHelper.StartCMResult startResult = TestHelper.startController(CLUSTER_NAME,
+                                                                      controllerName,
+                                                                      ZK_ADDR,
+                                                                      HelixControllerMain.STANDALONE);
+    _startCMResultMap.put(controllerName, startResult);
+
+    // create cluster manager
+    _manager = HelixManagerFactory.getZKHelixManager(CLUSTER_NAME, "Admin", InstanceType.ADMINISTRATOR, ZK_ADDR);
+    _manager.connect();
+
+    _driver = new TaskDriver(_manager);
+
+    boolean result = ClusterStateVerifier.verifyByZkCallback(new ClusterStateVerifier.MasterNbInExtViewVerifier(ZK_ADDR,
+                                                                                                                CLUSTER_NAME));
+    Assert.assertTrue(result);
+
+    result = ClusterStateVerifier.verifyByZkCallback(new ClusterStateVerifier.BestPossAndExtViewZkVerifier(ZK_ADDR,
+                                                                                                           CLUSTER_NAME));
+    Assert.assertTrue(result);
+  }
+
+  @AfterClass
+  public void afterClass()
+      throws Exception
+  {
+    /**
+     * shutdown order: 1) disconnect the controller 2) disconnect participants
+     */
+
+    TestHelper.StartCMResult result;
+    Iterator<Map.Entry<String, TestHelper.StartCMResult>> it = _startCMResultMap.entrySet().iterator();
+    while (it.hasNext())
+    {
+      String instanceName = it.next().getKey();
+      if (instanceName.startsWith(CONTROLLER_PREFIX))
+      {
+        result = _startCMResultMap.get(instanceName);
+        result._manager.disconnect();
+        result._thread.interrupt();
+        it.remove();
+      }
+    }
+
+    Thread.sleep(100);
+    it = _startCMResultMap.entrySet().iterator();
+    while (it.hasNext())
+    {
+      String instanceName = it.next().getKey();
+      result = _startCMResultMap.get(instanceName);
+      result._manager.disconnect();
+      result._thread.interrupt();
+      it.remove();
+    }
+
+    _manager.disconnect();
+  }
+
+  @Test
+  public void stopAndResume()
+      throws Exception
+  {
+    Workflow flow = WorkflowGenerator.generateDefaultSingleTaskWorkflowBuilderWithExtraConfigs(TASK_RESOURCE,
+            TaskConfig.COMMAND_CONFIG, String.valueOf(100)).build();
+
+    LOG.info("Starting flow " + flow.getName());
+    _driver.start(flow);
+    TestUtil.pollForWorkflowState(_manager, TASK_RESOURCE, TaskState.IN_PROGRESS);
+
+    LOG.info("Pausing task");
+    _driver.stop(TASK_RESOURCE);
+    TestUtil.pollForWorkflowState(_manager, TASK_RESOURCE, TaskState.STOPPED);
+
+    LOG.info("Resuming task");
+    _driver.resume(TASK_RESOURCE);
+    TestUtil.pollForWorkflowState(_manager, TASK_RESOURCE, TaskState.COMPLETED);
+  }
+
+  @Test
+  public void stopAndResumeWorkflow()
+          throws Exception
+  {
+    String workflow = "SomeWorkflow";
+    Workflow flow = WorkflowGenerator.generateDefaultRepeatedTaskWorkflowBuilder(workflow).build();
+
+    LOG.info("Starting flow " + workflow);
+    _driver.start(flow);
+    TestUtil.pollForWorkflowState(_manager, workflow, TaskState.IN_PROGRESS);
+
+    LOG.info("Pausing workflow");
+    _driver.stop(workflow);
+    TestUtil.pollForWorkflowState(_manager, workflow, TaskState.STOPPED);
+
+    LOG.info("Resuming workflow");
+    _driver.resume(workflow);
+    TestUtil.pollForWorkflowState(_manager, workflow, TaskState.COMPLETED);
+  }
+
+  public static class ReindexTask implements Task
+  {
+    private final long _delay;
+    private volatile boolean _canceled;
+
+    public ReindexTask(String cfg)
+    {
+      _delay = Long.parseLong(cfg);
+    }
+
+    @Override
+    public TaskResult run()
+    {
+      long expiry = System.currentTimeMillis() + _delay;
+      long timeLeft;
+      while (System.currentTimeMillis() < expiry)
+      {
+        if (_canceled)
+        {
+          timeLeft = expiry - System.currentTimeMillis();
+          return new TaskResult(TaskResult.Status.CANCELED, String.valueOf(timeLeft < 0 ? 0 : timeLeft));
+        }
+        sleep(50);
+      }
+      timeLeft = expiry - System.currentTimeMillis();
+      return new TaskResult(TaskResult.Status.COMPLETED, String.valueOf(timeLeft < 0 ? 0 : timeLeft));
+    }
+
+    @Override
+    public void cancel()
+    {
+      _canceled = true;
+    }
+
+    private static void sleep(long d)
+    {
+      try
+      {
+        Thread.sleep(d);
+      }
+      catch (InterruptedException e)
+      {
+        e.printStackTrace();
+      }
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/helix-core/src/test/java/org/apache/helix/integration/task/TestUtil.java
----------------------------------------------------------------------
diff --git a/helix-core/src/test/java/org/apache/helix/integration/task/TestUtil.java b/helix-core/src/test/java/org/apache/helix/integration/task/TestUtil.java
new file mode 100644
index 0000000..17592b7
--- /dev/null
+++ b/helix-core/src/test/java/org/apache/helix/integration/task/TestUtil.java
@@ -0,0 +1,128 @@
+/*
+ * $Id$
+ */
+package org.apache.helix.integration.task;
+
+
+import java.util.Map;
+import org.apache.helix.HelixManager;
+import org.apache.helix.InstanceType;
+import org.apache.helix.TestHelper;
+import org.apache.helix.ZkHelixTestManager;
+import org.apache.helix.mock.participant.DummyProcess;
+import org.apache.helix.participant.StateMachineEngine;
+import org.apache.helix.task.*;
+import org.apache.log4j.Logger;
+import org.testng.Assert;
+
+
+/**
+ * Static test utility methods.
+ *
+ * @author Abe <as...@linkedin.com>
+ * @version $Revision$
+ */
+public class TestUtil
+{
+  private static final Logger LOG = Logger.getLogger(TestUtil.class);
+
+  public static TestHelper.StartCMResult startDummyProcess(String zkAddr,
+                                                            String clusterName,
+                                                            String instanceName,
+                                                            Map<String, TaskFactory> taskFactoryMap)
+      throws Exception
+  {
+    TestHelper.StartCMResult result = new TestHelper.StartCMResult();
+    ZkHelixTestManager manager = new ZkHelixTestManager(clusterName,
+                                                        instanceName,
+                                                        InstanceType.PARTICIPANT,
+                                                        zkAddr);
+    result._manager = manager;
+    Thread thread = new Thread(new MockInstanceThread(manager, instanceName, taskFactoryMap));
+    result._thread = thread;
+    thread.start();
+
+    return result;
+  }
+
+  /**
+   * Polls {@link org.apache.helix.task.WorkflowContext} for the given workflow resource until the
+   * target state is reached or a 2-minute timeout expires; asserts that the state was reached.
+   *
+   * @param workflowResource workflow resource to poll for the target state
+   * @throws InterruptedException
+   */
+  public static void pollForWorkflowState(HelixManager manager, String workflowResource, TaskState state)
+          throws InterruptedException
+  {
+    // Wait for completion.
+    long st = System.currentTimeMillis();
+    WorkflowContext ctx;
+    do
+    {
+      Thread.sleep(100);
+      ctx = TaskUtil.getWorkflowContext(manager, workflowResource);
+    }
+    while ((ctx == null || ctx.getWorkflowState() == null || ctx.getWorkflowState() != state)
+            && System.currentTimeMillis() < st + 2 * 60 * 1000 /* 2 mins */);
+
+    Assert.assertNotNull(ctx);
+    Assert.assertEquals(ctx.getWorkflowState(), state);
+  }
+
+  public static void pollForTaskState(HelixManager manager, String workflowResource, String taskName, TaskState state)
+          throws InterruptedException
+  {
+    // Poll the workflow context until the named task reaches the target state (2 min timeout).
+    long st = System.currentTimeMillis();
+    WorkflowContext ctx;
+    do
+    {
+      Thread.sleep(100);
+      ctx = TaskUtil.getWorkflowContext(manager, workflowResource);
+    }
+    while ((ctx == null || ctx.getTaskState(taskName) == null || ctx.getTaskState(taskName) != state)
+            && System.currentTimeMillis() < st + 2 * 60 * 1000 /* 2 mins */);
+
+    Assert.assertNotNull(ctx);
+    Assert.assertEquals(ctx.getTaskState(taskName), state);
+  }
+
+  private static class MockInstanceThread implements Runnable
+  {
+    private final HelixManager _manager;
+    private final String _instanceName;
+    private final Map<String, TaskFactory> _factoryMap;
+
+    public MockInstanceThread(HelixManager manager, String instanceName, Map<String, TaskFactory> factoryMap)
+    {
+      _manager = manager;
+      _instanceName = instanceName;
+      _factoryMap = factoryMap;
+    }
+
+    @Override
+    public void run()
+    {
+      try
+      {
+        StateMachineEngine stateMach = _manager.getStateMachineEngine();
+        // Register dummy MasterSlave state model factory.
+        stateMach.registerStateModelFactory("MasterSlave", new DummyProcess.DummyStateModelFactory(0));
+        // Register a Task state model factory.
+        stateMach.registerStateModelFactory("Task", new TaskStateModelFactory(_manager, _factoryMap));
+
+        _manager.connect();
+        Thread.currentThread().join();
+      }
+      catch (InterruptedException e)
+      {
+        LOG.info("participant:" + _instanceName + ", " + Thread.currentThread().getName() + " interrupted");
+      }
+      catch (Exception e)
+      {
+        LOG.error("participant:" + _instanceName + ", " + Thread.currentThread().getName() + " interrupted", e);
+      }
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/helix-core/src/test/java/org/apache/helix/integration/task/WorkflowGenerator.java
----------------------------------------------------------------------
diff --git a/helix-core/src/test/java/org/apache/helix/integration/task/WorkflowGenerator.java b/helix-core/src/test/java/org/apache/helix/integration/task/WorkflowGenerator.java
new file mode 100644
index 0000000..f096a1a
--- /dev/null
+++ b/helix-core/src/test/java/org/apache/helix/integration/task/WorkflowGenerator.java
@@ -0,0 +1,76 @@
+package org.apache.helix.integration.task;
+
+import org.apache.helix.task.Workflow;
+
+import java.util.Collections;
+import java.util.Map;
+import java.util.TreeMap;
+
+
+/**
+ * Convenience class for generating various test workflows
+ *
+ * @author Chris Beavers <cb...@linkedin.com>
+ */
+public class WorkflowGenerator
+{
+  public static final String DEFAULT_TGT_DB = "TestDB";
+  private static final String TASK_NAME_1 = "SomeTask1";
+  private static final String TASK_NAME_2 = "SomeTask2";
+
+  private static final Map<String, String> DEFAULT_TASK_CONFIG;
+  static {
+    Map<String, String> tmpMap = new TreeMap<String,String>();
+    tmpMap.put("TargetResource", DEFAULT_TGT_DB);
+    tmpMap.put("TargetPartitionStates", "MASTER");
+    tmpMap.put("Command", "Reindex");
+    tmpMap.put("CommandConfig", String.valueOf(2000));
+    tmpMap.put("TimeoutPerPartition", String.valueOf(10 * 1000));
+    DEFAULT_TASK_CONFIG = Collections.unmodifiableMap(tmpMap);
+  }
+
+  public static Workflow.Builder generateDefaultSingleTaskWorkflowBuilderWithExtraConfigs(String taskName,
+          String ... cfgs)
+  {
+    if(cfgs.length % 2 != 0)
+    {
+      throw new IllegalArgumentException("Additional configs should have even number of keys and values");
+    }
+    Workflow.Builder bldr = generateDefaultSingleTaskWorkflowBuilder(taskName);
+    for(int i=0; i<cfgs.length; i+=2)
+    {
+      bldr.addConfig(taskName, cfgs[i], cfgs[i+1]);
+    }
+
+    return bldr;
+  }
+
+  public static Workflow.Builder generateDefaultSingleTaskWorkflowBuilder(String taskName)
+  {
+    return generateSingleTaskWorkflowBuilder(taskName, DEFAULT_TASK_CONFIG);
+  }
+
+  public static Workflow.Builder generateSingleTaskWorkflowBuilder(String taskName, Map<String, String> config)
+  {
+    Workflow.Builder builder = new Workflow.Builder(taskName);
+    for(String key : config.keySet())
+    {
+      builder.addConfig(taskName, key, config.get(key));
+    }
+    return builder;
+  }
+
+  public static Workflow.Builder generateDefaultRepeatedTaskWorkflowBuilder(String workflowName)
+  {
+    Workflow.Builder builder = new Workflow.Builder(workflowName);
+    builder.addParentChildDependency(TASK_NAME_1, TASK_NAME_2);
+
+    for(String key : DEFAULT_TASK_CONFIG.keySet())
+    {
+      builder.addConfig(TASK_NAME_1, key, DEFAULT_TASK_CONFIG.get(key));
+      builder.addConfig(TASK_NAME_2, key, DEFAULT_TASK_CONFIG.get(key));
+    }
+
+    return builder;
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/README.md
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/README.md b/recipes/auto-scale/README.md
new file mode 100644
index 0000000..f553246
--- /dev/null
+++ b/recipes/auto-scale/README.md
@@ -0,0 +1,82 @@
+<!---
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+Auto-Scaling with Apache Helix and Apache YARN
+------------------------
+Auto-scaling for helix clusters using a managed and a meta cluster. The managed cluster operates as usual, managing resources and instances via AUTO_REBALANCE. The meta cluster monitors the managed cluster and injects or removes instances based on demand.
+
+The meta cluster makes decisions about scaling up or down based on information obtained from a "ClusterStatusProvider". A custom "ProviderRebalancer" is invoked to test the health of existing participants in the managed cluster via the "ContainerStatusProvider". If participants need to be (re-)deployed, the "ContainerProvider" is invoked to instantiate and inject participants into the managed cluster.
+
+ContainerProviders are the participants of the meta cluster and there are multiple implementations of the "ContainerProvider". First, the "LocalContainerProvider" spawns VM-local participants, i.e. participants of the managed cluster are spawned in the same VM in which the container provider runs. This is mainly useful for testing. Second, the "ShellContainerProvider" spawns a separate VM process for each participant using shell commands. Third, the "YarnContainerProvider" creates processes as containers on a YARN cluster and manages their status using an external meta-data service (Zookeeper in this implementation). This implementation is fairly complex and has a number of external dependencies on a working YARN cluster and running services.
+
+Even though there are different types of providers, the notion of a "ContainerProcess" abstracts implementation specifics. A process implementation inherits from "ContainerProcess" and can be instantiated by all three types of container providers. CAUTION: since separate VM processes might be used, a VM-external method for coordination (e.g. Zookeeper) is required.
+
+Configuration settings are passed throughout the application using traditional Properties objects. The "ConfigTool" contains default paths and helps to inject dependencies in the ProviderRebalancer.
+
+The application can be run and tested in two ways. First, a comprehensive suite of unit and integration tests can be run using "mvn verify". Second, the "Bootstrapper" can deploy a live managed and meta cluster based on a specification (e.g. "2by2shell.properties"). 
+
+------------------------
+The IdealState of the meta cluster uses the ONLINE-OFFLINE model and maps as follows in the example below:
+
+Resource: type of container, e.g. database, webserver
+Partition: container id
+Instance: responsible container provider
+
+META:
+
+database
+  database_0
+    provider_0 : ONLINE
+  database_1
+    provider_1 : ONLINE
+webserver
+  webserver_0
+    provider_0 : ONLINE
+  webserver_1
+    provider_1 : ONLINE
+  webserver_2
+    provider_0 : ONLINE
+
+      
+MANAGED:
+
+dbprod (tag=database)
+  dbprod_0
+    database_0 : MASTER
+    database_1 : SLAVE
+  dbprod_1
+    database_0 : SLAVE
+    database_1 : MASTER
+  dbprod_2
+    database_0 : MASTER
+    database_1 : SLAVE
+wsprod (tag=webserver)
+  wsprod_0
+    webserver_0 : ONLINE
+  wsprod_1
+    webserver_1 : ONLINE
+  wsprod_2
+    webserver_2 : ONLINE
+  wsprod_3
+    webserver_0 : ONLINE
+  wsprod_4
+    webserver_1 : ONLINE
+  wsprod_5
+    webserver_2 : ONLINE
+    
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/pom.xml
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/pom.xml b/recipes/auto-scale/pom.xml
new file mode 100644
index 0000000..95331f4
--- /dev/null
+++ b/recipes/auto-scale/pom.xml
@@ -0,0 +1,210 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+
+  <parent>
+    <groupId>org.apache.helix.recipes</groupId>
+    <artifactId>recipes</artifactId>
+    <version>0.6.2-incubating-SNAPSHOT</version>
+  </parent>
+
+  <artifactId>auto-scale</artifactId>
+  <packaging>jar</packaging>
+  <name>Apache Helix :: Recipes :: Auto-Scale</name>
+  
+  <properties>
+    <hadoop.version>0.23.9</hadoop.version>
+
+    <ut.groups>unit</ut.groups>
+    <it.groups>local, shell</it.groups>
+  </properties>
+
+  <dependencies>
+    <dependency>
+      <groupId>org.testng</groupId>
+      <artifactId>testng</artifactId>
+      <version>6.0.1</version>
+      <scope>test</scope>
+      <exclusions>
+        <exclusion>
+          <groupId>junit</groupId>
+          <artifactId>junit</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+    <dependency>
+      <groupId>com.google.code.gson</groupId>
+      <artifactId>gson</artifactId>
+      <version>2.2.4</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.helix</groupId>
+      <artifactId>helix-core</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>log4j</groupId>
+      <artifactId>log4j</artifactId>
+      <exclusions>
+        <exclusion>
+          <groupId>javax.mail</groupId>
+          <artifactId>mail</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>javax.jms</groupId>
+          <artifactId>jms</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>com.sun.jdmk</groupId>
+          <artifactId>jmxtools</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>com.sun.jmx</groupId>
+          <artifactId>jmxri</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+    <dependency>
+      <groupId>com.google.guava</groupId>
+      <artifactId>guava</artifactId>
+      <version>14.0.1</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-client</artifactId>
+      <version>${hadoop.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-yarn-api</artifactId>
+      <version>${hadoop.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-yarn-common</artifactId>
+      <version>${hadoop.version}</version>
+    </dependency>
+  </dependencies>
+  
+  <build>
+    <pluginManagement>
+      <plugins>
+        <plugin>
+          <groupId>org.codehaus.mojo</groupId>
+          <artifactId>appassembler-maven-plugin</artifactId>
+          <configuration>
+            <configurationDirectory>conf</configurationDirectory>
+            <copyConfigurationDirectory>true</copyConfigurationDirectory>
+            <includeConfigurationDirectoryInClasspath>true</includeConfigurationDirectoryInClasspath>
+            <assembleDirectory>${project.build.directory}/metamanager-pkg</assembleDirectory>
+            <extraJvmArguments>-Xms512m -Xmx512m</extraJvmArguments>
+            <platforms>
+              <platform>unix</platform>
+            </platforms>
+          </configuration>
+          <executions>
+            <execution>
+              <phase>package</phase>
+              <goals>
+                <goal>assemble</goal>
+              </goals>
+            </execution>
+          </executions>
+        </plugin>
+        <plugin>
+          <groupId>org.apache.rat</groupId>
+          <artifactId>apache-rat-plugin</artifactId>
+            <configuration>
+              <excludes combine.children="append">
+              </excludes>
+            </configuration>
+        </plugin>
+      </plugins>
+    </pluginManagement>
+    
+    <plugins>
+      <plugin>
+        <groupId>org.codehaus.mojo</groupId>
+        <artifactId>appassembler-maven-plugin</artifactId>
+        <configuration>
+          <programs>
+            <program>
+              <mainClass>org.apache.helix.autoscale.bootstrapper.Boot</mainClass>
+              <name>boot</name>
+            </program>
+            <program>
+              <mainClass>org.apache.helix.autoscale.impl.shell.ShellContainerProcess</mainClass>
+              <name>shell-container-process</name>
+            </program>
+            <program>
+              <mainClass>org.apache.helix.autoscale.impl.yarn.YarnMasterProcess</mainClass>
+              <name>yarn-master-process</name>
+            </program>
+            <program>
+              <mainClass>org.apache.helix.autoscale.impl.yarn.YarnContainerProcess</mainClass>
+              <name>yarn-container-process</name>
+            </program>
+            <program>
+              <mainClass>org.apache.helix.autoscale.ZookeeperSetter</mainClass>
+              <name>zookeeper-setter</name>
+            </program>
+          </programs>
+        </configuration>
+      </plugin>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-assembly-plugin</artifactId>
+        <executions>
+          <execution>
+            <phase>package</phase>
+            <goals>
+              <goal>single</goal>
+            </goals>
+          </execution>
+        </executions>
+        <configuration>
+          <finalName>metamanager</finalName>
+          <descriptor>src/main/assembly/assembly.xml</descriptor>
+        </configuration>
+      </plugin>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-surefire-plugin</artifactId>
+        <configuration>
+          <groups>${ut.groups}</groups>
+          <excludedGroups>integration</excludedGroups>
+          <suiteXmlFiles>
+            <suiteXmlFile>src/test/config/testng.xml</suiteXmlFile>
+          </suiteXmlFiles>
+        </configuration>
+      </plugin>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-failsafe-plugin</artifactId>
+        <configuration>
+          <groups>${it.groups}</groups>
+          <excludedGroups>unit</excludedGroups>
+          <suiteXmlFiles>
+            <suiteXmlFile>src/test/config/testng.xml</suiteXmlFile>
+          </suiteXmlFiles>
+        </configuration>
+        <executions>
+          <execution>
+            <goals>
+              <goal>integration-test</goal>
+              <goal>verify</goal>
+            </goals>
+          </execution>
+        </executions>
+      </plugin>
+    </plugins>
+  </build>
+  
+  <profiles>
+    <profile>
+      <id>yarn</id>
+      <properties>
+        <it.groups>yarn</it.groups>
+      </properties>
+    </profile>
+  </profiles>
+</project>

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/assembly/assembly.xml
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/assembly/assembly.xml b/recipes/auto-scale/src/main/assembly/assembly.xml
new file mode 100644
index 0000000..03b2ca5
--- /dev/null
+++ b/recipes/auto-scale/src/main/assembly/assembly.xml
@@ -0,0 +1,32 @@
+<assembly
+  xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.2" 
+  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+  xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.2
+  http://maven.apache.org/xsd/assembly-1.1.2.xsd">
+
+  <id>assembly</id>
+  <formats>
+    <format>tar.gz</format>
+  </formats>
+  <baseDirectory>metamanager</baseDirectory>
+  <fileSets>
+    <fileSet>
+      <directory>target/metamanager-pkg/repo</directory>
+      <outputDirectory>repo</outputDirectory>
+      <excludes>
+        <exclude>**/maven-metadata-appassembler.xml</exclude>
+      </excludes>
+      <fileMode>0644</fileMode>
+    </fileSet> 
+    <fileSet>
+      <directory>target/metamanager-pkg/bin</directory>
+      <outputDirectory>bin</outputDirectory>
+      <fileMode>0755</fileMode>
+    </fileSet> 
+    <fileSet>
+      <directory>target/metamanager-pkg/conf</directory>
+      <outputDirectory>conf</outputDirectory>
+      <fileMode>0644</fileMode>
+    </fileSet> 
+  </fileSets>
+</assembly>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/config/log4j.properties
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/config/log4j.properties b/recipes/auto-scale/src/main/config/log4j.properties
new file mode 100644
index 0000000..7f29be2
--- /dev/null
+++ b/recipes/auto-scale/src/main/config/log4j.properties
@@ -0,0 +1,30 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# Set root logger level to ERROR and its only appender to C.
+log4j.rootLogger=ERROR, C
+
+# C is set to be a ConsoleAppender.
+log4j.appender.C=org.apache.log4j.ConsoleAppender
+log4j.appender.C.layout=org.apache.log4j.PatternLayout
+log4j.appender.C.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n
+
+log4j.logger.org.I0Itec=ERROR
+log4j.logger.org.apache=ERROR
+
+log4j.logger.org.apache.helix.autoscale=INFO

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/ClusterAdmin.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/ClusterAdmin.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/ClusterAdmin.java
new file mode 100644
index 0000000..f7808bd
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/ClusterAdmin.java
@@ -0,0 +1,30 @@
+package org.apache.helix.autoscale;
+
/**
 * Abstraction for instance config (container) injection into and removal from
 * the managed cluster.
 */
public interface ClusterAdmin {

    /**
     * Add instance configuration to managed cluster.
     * 
     * @param instanceId
     *            unique id of the participant to register
     * @param instanceTag
     *            tag that links the instance to tagged resources
     */
    public void addInstance(String instanceId, String instanceTag);

    /**
     * Remove instance configuration from managed cluster.<br/>
     * <b>INVARIANT:</b> idempotent
     * 
     * @param instanceId
     *            unique id of the participant to remove
     */
    public void removeInstance(String instanceId);

    /**
     * Trigger rebalance of any affected resource in the managed cluster.
     */
    public void rebalance();
}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/ContainerProvider.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/ContainerProvider.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/ContainerProvider.java
new file mode 100644
index 0000000..8409ba3
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/ContainerProvider.java
@@ -0,0 +1,40 @@
+package org.apache.helix.autoscale;
+
+import org.apache.helix.autoscale.provider.ProviderStateModel;
+
/**
 * Abstraction for container deployment framework. Creates and destroys
 * container instances. Is invoked by ProviderStateModel and must be blocking.
 * 
 * @see ProviderStateModel
 */
public interface ContainerProvider {
    /**
     * Create container of given type.<br/>
     * <b>INVARIANT:</b> synchronous invocation
     * 
     * @param id
     *            unique user-defined container id
     * @param containerType
     *            container type
     * @throws Exception
     *             if the container could not be created
     */
    public void create(String id, String containerType) throws Exception;

    /**
     * Destroy container.<br/>
     * <b>INVARIANT:</b> synchronous invocation
     * 
     * @param id
     *            unique user-defined container id
     * @throws Exception
     *             if the container could not be destroyed
     */
    public void destroy(String id) throws Exception;

    /**
     * Stops all running processes and destroys containers. Best-effort for
     * cleanup; individual failures are not reported to the caller.
     */
    public void destroyAll();
}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/ContainerProviderService.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/ContainerProviderService.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/ContainerProviderService.java
new file mode 100644
index 0000000..1c39b7c
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/ContainerProviderService.java
@@ -0,0 +1,9 @@
+package org.apache.helix.autoscale;
+
/**
 * ContainerProvider as configurable service. Combines the container
 * create/destroy operations of ContainerProvider with the
 * configure/start/stop life-cycle of Service.
 */
public interface ContainerProviderService extends ContainerProvider, Service {

}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/HelixClusterAdmin.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/HelixClusterAdmin.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/HelixClusterAdmin.java
new file mode 100644
index 0000000..0eb1362
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/HelixClusterAdmin.java
@@ -0,0 +1,43 @@
+package org.apache.helix.autoscale;
+
+import org.apache.helix.HelixAdmin;
+import org.apache.helix.model.InstanceConfig;
+import org.apache.log4j.Logger;
+
+/**
+ * Implementation of ClusterAdmin based on Helix.
+ * 
+ */
+public class HelixClusterAdmin implements ClusterAdmin {
+
+    static final Logger log = Logger.getLogger(HelixClusterAdmin.class);
+
+    final String        cluster;
+    final HelixAdmin    admin;
+
+    public HelixClusterAdmin(String clusterName, HelixAdmin admin) {
+        this.cluster = clusterName;
+        this.admin = admin;
+    }
+
+    @Override
+    public synchronized void addInstance(String instanceId, String instanceTag) {
+        log.debug(String.format("injecting instance %s (tag=%s) in cluster %s", instanceId, instanceTag, cluster));
+        admin.addInstance(cluster, new InstanceConfig(instanceId));
+        admin.addInstanceTag(cluster, instanceId, instanceTag);
+    }
+
+    @Override
+    public synchronized void removeInstance(String connection) {
+        log.debug(String.format("removing instance %s from cluster %s", connection, cluster));
+        admin.dropInstance(cluster, new InstanceConfig(connection));
+    }
+
+    @Override
+    public void rebalance() {
+        for (String resourceName : admin.getResourcesInCluster(cluster)) {
+            int replica = Integer.parseInt(admin.getResourceIdealState(cluster, resourceName).getReplicas());
+            admin.rebalance(cluster, resourceName, replica);
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/Service.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/Service.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/Service.java
new file mode 100644
index 0000000..a60d110
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/Service.java
@@ -0,0 +1,38 @@
+package org.apache.helix.autoscale;
+
+import java.util.Properties;
+
/**
 * Abstraction for configurable and runnable service. Light-weight dependency
 * injection and life-cycle management.
 */
public interface Service {

    /**
     * Configure service internals<br/>
     * <b>INVARIANT:</b> executed only once
     * 
     * @param properties
     *            arbitrary key-value properties, parsed internally
     * @throws Exception
     *             if the configuration is invalid or incomplete
     */
    void configure(Properties properties) throws Exception;

    /**
     * Start service.<br/>
     * <b>PRECONDITION:</b> configure() was invoked<br/>
     * <b>INVARIANT:</b> executed only once
     * 
     * @throws Exception
     *             if the service fails to start
     */
    void start() throws Exception;

    /**
     * Stop service.<br/>
     * <b>INVARIANT:</b> idempotent
     * 
     * @throws Exception
     *             if an error occurs while stopping
     */
    void stop() throws Exception;
}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/StatusProvider.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/StatusProvider.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/StatusProvider.java
new file mode 100644
index 0000000..ce0c29d
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/StatusProvider.java
@@ -0,0 +1,35 @@
+package org.apache.helix.autoscale;
+
+import org.apache.helix.autoscale.provider.ProviderRebalancer;
+
/**
 * Abstraction for status reader of container deployment framework. Provides
 * information on physical existence of container and activity or failure state.
 * Is polled by ProviderRebalancer and should be light-weight and non-blocking.<br/>
 * <b>NOTE:</b> This information is solely based on the low-level framework and
 * may be different from the participant state in Helix. (The Helix participant
 * may not even exist)
 * 
 * @see ProviderRebalancer
 */
public interface StatusProvider {

    /**
     * Determine whether container physically exists.
     * 
     * @param id
     *            unique container id
     * @return true, if container is present
     */
    public boolean exists(String id);

    /**
     * Determine whether container is healthy as determined by the deployment
     * framework.
     * NOTE(review): behavior for ids that do not exist is
     * implementation-specific - confirm per implementation.
     * 
     * @param id
     *            unique container id
     * @return true, if container is healthy
     */
    public boolean isHealthy(String id);
}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/StatusProviderService.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/StatusProviderService.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/StatusProviderService.java
new file mode 100644
index 0000000..756fe4a
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/StatusProviderService.java
@@ -0,0 +1,9 @@
+package org.apache.helix.autoscale;
+
/**
 * StatusProvider as configurable service. Combines the container status
 * queries of StatusProvider with the configure/start/stop life-cycle of
 * Service.
 */
public interface StatusProviderService extends StatusProvider, Service {

}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/TargetProvider.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/TargetProvider.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/TargetProvider.java
new file mode 100644
index 0000000..243a977
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/TargetProvider.java
@@ -0,0 +1,25 @@
+package org.apache.helix.autoscale;
+
+import org.apache.helix.autoscale.provider.ProviderRebalancer;
+
/**
 * Abstraction for target computation and statistics collection. Provides target
 * count of containers for ProviderRebalancer. Is polled by ProviderRebalancer
 * and should be light-weight and non-blocking.<br/>
 * <b>NOTE:</b> The target count is oblivious of failed containers and can be
 * obtained in an arbitrary way. See implementations for examples.
 * 
 * @see ProviderRebalancer
 */
public interface TargetProvider {

    /**
     * Return target count of containers of a specific type.
     * 
     * @param containerType
     *            meta resource name
     * @return target container count, expected to be >= 1
     * @throws Exception
     *             if the target count cannot be determined
     */
    public int getTargetContainerCount(String containerType) throws Exception;
}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/TargetProviderService.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/TargetProviderService.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/TargetProviderService.java
new file mode 100644
index 0000000..a9ce207
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/TargetProviderService.java
@@ -0,0 +1,9 @@
+package org.apache.helix.autoscale;
+
/**
 * TargetProvider as configurable service. Combines the target count queries
 * of TargetProvider with the configure/start/stop life-cycle of Service.
 */
public interface TargetProviderService extends TargetProvider, Service {

}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/ZookeeperSetter.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/ZookeeperSetter.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/ZookeeperSetter.java
new file mode 100644
index 0000000..0c47999
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/ZookeeperSetter.java
@@ -0,0 +1,30 @@
+package org.apache.helix.autoscale;
+
+import org.I0Itec.zkclient.ZkClient;
+import org.apache.log4j.Logger;
+
+/**
+ * Utility for setting String values in the embedded zookeeper service.
+ * (Program entry point)
+ * 
+ */
+public class ZookeeperSetter {
+
+    static Logger log = Logger.getLogger(ZookeeperSetter.class);
+
+    /**
+     * @param args
+     */
+    public static void main(String[] args) {
+        String address = args[0];
+        String path = args[1];
+        String value = args[2];
+
+        log.info(String.format("Setting %s:%s to '%s'", address, path, value));
+
+        ZkClient client = new ZkClient(address);
+        client.createPersistent(path, true);
+        client.writeData(path, value);
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/bootstrapper/Boot.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/bootstrapper/Boot.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/bootstrapper/Boot.java
new file mode 100644
index 0000000..a501c9a
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/bootstrapper/Boot.java
@@ -0,0 +1,132 @@
+package org.apache.helix.autoscale.bootstrapper;
+
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+
+import org.apache.helix.autoscale.Service;
+import org.apache.log4j.Logger;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.Lists;
+
+/**
+ * Bootstrapper for elastic cluster deployment using *.properties configuration
+ * files. (Program entry point)
+ * 
+ */
+public class Boot implements Service {
+
+    static final Logger       log          = Logger.getLogger(Boot.class);
+
+    static final Map<String, Class<? extends Service>> classes      = new HashMap<String, Class<? extends Service>>();
+    static {
+        classes.put("zookeeper", ZookeeperService.class);
+        classes.put("cluster", ClusterService.class);
+        classes.put("resource", ResourceService.class);
+        classes.put("controller", ControllerService.class);
+        classes.put("metacluster", MetaClusterService.class);
+        classes.put("metaresource", MetaResourceService.class);
+        classes.put("metaprovider", MetaProviderService.class);
+        classes.put("metacontroller", MetaControllerService.class);
+    }
+
+    static final List<String> serviceOrder = Arrays.asList("zookeeper", "cluster", "resource", "metacluster", "metaresource",
+                                                                            "metaprovider", "controller", "metacontroller");
+
+    Properties                properties;
+    List<Service>             services     = Lists.newArrayList();
+
+    @Override
+    public void configure(Properties properties) throws Exception {
+        Preconditions.checkNotNull(properties);
+        this.properties = properties;
+    }
+
+    @Override
+    public void start() throws Exception {
+        log.info(String.format("bootstraping started"));
+
+        for (String key : serviceOrder) {
+            if (BootUtils.hasNamespace(properties, key + ".0")) {
+                processIndexedNamespace(key);
+            } else if (BootUtils.hasNamespace(properties, key)) {
+                processNamespace(key);
+            }
+        }
+
+        log.info(String.format("bootstraping completed"));
+    }
+
+    private void processIndexedNamespace(String key) throws Exception {
+        int i = 0;
+        String indexedKey = key + "." + i;
+
+        while (BootUtils.hasNamespace(properties, indexedKey)) {
+            log.info(String.format("processing namespace '%s'", indexedKey));
+            Service service = BootUtils.createInstance(classes.get(key));
+            service.configure(BootUtils.getNamespace(properties, indexedKey));
+            service.start();
+
+            services.add(service);
+
+            i++;
+            indexedKey = key + "." + i;
+        }
+    }
+
+    private void processNamespace(String key) throws Exception {
+        log.info(String.format("processing namespace '%s'", key));
+        Service service = BootUtils.createInstance(classes.get(key));
+        service.configure(BootUtils.getNamespace(properties, key));
+        service.start();
+
+        services.add(service);
+    }
+
+    @Override
+    public void stop() throws Exception {
+        log.info(String.format("shutdown started"));
+
+        Collections.reverse(services);
+        for (Service service : services) {
+            service.stop();
+        }
+
+        log.info(String.format("shutdown completed"));
+    }
+
+    public Collection<Service> getServcies() {
+        return services;
+    }
+
+    public static void main(String[] args) throws Exception {
+        if (args.length < 1) {
+            log.error(String.format("Usage: Boot properties_path"));
+            return;
+        }
+
+        String resourcePath = args[0];
+
+        log.info(String.format("reading definition from '%s'", resourcePath));
+        Properties properties = new Properties();
+        properties.load(ClassLoader.getSystemResourceAsStream(resourcePath));
+
+        final Boot boot = new Boot();
+        boot.configure(properties);
+        boot.start();
+
+        Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() {
+            @Override
+            public void run() {
+                log.debug("Running shutdown hook");
+                try { boot.stop(); } catch (Exception ignore) {}
+            }
+        }));
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/bootstrapper/BootUtils.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/bootstrapper/BootUtils.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/bootstrapper/BootUtils.java
new file mode 100644
index 0000000..53498b9
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/bootstrapper/BootUtils.java
@@ -0,0 +1,104 @@
+package org.apache.helix.autoscale.bootstrapper;
+
+import java.util.Collection;
+import java.util.Map;
+import java.util.Properties;
+import java.util.Set;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.commons.lang.StringUtils;
+import org.apache.log4j.Logger;
+
+import com.google.common.collect.Lists;
+import com.google.common.collect.Sets;
+
+/**
+ * Utility for instantiating bootstrapping services and parsing hierarchical
+ * properties files.
+ * 
+ */
+public class BootUtils {
+
+    /** Property key naming the implementation class to instantiate. */
+    public static final String CLASS_PROPERTY = "class";
+    static final Logger        log            = Logger.getLogger(BootUtils.class);
+
+    /**
+     * Checks whether at least one property key lives under the given
+     * namespace, i.e. starts with "namespace.".
+     *
+     * @param properties hierarchical properties
+     * @param namespace namespace prefix without the trailing '.'
+     * @return true if the namespace contains any key
+     */
+    public static boolean hasNamespace(Properties properties, String namespace) {
+        String prefix = namespace + ".";
+        for (String key : properties.stringPropertyNames()) {
+            if (key.startsWith(prefix))
+                return true;
+        }
+        return false;
+    }
+
+    /**
+     * Extracts all top-level namespaces, i.e. each key's segment before the
+     * first '.'.
+     *
+     * @param properties hierarchical properties
+     * @return distinct first-level key segments
+     */
+    public static Set<String> getNamespaces(Properties properties) {
+        // leading run of characters that are neither '.' nor '='
+        Pattern pattern = Pattern.compile("^([^\\.\\=]+)");
+
+        Set<String> namespaces = Sets.newHashSet();
+
+        for (Map.Entry<Object, Object> rawEntry : properties.entrySet()) {
+            String key = (String) rawEntry.getKey();
+
+            Matcher matcher = pattern.matcher(key);
+            if (matcher.find()) {
+                namespaces.add(matcher.group(1));
+            }
+        }
+
+        return namespaces;
+    }
+
+    /**
+     * Projects all keys under the given namespace into a new Properties
+     * object with the "namespace." prefix stripped.
+     *
+     * @param source hierarchical properties
+     * @param namespace namespace prefix without the trailing '.'
+     * @return flat properties of the namespace, possibly empty
+     */
+    public static Properties getNamespace(Properties source, String namespace) {
+        Properties dest = new Properties();
+        String prefix = namespace + ".";
+
+        for (Map.Entry<Object, Object> rawEntry : source.entrySet()) {
+            String key = (String) rawEntry.getKey();
+            String value = (String) rawEntry.getValue();
+
+            if (key.startsWith(prefix)) {
+                String newKey = key.substring(prefix.length());
+                dest.put(newKey, value);
+            }
+        }
+
+        return dest;
+    }
+
+    /**
+     * Reads the comma-separated "containers" property and returns the
+     * sub-properties of "container.&lt;type&gt;" for each listed type.
+     *
+     * @param properties hierarchical properties
+     * @return one Properties object per configured container type; empty if
+     *         the "containers" key is absent
+     */
+    public static Collection<Properties> getContainerProps(Properties properties) {
+        Collection<Properties> containerProps = Lists.newArrayList();
+
+        String containers = properties.getProperty("containers");
+        if (containers == null) {
+            // no container types configured; avoid the NPE from split(null)
+            return containerProps;
+        }
+        String containerTypes[] = StringUtils.split(containers, ",");
+
+        for (String containerType : containerTypes) {
+            Properties containerProp = BootUtils.getNamespace(BootUtils.getNamespace(properties, "container"), containerType);
+            log.debug(String.format("adding container type (type='%s', properties='%s')", containerType, containerProp));
+            containerProps.add(containerProp);
+        }
+
+        return containerProps;
+    }
+
+    /**
+     * Instantiates a class via its public no-argument constructor.
+     *
+     * @param clazz class to instantiate
+     * @return new instance
+     * @throws Exception if no public default constructor exists (the lookup
+     *         failure is preserved as the cause), or if the constructor
+     *         itself throws
+     */
+    @SuppressWarnings("unchecked")
+    public static <T> T createInstance(Class<?> clazz) throws Exception {
+        java.lang.reflect.Constructor<?> constructor;
+        try {
+            log.debug(String.format("checking for default constructor in class '%s'", clazz.getSimpleName()));
+            constructor = clazz.getConstructor();
+        } catch (NoSuchMethodException e) {
+            log.debug("no default constructor found");
+            // keep the original failure as the cause instead of discarding it
+            throw new Exception(String.format("no suitable constructor for class '%s'", clazz.getSimpleName()), e);
+        }
+        // constructor failures now propagate instead of being masked as a
+        // missing-constructor error
+        return (T) constructor.newInstance();
+    }
+
+    /**
+     * Instantiates the named class via its public no-argument constructor.
+     *
+     * @param className fully-qualified class name
+     * @return new instance
+     * @throws Exception if the class cannot be loaded or instantiated
+     */
+    public static <T> T createInstance(String className) throws Exception {
+        return createInstance(Class.forName(className));
+    }
+
+    private BootUtils() {
+        // utility class, not instantiable
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/bootstrapper/ClusterService.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/bootstrapper/ClusterService.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/bootstrapper/ClusterService.java
new file mode 100644
index 0000000..730740f
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/bootstrapper/ClusterService.java
@@ -0,0 +1,46 @@
+package org.apache.helix.autoscale.bootstrapper;
+
+import java.util.Properties;
+
+import org.apache.helix.HelixAdmin;
+import org.apache.helix.autoscale.Service;
+import org.apache.helix.manager.zk.ZKHelixAdmin;
+import org.apache.helix.model.StateModelDefinition;
+import org.apache.helix.tools.StateModelConfigGenerator;
+import org.apache.log4j.Logger;
+
+/**
+ * Cluster bootstrapping. Create Helix data structures in zookeeper for the
+ * managed cluster.
+ * 
+ */
+public class ClusterService implements Service {
+
+    static final Logger log = Logger.getLogger(ClusterService.class);
+
+    // managed cluster name and zookeeper address, set in configure()
+    String              name;
+    String              address;
+
+    /** Reads cluster name and zookeeper address, with local defaults. */
+    @Override
+    public void configure(Properties properties) throws Exception {
+        name = properties.getProperty("name", "cluster");
+        address = properties.getProperty("address", "localhost:2199");
+    }
+
+    /**
+     * Creates the managed cluster in zookeeper and registers the
+     * OnlineOffline and MasterSlave state models.
+     */
+    @Override
+    public void start() throws Exception {
+        log.info(String.format("setting up '%s/%s'", address, name));
+        HelixAdmin admin = new ZKHelixAdmin(address);
+        try {
+            admin.addCluster(name, false);
+            admin.addStateModelDef(name, "OnlineOffline", new StateModelDefinition(StateModelConfigGenerator.generateConfigForOnlineOffline()));
+            admin.addStateModelDef(name, "MasterSlave", new StateModelDefinition(StateModelConfigGenerator.generateConfigForMasterSlave()));
+        } finally {
+            // release the zookeeper connection even if setup fails
+            admin.close();
+        }
+        log.info("setup complete");
+    }
+
+    @Override
+    public void stop() throws Exception {
+        // one-shot bootstrap step; nothing to tear down
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/bootstrapper/ControllerService.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/bootstrapper/ControllerService.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/bootstrapper/ControllerService.java
new file mode 100644
index 0000000..5e659f2
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/bootstrapper/ControllerService.java
@@ -0,0 +1,50 @@
+package org.apache.helix.autoscale.bootstrapper;
+
+import java.util.Properties;
+import java.util.concurrent.ScheduledExecutorService;
+
+import org.apache.helix.HelixManager;
+import org.apache.helix.autoscale.Service;
+import org.apache.helix.controller.HelixControllerMain;
+import org.apache.log4j.Logger;
+
+/**
+ * Helix controller bootstrapping and management. Create standalone controller
+ * for managed Helix cluster.
+ * 
+ */
+public class ControllerService implements Service {
+
+    static final Logger log = Logger.getLogger(ControllerService.class);
+
+    // controller name and managed cluster coordinates, set in configure()
+    String              name;
+    String              cluster;
+    String              address;
+
+    // live connection to the managed cluster; non-null while started
+    HelixManager        manager;
+
+    /**
+     * Reads controller name and managed cluster coordinates, with local
+     * defaults.
+     */
+    @Override
+    public void configure(Properties properties) throws Exception {
+        name = properties.getProperty("name", "controller");
+        cluster = properties.getProperty("cluster", "cluster");
+        address = properties.getProperty("address", "localhost:2199");
+    }
+
+    /** Starts a standalone Helix controller for the managed cluster. */
+    @Override
+    public void start() throws Exception {
+        log.info(String.format("starting controller '%s' at '%s/%s'", name, address, cluster));
+        manager = HelixControllerMain.startHelixController(address, cluster, name, HelixControllerMain.STANDALONE);
+    }
+
+    /** Disconnects the controller; safe to call when not started. */
+    @Override
+    public void stop() throws Exception {
+        if (manager != null) {
+            log.info(String.format("stopping controller '%s' at '%s/%s'", name, address, cluster));
+            manager.disconnect();
+            manager = null;
+        }
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/bootstrapper/MetaClusterService.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/bootstrapper/MetaClusterService.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/bootstrapper/MetaClusterService.java
new file mode 100644
index 0000000..30ecb4d
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/bootstrapper/MetaClusterService.java
@@ -0,0 +1,61 @@
+package org.apache.helix.autoscale.bootstrapper;
+
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Properties;
+
+import org.apache.helix.HelixAdmin;
+import org.apache.helix.autoscale.Service;
+import org.apache.helix.manager.zk.ZKHelixAdmin;
+import org.apache.helix.model.HelixConfigScope;
+import org.apache.helix.model.HelixConfigScope.ConfigScopeProperty;
+import org.apache.helix.model.StateModelDefinition;
+import org.apache.helix.model.builder.HelixConfigScopeBuilder;
+import org.apache.helix.tools.StateModelConfigGenerator;
+import org.apache.log4j.Logger;
+
+/**
+ * Meta cluster bootstrapping. Create Helix data structures in zookeeper for
+ * the meta cluster.
+ * 
+ */
+public class MetaClusterService implements Service {
+
+    static final Logger log = Logger.getLogger(MetaClusterService.class);
+
+    // meta cluster coordinates and the managed cluster it points at
+    String              name;
+    String              address;
+    String              managedCluster;
+    String              managedAddress;
+
+    /**
+     * Reads meta cluster and managed cluster coordinates, with local
+     * defaults.
+     */
+    @Override
+    public void configure(Properties properties) throws Exception {
+        name = properties.getProperty("name", "metacluster");
+        address = properties.getProperty("address", "localhost:2199");
+        managedCluster = properties.getProperty("managedcluster", "cluster");
+        managedAddress = properties.getProperty("managedaddress", "localhost:2199");
+    }
+
+    /**
+     * Creates the meta cluster in zookeeper, registers the OnlineOffline
+     * state model and stores the managed cluster coordinates in the cluster
+     * config so providers can find it.
+     */
+    @Override
+    public void start() throws Exception {
+        log.info(String.format("setting up '%s/%s'", address, name));
+        HelixAdmin admin = new ZKHelixAdmin(address);
+        try {
+            admin.addCluster(name, false);
+            admin.addStateModelDef(name, "OnlineOffline", new StateModelDefinition(StateModelConfigGenerator.generateConfigForOnlineOffline()));
+
+            HelixConfigScope scope = new HelixConfigScopeBuilder(ConfigScopeProperty.CLUSTER, name).build();
+            Map<String, String> config = new HashMap<String, String>();
+            config.put("cluster", managedCluster);
+            config.put("address", managedAddress);
+            admin.setConfig(scope, config);
+        } finally {
+            // release the zookeeper connection even if setup fails
+            admin.close();
+        }
+        log.info("setup complete");
+    }
+
+    @Override
+    public void stop() throws Exception {
+        // one-shot bootstrap step; nothing to tear down
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/bootstrapper/MetaControllerService.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/bootstrapper/MetaControllerService.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/bootstrapper/MetaControllerService.java
new file mode 100644
index 0000000..7a0221c
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/bootstrapper/MetaControllerService.java
@@ -0,0 +1,114 @@
+package org.apache.helix.autoscale.bootstrapper;
+
+import java.util.Properties;
+import java.util.concurrent.Executors;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.helix.HelixAdmin;
+import org.apache.helix.HelixManager;
+import org.apache.helix.autoscale.Service;
+import org.apache.helix.autoscale.StatusProviderService;
+import org.apache.helix.autoscale.TargetProviderService;
+import org.apache.helix.autoscale.provider.ProviderRebalancerSingleton;
+import org.apache.helix.controller.HelixControllerMain;
+import org.apache.helix.model.IdealState;
+import org.apache.log4j.Logger;
+
+/**
+ * Meta cluster controller bootstrapping and management. Create standalone
+ * controller for Helix meta cluster. Spawn StatusProvider and TargetProvider
+ * and trigger periodic status refresh in meta cluster.
+ * 
+ */
+public class MetaControllerService implements Service {
+
+    static final Logger      log = Logger.getLogger(MetaControllerService.class);
+
+    String                   name;
+    String                   metacluster;
+    String                   metaaddress;
+    // refresh interval in milliseconds; <= 0 disables the periodic refresh
+    long                     autorefresh;
+
+    HelixManager             manager;
+    StatusProviderService    statusService;
+    TargetProviderService    targetService;
+    ScheduledExecutorService executor;
+
+    /**
+     * Reads controller configuration and instantiates the status and target
+     * provider services from their "status.*" and "target.*" property
+     * namespaces, registering both with the rebalancer singleton.
+     */
+    @Override
+    public void configure(Properties properties) throws Exception {
+        name = properties.getProperty("name", "controller");
+        metacluster = properties.getProperty("metacluster", "metacluster");
+        metaaddress = properties.getProperty("metaaddress", "localhost:2199");
+        autorefresh = Long.valueOf(properties.getProperty("autorefresh", "0"));
+
+        Properties statusProperties = BootUtils.getNamespace(properties, "status");
+        statusService = BootUtils.createInstance(Class.forName(statusProperties.getProperty("class")));
+        statusService.configure(statusProperties);
+        ProviderRebalancerSingleton.setStatusProvider(statusService);
+
+        Properties targetProperties = BootUtils.getNamespace(properties, "target");
+        targetService = BootUtils.createInstance(Class.forName(targetProperties.getProperty("class")));
+        targetService.configure(targetProperties);
+        ProviderRebalancerSingleton.setTargetProvider(targetService);
+    }
+
+    /**
+     * Starts providers and the standalone meta cluster controller, then
+     * schedules the periodic status refresh if autorefresh is enabled.
+     */
+    @Override
+    public void start() throws Exception {
+        log.debug("Starting status service");
+        statusService.start();
+
+        log.debug("Starting target service");
+        targetService.start();
+
+        log.info(String.format("starting controller '%s' at '%s/%s'", name, metaaddress, metacluster));
+        manager = HelixControllerMain.startHelixController(metaaddress, metacluster, name, HelixControllerMain.STANDALONE);
+
+        if (autorefresh > 0) {
+            log.debug(String.format("installing autorefresh with interval %d ms", autorefresh));
+            executor = Executors.newSingleThreadScheduledExecutor();
+            executor.scheduleAtFixedRate(new RefreshRunnable(), autorefresh, autorefresh, TimeUnit.MILLISECONDS);
+        }
+    }
+
+    /**
+     * Stops the refresh task, the controller and the providers in reverse
+     * start order. Safe to call when partially started.
+     */
+    @Override
+    public void stop() throws Exception {
+        if (executor != null) {
+            executor.shutdownNow();
+            // block until in-flight refresh tasks have terminated instead of
+            // busy-sleeping on isTerminated()
+            while (!executor.awaitTermination(100, TimeUnit.MILLISECONDS)) {
+                // shutdownNow() already interrupted the tasks; keep waiting
+            }
+            executor = null;
+        }
+        if (manager != null) {
+            log.info(String.format("Stopping controller '%s' at '%s/%s'", name, metaaddress, metacluster));
+            manager.disconnect();
+            manager = null;
+        }
+        if (targetService != null) {
+            log.debug("Stopping target service");
+            targetService.stop();
+            targetService = null;
+        }
+        if (statusService != null) {
+            log.debug("Stopping status service");
+            statusService.stop();
+            statusService = null;
+        }
+    }
+
+    /**
+     * Periodic task that rewrites each meta resource's ideal state to force
+     * the rebalancer to re-evaluate container status.
+     */
+    private class RefreshRunnable implements Runnable {
+        @Override
+        public void run() {
+            // an exception escaping run() would silently cancel all future
+            // executions of this scheduleAtFixedRate() task, so catch and log
+            try {
+                log.debug("running status refresh");
+                HelixAdmin admin = manager.getClusterManagmentTool();
+
+                for (String metaResource : admin.getResourcesInCluster(metacluster)) {
+                    log.debug(String.format("refreshing meta resource '%s'", metaResource));
+
+                    // writing back the unchanged ideal state triggers the
+                    // USER_DEFINED rebalancer
+                    IdealState poke = admin.getResourceIdealState(metacluster, metaResource);
+                    admin.setResourceIdealState(metacluster, metaResource, poke);
+                }
+            } catch (Exception e) {
+                log.warn("status refresh failed", e);
+            }
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/bootstrapper/MetaProviderService.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/bootstrapper/MetaProviderService.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/bootstrapper/MetaProviderService.java
new file mode 100644
index 0000000..a8a9064
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/bootstrapper/MetaProviderService.java
@@ -0,0 +1,81 @@
+package org.apache.helix.autoscale.bootstrapper;
+
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+
+import org.apache.helix.HelixAdmin;
+import org.apache.helix.autoscale.Service;
+import org.apache.helix.autoscale.provider.ProviderProperties;
+import org.apache.helix.manager.zk.ZKHelixAdmin;
+import org.apache.helix.model.HelixConfigScope;
+import org.apache.helix.model.HelixConfigScope.ConfigScopeProperty;
+import org.apache.helix.model.builder.HelixConfigScopeBuilder;
+import org.apache.log4j.Logger;
+
+import com.google.common.collect.Lists;
+
+/**
+ * ContainerProvider bootstrapping and management. Create container provider
+ * participant, configure with container properties from meta resources and
+ * connect to meta cluster.
+ * 
+ */
+public class MetaProviderService implements Service {
+
+    static final Logger log = Logger.getLogger(MetaProviderService.class);
+
+    // wrapped container provider instance; non-null while started
+    Service             service;
+
+    String              clazz;
+    String              metaAddress;
+    String              metaCluster;
+
+    ProviderProperties  config;
+
+    /**
+     * Reads the provider implementation class and meta cluster coordinates,
+     * and seeds the provider configuration with all given properties.
+     */
+    @Override
+    public void configure(Properties properties) throws Exception {
+        clazz = properties.getProperty("class");
+        metaAddress = properties.getProperty("metaaddress", "localhost:2199");
+        metaCluster = properties.getProperty("metacluster", "metacluster");
+
+        config = new ProviderProperties();
+        config.putAll(properties);
+    }
+
+    /**
+     * Reads managed cluster coordinates and per-resource container
+     * configuration from the meta cluster, then instantiates and starts the
+     * configured provider.
+     */
+    @Override
+    public void start() throws Exception {
+        log.info(String.format("starting service '%s' (config=%s)", clazz, config));
+
+        HelixAdmin admin = new ZKHelixAdmin(metaAddress);
+        // the admin connection was previously never closed; bound it to this
+        // method so the zookeeper connection is always released
+        try {
+            HelixConfigScope managedScope = new HelixConfigScopeBuilder(ConfigScopeProperty.CLUSTER, metaCluster).build();
+            Map<String, String> managedProps = admin.getConfig(managedScope, Lists.newArrayList("cluster", "address"));
+            config.putAll(managedProps);
+
+            for (String resource : admin.getResourcesInCluster(metaCluster)) {
+                HelixConfigScope resScope = new HelixConfigScopeBuilder(ConfigScopeProperty.RESOURCE, metaCluster, resource).build();
+                List<String> resKeys = admin.getConfigKeys(resScope);
+                Map<String, String> resProps = admin.getConfig(resScope, resKeys);
+
+                Properties resProperties = new Properties();
+                resProperties.putAll(resProps);
+
+                config.addContainer(resource, resProperties);
+            }
+        } finally {
+            admin.close();
+        }
+
+        service = BootUtils.createInstance(clazz);
+        service.configure(config);
+        service.start();
+    }
+
+    /** Stops the wrapped provider; safe to call when not started. */
+    @Override
+    public void stop() throws Exception {
+        log.info(String.format("stopping service '%s' (config=%s)", clazz, config));
+        if (service != null) {
+            service.stop();
+            service = null;
+        }
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/bootstrapper/MetaResourceService.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/bootstrapper/MetaResourceService.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/bootstrapper/MetaResourceService.java
new file mode 100644
index 0000000..55478ad
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/bootstrapper/MetaResourceService.java
@@ -0,0 +1,87 @@
+package org.apache.helix.autoscale.bootstrapper;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+
+import org.apache.helix.HelixAdmin;
+import org.apache.helix.autoscale.Service;
+import org.apache.helix.autoscale.provider.ProviderRebalancer;
+import org.apache.helix.manager.zk.ZKHelixAdmin;
+import org.apache.helix.model.HelixConfigScope;
+import org.apache.helix.model.IdealState;
+import org.apache.helix.model.HelixConfigScope.ConfigScopeProperty;
+import org.apache.helix.model.IdealState.RebalanceMode;
+import org.apache.helix.model.builder.HelixConfigScopeBuilder;
+import org.apache.log4j.Logger;
+
+import com.google.common.collect.Maps;
+
+/**
+ * Bootstrapping meta resource. Create container type configuration in Helix
+ * zookeeper namespace.
+ * 
+ */
+public class MetaResourceService implements Service {
+
+    static final Logger log = Logger.getLogger(MetaResourceService.class);
+
+    // upper bound of pre-created partitions for the HELIX-226 workaround below
+    static final int    MAX_PARTITIONS = 256;
+
+    String              metaCluster;
+    String              metaAddress;
+    String              name;
+    // full container configuration, stored in the resource config scope
+    Map<String, String> config;
+
+    /**
+     * Reads meta cluster coordinates and the container name, and snapshots
+     * all given properties as the container configuration.
+     */
+    @Override
+    public void configure(Properties properties) throws Exception {
+        metaCluster = properties.getProperty("metacluster", "metacluster");
+        metaAddress = properties.getProperty("metaaddress", "localhost:2199");
+        name = properties.getProperty("name", "container");
+
+        this.config = new HashMap<String, String>();
+        for (Map.Entry<Object, Object> entry : properties.entrySet()) {
+            this.config.put((String) entry.getKey(), (String) entry.getValue());
+        }
+    }
+
+    /**
+     * Creates the meta resource with a USER_DEFINED rebalancer and stores the
+     * container configuration in the resource config scope.
+     */
+    @Override
+    public void start() throws Exception {
+        log.info(String.format("setting up meta resource '%s' at '%s/%s'", name, metaAddress, metaCluster));
+        HelixAdmin admin = new ZKHelixAdmin(metaAddress);
+        try {
+            log.info(String.format("setting up container '%s' (config='%s')", name, config));
+
+            admin.addResource(metaCluster, name, 1, "OnlineOffline", RebalanceMode.USER_DEFINED.toString());
+            IdealState idealState = admin.getResourceIdealState(metaCluster, name);
+            idealState.setRebalancerClassName(ProviderRebalancer.class.getName());
+            idealState.setReplicas("1");
+
+            // BEGIN workaround
+            // FIXME workaround for HELIX-226: pre-create empty list/map fields
+            // for a fixed upper bound of partitions
+            Map<String, List<String>> listFields = Maps.newHashMap();
+            Map<String, Map<String, String>> mapFields = Maps.newHashMap();
+            for (int i = 0; i < MAX_PARTITIONS; i++) {
+                String partitionName = name + "_" + i;
+                listFields.put(partitionName, new ArrayList<String>());
+                mapFields.put(partitionName, new HashMap<String, String>());
+            }
+            idealState.getRecord().setListFields(listFields);
+            idealState.getRecord().setMapFields(mapFields);
+            // END workaround
+
+            admin.setResourceIdealState(metaCluster, name, idealState);
+
+            HelixConfigScope scope = new HelixConfigScopeBuilder(ConfigScopeProperty.RESOURCE, metaCluster, name).build();
+            admin.setConfig(scope, this.config);
+        } finally {
+            // release the zookeeper connection even if setup fails
+            admin.close();
+        }
+        log.info("setup complete");
+    }
+
+    @Override
+    public void stop() throws Exception {
+        // one-shot bootstrap step; nothing to tear down
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/bootstrapper/ResourceService.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/bootstrapper/ResourceService.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/bootstrapper/ResourceService.java
new file mode 100644
index 0000000..0952b36
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/bootstrapper/ResourceService.java
@@ -0,0 +1,61 @@
+package org.apache.helix.autoscale.bootstrapper;
+
+import java.util.Properties;
+
+import org.apache.helix.HelixAdmin;
+import org.apache.helix.autoscale.Service;
+import org.apache.helix.manager.zk.ZKHelixAdmin;
+import org.apache.helix.model.IdealState;
+import org.apache.helix.model.IdealState.RebalanceMode;
+import org.apache.log4j.Logger;
+
+/**
+ * Bootstrapping Helix resource. Create resource in Helix and configure
+ * properties.
+ * 
+ */
+public class ResourceService implements Service {
+
+    static final Logger log = Logger.getLogger(ResourceService.class);
+
+    // managed cluster coordinates and resource parameters, set in configure()
+    String              cluster;
+    String              address;
+    String              container;
+    String              name;
+    String              model;
+    int                 partitions;
+    int                 replica;
+
+    /**
+     * Reads resource parameters (name, container tag, state model, partition
+     * and replica counts) with local defaults.
+     */
+    @Override
+    public void configure(Properties properties) throws Exception {
+        cluster = properties.getProperty("cluster", "cluster");
+        address = properties.getProperty("address", "localhost:2199");
+        name = properties.getProperty("name", "resource");
+        container = properties.getProperty("container", "container");
+        model = properties.getProperty("model", "OnlineOffline");
+        partitions = Integer.parseInt(properties.getProperty("partitions", "1"));
+        replica = Integer.parseInt(properties.getProperty("replica", "1"));
+    }
+
+    /**
+     * Creates the resource in the managed cluster with FULL_AUTO rebalancing
+     * and tags it with the container group.
+     */
+    @Override
+    public void start() throws Exception {
+        log.info(String.format("setting up resource '%s' at '%s/%s'", name, address, cluster));
+        HelixAdmin admin = new ZKHelixAdmin(address);
+        try {
+            log.info(String.format("setting up resource '%s' (container='%s', model='%s', partitions=%d, replica=%d)", name, container, model, partitions, replica));
+
+            admin.addResource(cluster, name, partitions, model, RebalanceMode.FULL_AUTO.toString());
+            IdealState idealState = admin.getResourceIdealState(cluster, name);
+            idealState.setInstanceGroupTag(container);
+            idealState.setReplicas(String.valueOf(replica));
+            admin.setResourceIdealState(cluster, name, idealState);
+        } finally {
+            // release the zookeeper connection even if setup fails
+            admin.close();
+        }
+        log.info("setup complete");
+    }
+
+    @Override
+    public void stop() throws Exception {
+        // one-shot bootstrap step; nothing to tear down
+    }
+
+}


[14/15] Adding Helix-task-framework and Yarn integration modules

Posted by ki...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/helix-core/src/main/java/org/apache/helix/task/TaskRebalancer.java
----------------------------------------------------------------------
diff --git a/helix-core/src/main/java/org/apache/helix/task/TaskRebalancer.java b/helix-core/src/main/java/org/apache/helix/task/TaskRebalancer.java
new file mode 100644
index 0000000..5664713
--- /dev/null
+++ b/helix-core/src/main/java/org/apache/helix/task/TaskRebalancer.java
@@ -0,0 +1,736 @@
+/*
+ * $Id$
+ */
+package org.apache.helix.task;
+
+
+import com.google.common.base.Joiner;
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.Sets;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.SortedSet;
+import java.util.TreeMap;
+import java.util.TreeSet;
+import org.apache.helix.AccessOption;
+import org.apache.helix.HelixDataAccessor;
+import org.apache.helix.HelixManager;
+import org.apache.helix.PropertyKey;
+import org.apache.helix.ZNRecord;
+import org.apache.helix.controller.rebalancer.Rebalancer;
+import org.apache.helix.controller.stages.ClusterDataCache;
+import org.apache.helix.controller.stages.CurrentStateOutput;
+import org.apache.helix.model.IdealState;
+import org.apache.helix.model.Partition;
+import org.apache.helix.model.Resource;
+import org.apache.helix.model.ResourceAssignment;
+import org.apache.log4j.Logger;
+
+
/**
 * Custom rebalancer implementation for the {@code Task} state model.
 * <p/>
 * On each rebalance pass this class advances every task partition through the
 * {@link TaskPartitionState} machine, records progress in the task and workflow
 * context objects kept in the Helix property store, prunes terminal partitions
 * from the stored ideal state, and removes all Helix state for workflows that
 * are marked DELETE or have expired.
 *
 * @author Abe <as...@linkedin.com>
 * @version $Revision$
 */
public class TaskRebalancer implements Rebalancer
{
  private static final Logger LOG = Logger.getLogger(TaskRebalancer.class);
  // Helix connection captured in init(); used for all config/context reads and writes.
  private HelixManager _manager;

  @Override
  public void init(HelixManager manager)
  {
    _manager = manager;
  }

  /**
   * Computes the instance->state assignment for one task resource, updating the
   * task and workflow contexts as a side effect. Returns an empty assignment when
   * the task must not run (unmet parent dependencies, deleted/expired workflow,
   * or task already in a final state).
   */
  @Override
  public ResourceAssignment computeResourceMapping(Resource resource,
                                                   IdealState taskIs,
                                                   CurrentStateOutput currStateOutput,
                                                   ClusterDataCache clusterData)
  {
    final String resourceName = resource.getResourceName();

    // Fetch task configuration
    TaskConfig taskCfg = TaskUtil.getTaskCfg(_manager, resourceName);
    String workflowResource = taskCfg.getWorkflow();

    // Fetch workflow configuration and context
    WorkflowConfig workflowCfg = TaskUtil.getWorkflowCfg(_manager, workflowResource);
    WorkflowContext workflowCtx = TaskUtil.getWorkflowContext(_manager, workflowResource);

    // Initialize workflow context if needed
    if (workflowCtx == null)
    {
      workflowCtx = new WorkflowContext(new ZNRecord("WorkflowContext"));
      workflowCtx.setStartTime(System.currentTimeMillis());
    }

    // Check parent dependencies: this task may only run once every direct parent
    // in the workflow DAG has COMPLETED.
    for (String parent : workflowCfg.getTaskDag().getDirectParents(resourceName))
    {
      if (workflowCtx.getTaskState(parent) == null || !workflowCtx.getTaskState(parent).equals(TaskState.COMPLETED))
      {
        return emptyAssignment(resourceName);
      }
    }

    // Clean up if workflow marked for deletion
    TargetState targetState = workflowCfg.getTargetState();
    if (targetState == TargetState.DELETE)
    {
      cleanup(_manager, resourceName, workflowCfg, workflowResource);
      return emptyAssignment(resourceName);
    }

    // Check if this workflow has been finished past its expiry.
    if (workflowCtx.getFinishTime() != WorkflowContext.UNFINISHED
        && workflowCtx.getFinishTime() + workflowCfg.getExpiry() <= System.currentTimeMillis())
    {
      markForDeletion(_manager, workflowResource);
      cleanup(_manager, resourceName, workflowCfg, workflowResource);
      return emptyAssignment(resourceName);
    }

    // Fetch any existing context information from the property store.
    TaskContext taskCtx = TaskUtil.getTaskContext(_manager, resourceName);
    if (taskCtx == null)
    {
      taskCtx = new TaskContext(new ZNRecord("TaskContext"));
      taskCtx.setStartTime(System.currentTimeMillis());
    }

    // The task is already in a final state (completed/failed).
    if (workflowCtx.getTaskState(resourceName) == TaskState.FAILED
            || workflowCtx.getTaskState(resourceName) == TaskState.COMPLETED)
    {
      return emptyAssignment(resourceName);
    }

    // Fetch the previous resource assignment from the property store. This is
    // required because of HELIX-230.
    ResourceAssignment prevAssignment = TaskUtil.getPrevResourceAssignment(_manager, resourceName);
    if (prevAssignment == null)
    {
      prevAssignment = new ResourceAssignment(resourceName);
    }

    // Will contain the list of partitions that must be explicitly dropped from the ideal state that is stored in zk.
    Set<Integer> partitionsToDrop = new TreeSet<Integer>();

    ResourceAssignment newAssignment = computeResourceMapping(resourceName,
                                                              workflowCfg,
                                                              taskCfg,
                                                              prevAssignment,
                                                              clusterData.getIdealState(taskCfg.getTargetResource()),
                                                              clusterData.getLiveInstances().keySet(),
                                                              currStateOutput,
                                                              workflowCtx,
                                                              taskCtx,
                                                              partitionsToDrop);

    // Prune terminal partitions from the stored ideal state (see HELIX-230).
    if (!partitionsToDrop.isEmpty())
    {
      for (Integer pId : partitionsToDrop)
      {
        taskIs.getRecord().getMapFields().remove(pName(resourceName, pId));
      }
      HelixDataAccessor accessor = _manager.getHelixDataAccessor();
      PropertyKey propertyKey = accessor.keyBuilder().idealStates(resourceName);
      accessor.setProperty(propertyKey, taskIs);
    }

    // Update rebalancer context, previous ideal state.
    TaskUtil.setTaskContext(_manager, resourceName, taskCtx);
    TaskUtil.setWorkflowContext(_manager, workflowResource, workflowCtx);
    TaskUtil.setPrevResourceAssignment(_manager, resourceName, newAssignment);

    return newAssignment;
  }

  /**
   * Core assignment logic: walks the current/pending/requested states of every
   * task partition, mutates the task and workflow contexts accordingly, assigns
   * new partitions to instances with spare capacity, and returns the resulting
   * instance->state mapping. Partitions that reached a terminal state are added
   * to {@code partitionsToDropFromIs} so the caller can prune them from the
   * stored ideal state.
   */
  private static ResourceAssignment computeResourceMapping(String taskResource,
                                                           WorkflowConfig workflowConfig,
                                                           TaskConfig taskCfg,
                                                           ResourceAssignment prevAssignment,
                                                           IdealState tgtResourceIs,
                                                           Iterable<String> liveInstances,
                                                           CurrentStateOutput currStateOutput,
                                                           WorkflowContext workflowCtx,
                                                           TaskContext taskCtx,
                                                           Set<Integer> partitionsToDropFromIs)
  {
    TargetState taskTgtState = workflowConfig.getTargetState();

    // Update running status in workflow context
    if (taskTgtState == TargetState.STOP)
    {
      workflowCtx.setTaskState(taskResource, TaskState.STOPPED);
      // Workflow has been stopped if all tasks are stopped
      if (isWorkflowStopped(workflowCtx, workflowConfig))
      {
        workflowCtx.setWorkflowState(TaskState.STOPPED);
      }
    }
    else
    {
      workflowCtx.setTaskState(taskResource, TaskState.IN_PROGRESS);
      // Workflow is in progress if any task is in progress
      workflowCtx.setWorkflowState(TaskState.IN_PROGRESS);
    }

    // Used to keep track of task partitions that have already been assigned to instances.
    Set<Integer> assignedPartitions = new HashSet<Integer>();

    // Keeps a mapping of (partition) -> (instance, state)
    Map<Integer, PartitionAssignment> paMap = new TreeMap<Integer, PartitionAssignment>();

    // Process all the current assignments of task partitions.
    Set<Integer> allPartitions = getAllTaskPartitions(tgtResourceIs, taskCfg);
    Map<String, SortedSet<Integer>> taskAssignments = getTaskPartitionAssignments(liveInstances,
                                                                                  prevAssignment,
                                                                                  allPartitions);
    for (String instance : taskAssignments.keySet())
    {
      Set<Integer> pSet = taskAssignments.get(instance);
      // Used to keep track of partitions that are in one of the final states: COMPLETED, TIMED_OUT, TASK_ERROR, ERROR.
      Set<Integer> donePartitions = new TreeSet<Integer>();
      for (int pId : pSet)
      {
        final String pName = pName(taskResource, pId);

        // Check for pending state transitions on this (partition, instance).
        String pendingState = currStateOutput.getPendingState(taskResource,
                                                              new Partition(pName),
                                                              instance);
        if (pendingState != null)
        {
          // There is a pending state transition for this (partition, instance). Just copy forward the state
          // assignment from the previous ideal state.
          Map<String, String> stateMap = prevAssignment.getReplicaMap(new Partition(pName));
          if (stateMap != null)
          {
            String prevState = stateMap.get(instance);
            paMap.put(pId, new PartitionAssignment(instance, prevState));
            assignedPartitions.add(pId);
            LOG.debug(String.format("Task partition %s has a pending state transition on instance %s. Using the previous ideal state which was %s.",
                                   pName,
                                   instance,
                                   prevState));
          }

          continue;
        }

        // NOTE(review): getCurrentState may return null if no current state is recorded
        // for this (partition, instance); valueOf(null) would then throw. Confirm that a
        // current state always exists for partitions carried in the previous assignment.
        TaskPartitionState currState = TaskPartitionState.valueOf(currStateOutput.getCurrentState(taskResource,
                                                                                                  new Partition(pName),
                                                                                                  instance));

        // Process any requested state transitions.
        String requestedStateStr = currStateOutput.getRequestedState(taskResource,
                                                                     new Partition(pName),
                                                                     instance);
        if (requestedStateStr != null && !requestedStateStr.isEmpty())
        {
          TaskPartitionState requestedState = TaskPartitionState.valueOf(requestedStateStr);
          if (requestedState.equals(currState))
          {
            LOG.warn(String.format("Requested state %s is the same as the current state for instance %s.",
                                   requestedState,
                                   instance));
          }

          paMap.put(pId, new PartitionAssignment(instance, requestedState.name()));
          assignedPartitions.add(pId);
          LOG.debug(String.format("Instance %s requested a state transition to %s for partition %s.",
                                 instance,
                                 requestedState,
                                 pName));
          continue;
        }

        switch (currState)
        {
          case RUNNING:
          case STOPPED:
          {
            // Keep running partitions running (or stop them) to match the target state.
            TaskPartitionState nextState;
            if (taskTgtState == TargetState.START)
            {
              nextState = TaskPartitionState.RUNNING;
            }
            else
            {
              nextState = TaskPartitionState.STOPPED;
            }

            paMap.put(pId, new PartitionAssignment(instance, nextState.name()));
            assignedPartitions.add(pId);
            LOG.debug(String.format("Setting task partition %s state to %s on instance %s.",
                                   pName,
                                   nextState,
                                   instance));
          }
          break;
          case COMPLETED:
          {
            // The task has completed on this partition. Mark as such in the context object.
            donePartitions.add(pId);
            LOG.debug(String.format("Task partition %s has completed with state %s. Marking as such in rebalancer context.",
                                   pName,
                                   currState));
            partitionsToDropFromIs.add(pId);
            markPartitionCompleted(taskCtx, pId);
          }
          break;
          case TIMED_OUT:
          case TASK_ERROR:
          case ERROR:
          {
            donePartitions.add(pId); // The task may be rescheduled on a different instance.
            LOG.debug(String.format("Task partition %s has error state %s. Marking as such in rebalancer context.",
                                   pName,
                                   currState));
            markPartitionError(taskCtx, pId, currState);
            // The error policy is to fail the task as soon a single partition fails for a specified maximum number of
            // attempts.
            if (taskCtx.getPartitionNumAttempts(pId) >= taskCfg.getMaxAttemptsPerPartition())
            {
              workflowCtx.setTaskState(taskResource, TaskState.FAILED);
              workflowCtx.setWorkflowState(TaskState.FAILED);
              addAllPartitions(tgtResourceIs.getPartitionSet(), partitionsToDropFromIs);
              return emptyAssignment(taskResource);
            }
          }
          break;
          case INIT:
          case DROPPED:
          {
            // currState in [INIT, DROPPED]. Do nothing, the partition is eligible to be reassigned.
            donePartitions.add(pId);
            LOG.debug(String.format("Task partition %s has state %s. It will be dropped from the current ideal state.",
                                   pName,
                                   currState));
          }
          break;
          default:
            throw new AssertionError("Unknown enum symbol: " + currState);
        }
      }

      // Remove the set of task partitions that are completed or in one of the error states.
      pSet.removeAll(donePartitions);
    }

    if (isTaskComplete(taskCtx, allPartitions))
    {
      workflowCtx.setTaskState(taskResource, TaskState.COMPLETED);
      if (isWorkflowComplete(workflowCtx, workflowConfig))
      {
        workflowCtx.setWorkflowState(TaskState.COMPLETED);
        workflowCtx.setFinishTime(System.currentTimeMillis());
      }
    }

    // Make additional task assignments if needed.
    if (taskTgtState == TargetState.START)
    {
      // Contains the set of task partitions that must be excluded from consideration when making any new assignments.
      // This includes all completed, failed, already assigned partitions.
      Set<Integer> excludeSet = Sets.newTreeSet(assignedPartitions);
      addCompletedPartitions(excludeSet, taskCtx, allPartitions);
      // Get instance->[partition, ...] mappings for the target resource.
      Map<String, SortedSet<Integer>> tgtPartitionAssignments = getTgtPartitionAssignment(currStateOutput,
                                                                                          liveInstances,
                                                                                          tgtResourceIs,
                                                                                          taskCfg.getTargetPartitionStates(),
                                                                                          allPartitions);
      // Both maps below are keyed by the same live-instance set, so candidate lookups
      // by instance are non-null.
      for (Map.Entry<String, SortedSet<Integer>> entry : taskAssignments.entrySet())
      {
        String instance = entry.getKey();
        // Contains the set of task partitions currently assigned to the instance.
        Set<Integer> pSet = entry.getValue();
        int numToAssign = taskCfg.getNumConcurrentTasksPerInstance() - pSet.size();
        if (numToAssign > 0)
        {
          List<Integer> nextPartitions = getNextPartitions(tgtPartitionAssignments.get(instance),
                                                           excludeSet,
                                                           numToAssign);
          for (Integer pId : nextPartitions)
          {
            String pName = pName(taskResource, pId);
            paMap.put(pId, new PartitionAssignment(instance, TaskPartitionState.RUNNING.name()));
            excludeSet.add(pId);
            LOG.debug(String.format("Setting task partition %s state to %s on instance %s.",
                                   pName,
                                   TaskPartitionState.RUNNING,
                                   instance));
          }
        }
      }
    }

    // Construct a ResourceAssignment object from the map of partition assignments.
    ResourceAssignment ra = new ResourceAssignment(taskResource);
    for (Map.Entry<Integer, PartitionAssignment> e : paMap.entrySet())
    {
      PartitionAssignment pa = e.getValue();
      ra.addReplicaMap(new Partition(pName(taskResource, e.getKey())), ImmutableMap.of(pa._instance, pa._state));
    }

    return ra;
  }

  /**
   * Checks if the task has completed.
   *
   * @param ctx           The rebalancer context.
   * @param allPartitions The set of partitions to check.
   *
   * @return true if all task partitions have been marked with status {@link TaskPartitionState#COMPLETED} in the rebalancer
   *         context, false otherwise.
   */
  private static boolean isTaskComplete(TaskContext ctx, Set<Integer> allPartitions)
  {
    for (Integer pId : allPartitions)
    {
      TaskPartitionState state = ctx.getPartitionState(pId);
      if (state != TaskPartitionState.COMPLETED)
      {
        return false;
      }
    }
    return true;
  }

  /**
   * Checks if the workflow has completed.
   *
   * @param ctx Workflow context containing task states
   * @param cfg Workflow config containing set of tasks
   *
   * @return returns true if all tasks are {@link TaskState#COMPLETED}, false otherwise.
   */
  private static boolean isWorkflowComplete(WorkflowContext ctx, WorkflowConfig cfg)
  {
    for (String task : cfg.getTaskDag().getAllNodes())
    {
      if(ctx.getTaskState(task) != TaskState.COMPLETED)
      {
        return false;
      }
    }
    return true;
  }

  /**
   * Checks if the workflow has been stopped.
   * <p/>
   * A task with a null state (i.e. one that never started) also counts as stopped here.
   *
   * @param ctx Workflow context containing task states
   * @param cfg Workflow config containing set of tasks
   *
   * @return returns true if all tasks are {@link TaskState#STOPPED}, false otherwise.
   */
  private static boolean isWorkflowStopped(WorkflowContext ctx, WorkflowConfig cfg)
  {
    for (String task : cfg.getTaskDag().getAllNodes())
    {
      if(ctx.getTaskState(task) != TaskState.STOPPED && ctx.getTaskState(task) != null)
      {
        return false;
      }
    }
    return true;
  }

  /** Flips the workflow's target state to DELETE so a later pass performs cleanup. */
  private static void markForDeletion(HelixManager mgr, String resourceName)
  {
    mgr.getConfigAccessor().set(TaskUtil.getResourceConfigScope(mgr.getClusterName(), resourceName),
                                WorkflowConfig.TARGET_STATE,
                                TargetState.DELETE.name());
  }

  /**
   * Cleans up all Helix state associated with this task, wiping workflow-level information if this is the last
   * remaining task in its workflow.
   */
  private static void cleanup(HelixManager mgr, String resourceName, WorkflowConfig cfg, String workflowResource)
  {
    HelixDataAccessor accessor = mgr.getHelixDataAccessor();
    // Delete resource configs.
    PropertyKey cfgKey = getConfigPropertyKey(accessor, resourceName);
    if (!accessor.removeProperty(cfgKey))
    {
      throw new RuntimeException(String.format(
          "Error occurred while trying to clean up task %s. Failed to remove node %s from Helix. Aborting further clean up steps.",
          resourceName,
          cfgKey));
    }
    // Delete property store information for this resource.
    String propStoreKey = getRebalancerPropStoreKey(resourceName);
    if (!mgr.getHelixPropertyStore().remove(propStoreKey, AccessOption.PERSISTENT))
    {
      throw new RuntimeException(String.format(
          "Error occurred while trying to clean up task %s. Failed to remove node %s from Helix. Aborting further clean up steps.",
          resourceName,
          propStoreKey));
    }
    // Finally, delete the ideal state itself.
    PropertyKey isKey = getISPropertyKey(accessor, resourceName);
    if (!accessor.removeProperty(isKey))
    {
      throw new RuntimeException(String.format(
          "Error occurred while trying to clean up task %s. Failed to remove node %s from Helix.",
          resourceName,
          isKey));
    }
    LOG.info(String.format("Successfully cleaned up task resource %s.", resourceName));

    boolean lastInWorkflow = true;
    for(String task : cfg.getTaskDag().getAllNodes())
    {
      // check if property store information or resource configs exist for this task
      if(mgr.getHelixPropertyStore().exists(getRebalancerPropStoreKey(task), AccessOption.PERSISTENT)
              || accessor.getProperty(getConfigPropertyKey(accessor, task)) != null
              || accessor.getProperty(getISPropertyKey(accessor, task)) != null)
      {
        lastInWorkflow = false;
      }
    }

    // clean up task-level info if this was the last in workflow
    if(lastInWorkflow)
    {
      // delete workflow config
      PropertyKey workflowCfgKey = getConfigPropertyKey(accessor, workflowResource);
      if (!accessor.removeProperty(workflowCfgKey))
      {
        throw new RuntimeException(String.format(
                "Error occurred while trying to clean up workflow %s. Failed to remove node %s from Helix. Aborting further clean up steps.",
                workflowResource,
                workflowCfgKey));
      }
      // Delete property store information for this workflow
      String workflowPropStoreKey = getRebalancerPropStoreKey(workflowResource);
      if (!mgr.getHelixPropertyStore().remove(workflowPropStoreKey, AccessOption.PERSISTENT))
      {
        throw new RuntimeException(String.format(
                "Error occurred while trying to clean up workflow %s. Failed to remove node %s from Helix. Aborting further clean up steps.",
                workflowResource,
                workflowPropStoreKey));
      }
    }

  }

  /** Property-store path holding rebalancer context for the given resource. */
  private static String getRebalancerPropStoreKey(String resource)
  {
    return Joiner.on("/").join(TaskConstants.REBALANCER_CONTEXT_ROOT, resource);
  }

  /** Property key for the resource's ideal state. */
  private static PropertyKey getISPropertyKey(HelixDataAccessor accessor, String resource)
  {
    return accessor.keyBuilder().idealStates(resource);
  }

  /** Property key for the resource's config node. */
  private static PropertyKey getConfigPropertyKey(HelixDataAccessor accessor, String resource)
  {
    return accessor.keyBuilder().resourceConfig(resource);
  }

  /** Adds the integer id of every named partition to {@code pIds}. */
  private static void addAllPartitions(Set<String> pNames, Set<Integer> pIds)
  {
    for (String pName : pNames)
    {
      pIds.add(pId(pName));
    }
  }

  /** Returns an assignment with no mapped partitions. */
  private static ResourceAssignment emptyAssignment(String name)
  {
    return new ResourceAssignment(name);
  }

  /** Adds to {@code set} every partition the context has marked COMPLETED. */
  private static void addCompletedPartitions(Set<Integer> set, TaskContext ctx, Iterable<Integer> pIds)
  {
    for (Integer pId : pIds)
    {
      TaskPartitionState state = ctx.getPartitionState(pId);
      if (state == TaskPartitionState.COMPLETED)
      {
        set.add(pId);
      }
    }
  }

  /**
   * Returns the set of all partition ids for a task.
   * <p/>
   * If a set of partition ids was explicitly specified in the config, that is used. Otherwise, we use the list of all
   * partition ids from the target resource.
   */
  private static Set<Integer> getAllTaskPartitions(IdealState tgtResourceIs, TaskConfig taskCfg)
  {
    Set<Integer> taskPartitions = new HashSet<Integer>();
    if (taskCfg.getTargetPartitions() != null)
    {
      for (Integer pId : taskCfg.getTargetPartitions())
      {
        taskPartitions.add(pId);
      }
    }
    else
    {
      for (String pName : tgtResourceIs.getPartitionSet())
      {
        taskPartitions.add(pId(pName));
      }
    }

    return taskPartitions;
  }

  /** Returns up to {@code n} candidate partitions (in sorted order) that are not excluded. */
  private static List<Integer> getNextPartitions(SortedSet<Integer> candidatePartitions, Set<Integer> excluded, int n)
  {
    List<Integer> result = new ArrayList<Integer>(n);
    for (Integer pId : candidatePartitions)
    {
      if (result.size() >= n)
      {
        break;
      }

      if (!excluded.contains(pId))
      {
        result.add(pId);
      }
    }

    return result;
  }

  /** Records a COMPLETED terminal state, finish time, and attempt count for the partition. */
  private static void markPartitionCompleted(TaskContext ctx, int pId)
  {
    ctx.setPartitionState(pId, TaskPartitionState.COMPLETED);
    ctx.setPartitionFinishTime(pId, System.currentTimeMillis());
    ctx.incrementNumAttempts(pId);
  }

  /** Records an error terminal state, finish time, and attempt count for the partition. */
  private static void markPartitionError(TaskContext ctx, int pId, TaskPartitionState state)
  {
    ctx.setPartitionState(pId, state);
    ctx.setPartitionFinishTime(pId, System.currentTimeMillis());
    ctx.incrementNumAttempts(pId);
  }

  /**
   * Get partition assignments for the target resource, but only for the partitions of interest.
   *
   * @param currStateOutput The current state of the instances in the cluster.
   * @param instanceList    The set of instances.
   * @param tgtIs           The ideal state of the target resource.
   * @param tgtStates       Only partitions in this set of states will be considered. If null, partitions do not need to
   *                        be in any specific state to be considered.
   * @param includeSet      The set of partitions to consider.
   *
   * @return A map of instance vs set of partition ids assigned to that instance.
   */
  private static Map<String, SortedSet<Integer>> getTgtPartitionAssignment(CurrentStateOutput currStateOutput,
                                                                           Iterable<String> instanceList,
                                                                           IdealState tgtIs,
                                                                           Set<String> tgtStates,
                                                                           Set<Integer> includeSet)
  {
    Map<String, SortedSet<Integer>> result = new HashMap<String, SortedSet<Integer>>();
    for (String instance : instanceList)
    {
      result.put(instance, new TreeSet<Integer>());
    }

    for (String pName : tgtIs.getPartitionSet())
    {
      int pId = pId(pName);
      if (includeSet.contains(pId))
      {
        for (String instance : instanceList)
        {
          String state = currStateOutput.getCurrentState(tgtIs.getResourceName(), new Partition(pName), instance);
          if (tgtStates == null || tgtStates.contains(state))
          {
            result.get(instance).add(pId);
          }
        }
      }
    }

    return result;
  }

  /**
   * Return the assignment of task partitions per instance.
   */
  private static Map<String, SortedSet<Integer>> getTaskPartitionAssignments(Iterable<String> instanceList,
                                                                             ResourceAssignment assignment,
                                                                             Set<Integer> includeSet)
  {
    Map<String, SortedSet<Integer>> result = new HashMap<String, SortedSet<Integer>>();
    for (String instance : instanceList)
    {
      result.put(instance, new TreeSet<Integer>());
    }

    for (Partition partition : assignment.getMappedPartitions())
    {
      int pId = pId(partition.getPartitionName());
      if (includeSet.contains(pId))
      {
        Map<String, String> replicaMap = assignment.getReplicaMap(partition);
        for (String instance : replicaMap.keySet())
        {
          // Instances not in instanceList (e.g. no longer live) are silently skipped.
          SortedSet<Integer> pList = result.get(instance);
          if (pList != null)
          {
            pList.add(pId);
          }
        }
      }
    }

    return result;
  }

  /**
   * Computes the partition name given the resource name and partition id.
   */
  private static String pName(String resource, int pId)
  {
    return resource + "_" + pId;
  }

  /**
   * Extracts the partition id from the given partition name.
   * Assumes the substring after the last '_' is a valid integer.
   */
  private static int pId(String pName)
  {
    String[] tokens = pName.split("_");
    return Integer.valueOf(tokens[tokens.length - 1]);
  }

  /**
   * An (instance, state) pair.
   */
  private static class PartitionAssignment
  {
    private final String _instance;
    private final String _state;

    private PartitionAssignment(String instance, String state)
    {
      _instance = instance;
      _state = state;
    }
  }
}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/helix-core/src/main/java/org/apache/helix/task/TaskResult.java
----------------------------------------------------------------------
diff --git a/helix-core/src/main/java/org/apache/helix/task/TaskResult.java b/helix-core/src/main/java/org/apache/helix/task/TaskResult.java
new file mode 100644
index 0000000..d54e170
--- /dev/null
+++ b/helix-core/src/main/java/org/apache/helix/task/TaskResult.java
@@ -0,0 +1,63 @@
+/*
+ * $Id$
+ */
+package org.apache.helix.task;
+
+
/**
 * Immutable outcome of a single task execution: a {@link Status} code plus an
 * opaque info string supplied by the {@link Task} implementation.
 *
 * @author Abe <as...@linkedin.com>
 * @version $Revision$
 */
public class TaskResult {
  /** Status codes describing how the task finished. */
  public enum Status {
    /** The task completed normally. */
    COMPLETED,
    /** The task was cancelled externally, i.e. {@link org.apache.helix.task.Task#cancel()} was called. */
    CANCELED,
    /** The task encountered an error from which it could not recover. */
    ERROR
  }

  private final Status _status;
  private final String _info;

  /**
   * Constructs a new {@link TaskResult}.
   *
   * @param status the status code
   * @param info   free-form payload interpreted by the {@link Task} that produced this
   *               result; may encode progress or checkpoint information used to resume
   *               from where a previous execution left off
   */
  public TaskResult(Status status, String info) {
    _status = status;
    _info = info;
  }

  /** @return the status code for this result */
  public Status getStatus() {
    return _status;
  }

  /** @return the task-defined info payload */
  public String getInfo() {
    return _info;
  }

  @Override
  public String toString() {
    // Same rendering as the original hand-built concatenation, including null handling.
    return String.format("TaskResult{_status=%s, _info='%s'}", _status, _info);
  }
}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/helix-core/src/main/java/org/apache/helix/task/TaskRunner.java
----------------------------------------------------------------------
diff --git a/helix-core/src/main/java/org/apache/helix/task/TaskRunner.java b/helix-core/src/main/java/org/apache/helix/task/TaskRunner.java
new file mode 100644
index 0000000..f071b1c
--- /dev/null
+++ b/helix-core/src/main/java/org/apache/helix/task/TaskRunner.java
@@ -0,0 +1,190 @@
+/*
+ * $Id$
+ */
+package org.apache.helix.task;
+
+
+import org.apache.helix.HelixManager;
+import org.apache.log4j.Logger;
+
+
+/**
+ * A wrapping {@link Runnable} used to manage the life-cycle of a user-defined {@link Task} implementation.
+ *
+ * @author Abe <as...@linkedin.com>
+ * @version $Revision$
+ */
+public class TaskRunner implements Runnable
+{
+  private static final Logger LOG = Logger.getLogger(TaskRunner.class);
+  private final HelixManager _manager;
+  private final String _taskName;
+  private final String _taskPartition;
+  private final String _sessionId;
+  private final String _instance;
+  // Synchronization object used to signal that the task has been scheduled on a thread.
+  private final Object _startedSync = new Object();
+  // Synchronization object used to signal that the task has finished.
+  private final Object _doneSync = new Object();
+  private final Task _task;
+  // Stores the result of the task once it has finished.
+  private volatile TaskResult _result = null;
+  // If true, indicates that the task has started.
+  private volatile boolean _started = false;
+  // If true, indicates that the task was canceled due to a task timeout.
+  private volatile boolean _timeout = false;
+  // If true, indicates that the task has finished.
+  private volatile boolean _done = false;
+
+  public TaskRunner(Task task,
+                    String taskName,
+                    String taskPartition,
+                    String instance,
+                    HelixManager manager,
+                    String sessionId)
+  {
+    _task = task;
+    _taskName = taskName;
+    _taskPartition = taskPartition;
+    _instance = instance;
+    _manager = manager;
+    _sessionId = sessionId;
+  }
+
+  @Override
+  public void run()
+  {
+    try
+    {
+      signalStarted();
+      _result = _task.run();
+
+      switch (_result.getStatus())
+      {
+        case COMPLETED:
+          requestStateTransition(TaskPartitionState.COMPLETED);
+          break;
+        case CANCELED:
+          if (_timeout)
+          {
+            requestStateTransition(TaskPartitionState.TIMED_OUT);
+          }
+          // Else the state transition to CANCELED was initiated by the controller.
+          break;
+        case ERROR:
+          requestStateTransition(TaskPartitionState.TASK_ERROR);
+          break;
+        default:
+          throw new AssertionError("Unknown result type.");
+      }
+    }
+    catch (Exception e)
+    {
+      requestStateTransition(TaskPartitionState.TASK_ERROR);
+    }
+    finally
+    {
+      synchronized (_doneSync)
+      {
+        _done = true;
+        _doneSync.notifyAll();
+      }
+    }
+  }
+
+  /**
+   * Signals the task to cancel itself.
+   */
+  public void timeout()
+  {
+    _timeout = true;
+    cancel();
+  }
+
+  /**
+   * Signals the task to cancel itself.
+   */
+  public void cancel()
+  {
+    _task.cancel();
+  }
+
+  /**
+   * Waits uninterruptibly until the task has started.
+   */
+  public void waitTillStarted()
+  {
+    synchronized (_startedSync)
+    {
+      while (!_started)
+      {
+        try
+        {
+          _startedSync.wait();
+        }
+        catch (InterruptedException e)
+        {
+          LOG.warn(String.format("Interrupted while waiting for task %s to start.", _taskPartition), e);
+        }
+      }
+    }
+  }
+
+  /**
+   * Waits uninterruptibly until the task has finished, either normally or due to an error/cancellation..
+   */
+  public TaskResult waitTillDone()
+  {
+    synchronized (_doneSync)
+    {
+      while (!_done)
+      {
+        try
+        {
+          _doneSync.wait();
+        }
+        catch (InterruptedException e)
+        {
+          LOG.warn(String.format("Interrupted while waiting for task %s to complete.", _taskPartition), e);
+        }
+      }
+    }
+    return _result;
+  }
+
+  /**
+   * Signals any threads waiting for this task to start.
+   */
+  private void signalStarted()
+  {
+    synchronized (_startedSync)
+    {
+      _started = true;
+      _startedSync.notifyAll();
+    }
+  }
+
+  /**
+   * Requests the controller for a state transition.
+   *
+   * @param state The state transition that is being requested.
+   */
+  private void requestStateTransition(TaskPartitionState state)
+  {
+    boolean success = TaskUtil.setRequestedState(_manager.getHelixDataAccessor(),
+                                                 _instance,
+                                                 _sessionId,
+                                                 _taskName,
+                                                 _taskPartition,
+                                                 state);
+    if (!success)
+    {
+      LOG.error(String.format(
+          "Failed to set the requested state to %s for instance %s, session id %s, task partition %s.",
+          state,
+          _instance,
+          _sessionId,
+          _taskPartition));
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/helix-core/src/main/java/org/apache/helix/task/TaskState.java
----------------------------------------------------------------------
diff --git a/helix-core/src/main/java/org/apache/helix/task/TaskState.java b/helix-core/src/main/java/org/apache/helix/task/TaskState.java
new file mode 100644
index 0000000..cf78109
--- /dev/null
+++ b/helix-core/src/main/java/org/apache/helix/task/TaskState.java
@@ -0,0 +1,31 @@
+/*
+ * $Id$
+ */
+package org.apache.helix.task;
+
+
/**
 * Lifecycle states for an entire task resource. The value is stored in the rebalancer
 * context.
 *
 * @author Abe <as...@linkedin.com>
 * @version $Revision$
 */
public enum TaskState
{
  /** Execution is underway. */
  IN_PROGRESS,
  /** Execution has been halted; the task may be resumed later. */
  STOPPED,
  /** Execution failed; the task cannot be resumed. */
  FAILED,
  /** Every task partition finished normally. */
  COMPLETED
}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/helix-core/src/main/java/org/apache/helix/task/TaskStateModel.java
----------------------------------------------------------------------
diff --git a/helix-core/src/main/java/org/apache/helix/task/TaskStateModel.java b/helix-core/src/main/java/org/apache/helix/task/TaskStateModel.java
new file mode 100644
index 0000000..fa35c63
--- /dev/null
+++ b/helix-core/src/main/java/org/apache/helix/task/TaskStateModel.java
@@ -0,0 +1,266 @@
+/*
+ * $Id$
+ */
+package org.apache.helix.task;
+
+
+import java.util.Map;
+import java.util.Timer;
+import java.util.TimerTask;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.ThreadFactory;
+import org.apache.helix.HelixManager;
+import org.apache.helix.NotificationContext;
+import org.apache.helix.model.Message;
+import org.apache.helix.participant.statemachine.StateModel;
+import org.apache.helix.participant.statemachine.StateModelInfo;
+import org.apache.helix.participant.statemachine.Transition;
+import org.apache.log4j.Logger;
+
+
+/**
+ * @author Abe <as...@linkedin.com>
+ * @version $Revision$
+ */
+@StateModelInfo(states = "{'NOT USED BY HELIX'}", initialState = "INIT")
+public class TaskStateModel extends StateModel
+{
+  private static final Logger LOG = Logger.getLogger(TaskStateModel.class);
+  private final HelixManager _manager;
+  private final ExecutorService _taskExecutor;
+  private final Map<String, TaskFactory> _taskFactoryRegistry;
+  private final Timer _timer = new Timer("TaskStateModel time out daemon", true);
+  private TaskRunner _taskRunner;
+
+  public TaskStateModel(HelixManager manager, Map<String, TaskFactory> taskFactoryRegistry)
+  {
+    _manager = manager;
+    _taskFactoryRegistry = taskFactoryRegistry;
+    _taskExecutor = Executors.newFixedThreadPool(40, new ThreadFactory()
+    {
+      @Override
+      public Thread newThread(Runnable r)
+      {
+        return new Thread(r, "TaskStateModel-thread-pool");
+      }
+    });
+  }
+
+  @Transition(to = "RUNNING", from = "INIT")
+  public void onBecomeRunningFromInit(Message msg, NotificationContext context)
+  {
+    startTask(msg, msg.getPartitionName());
+  }
+
+  @Transition(to = "STOPPED", from = "RUNNING")
+  public String onBecomeStoppedFromRunning(Message msg, NotificationContext context)
+  {
+    String taskPartition = msg.getPartitionName();
+    if (_taskRunner == null)
+    {
+      throw new IllegalStateException(String.format("Invalid state transition. There is no running task for partition %s.",
+                                                    taskPartition));
+    }
+
+    _taskRunner.cancel();
+    TaskResult r = _taskRunner.waitTillDone();
+    LOG.info(String.format("Task %s completed with result %s.", msg.getPartitionName(), r));
+
+    return r.getInfo();
+  }
+
+  @Transition(to = "COMPLETED", from = "RUNNING")
+  public void onBecomeCompletedFromRunning(Message msg, NotificationContext context)
+  {
+    String taskPartition = msg.getPartitionName();
+    if (_taskRunner == null)
+    {
+      throw new IllegalStateException(String.format("Invalid state transition. There is no running task for partition %s.",
+                                                    taskPartition));
+    }
+
+    TaskResult r = _taskRunner.waitTillDone();
+    if (r.getStatus() != TaskResult.Status.COMPLETED)
+    {
+      throw new IllegalStateException(String.format("Partition %s received a state transition to %s but the result status code is %s.",
+                                                    msg.getPartitionName(),
+                                                    msg.getToState(),
+                                                    r.getStatus()));
+    }
+  }
+
+  @Transition(to = "TIMED_OUT", from = "RUNNING")
+  public String onBecomeTimedOutFromRunning(Message msg, NotificationContext context)
+  {
+    String taskPartition = msg.getPartitionName();
+    if (_taskRunner == null)
+    {
+      throw new IllegalStateException(String.format("Invalid state transition. There is no running task for partition %s.",
+                                                    taskPartition));
+    }
+
+    TaskResult r = _taskRunner.waitTillDone();
+    if (r.getStatus() != TaskResult.Status.CANCELED)
+    {
+      throw new IllegalStateException(String.format("Partition %s received a state transition to %s but the result status code is %s.",
+                                                    msg.getPartitionName(),
+                                                    msg.getToState(),
+                                                    r.getStatus()));
+    }
+
+    return r.getInfo();
+  }
+
+  @Transition(to = "TASK_ERROR", from = "RUNNING")
+  public String onBecomeTaskErrorFromRunning(Message msg, NotificationContext context)
+  {
+    String taskPartition = msg.getPartitionName();
+    if (_taskRunner == null)
+    {
+      throw new IllegalStateException(String.format("Invalid state transition. There is no running task for partition %s.",
+                                                    taskPartition));
+    }
+
+    TaskResult r = _taskRunner.waitTillDone();
+    if (r.getStatus() != TaskResult.Status.ERROR)
+    {
+      throw new IllegalStateException(String.format("Partition %s received a state transition to %s but the result status code is %s.",
+                                                    msg.getPartitionName(),
+                                                    msg.getToState(),
+                                                    r.getStatus()));
+    }
+
+    return r.getInfo();
+  }
+
+  @Transition(to = "RUNNING", from = "STOPPED")
+  public void onBecomeRunningFromStopped(Message msg, NotificationContext context)
+  {
+    startTask(msg, msg.getPartitionName());
+  }
+
+  @Transition(to = "DROPPED", from = "INIT")
+  public void onBecomeDroppedFromInit(Message msg, NotificationContext context)
+  {
+    _taskRunner = null;
+  }
+
+  @Transition(to = "DROPPED", from = "RUNNING")
+  public void onBecomeDroppedFromRunning(Message msg, NotificationContext context)
+  {
+    String taskPartition = msg.getPartitionName();
+    if (_taskRunner == null)
+    {
+      throw new IllegalStateException(String.format("Invalid state transition. There is no running task for partition %s.",
+                                                    taskPartition));
+    }
+
+    _taskRunner.cancel();
+    TaskResult r = _taskRunner.waitTillDone();
+    LOG.info(String.format("Task partition %s returned result %s.", msg.getPartitionName(), r));
+    _taskRunner = null;
+  }
+
+  @Transition(to = "DROPPED", from = "COMPLETED")
+  public void onBecomeDroppedFromCompleted(Message msg, NotificationContext context)
+  {
+    _taskRunner = null;
+  }
+
+  @Transition(to = "DROPPED", from = "STOPPED")
+  public void onBecomeDroppedFromStopped(Message msg, NotificationContext context)
+  {
+    _taskRunner = null;
+  }
+
+  @Transition(to = "DROPPED", from = "TIMED_OUT")
+  public void onBecomeDroppedFromTimedOut(Message msg, NotificationContext context)
+  {
+    _taskRunner = null;
+  }
+
+  @Transition(to = "DROPPED", from = "TASK_ERROR")
+  public void onBecomeDroppedFromTaskError(Message msg, NotificationContext context)
+  {
+    _taskRunner = null;
+  }
+
+  @Transition(to = "INIT", from = "RUNNING")
+  public void onBecomeInitFromRunning(Message msg, NotificationContext context)
+  {
+    String taskPartition = msg.getPartitionName();
+    if (_taskRunner == null)
+    {
+      throw new IllegalStateException(String.format("Invalid state transition. There is no running task for partition %s.",
+                                                    taskPartition));
+    }
+
+    _taskRunner.cancel();
+    TaskResult r = _taskRunner.waitTillDone();
+    LOG.info(String.format("Task partition %s returned result %s.", msg.getPartitionName(), r));
+    _taskRunner = null;
+  }
+
+  @Transition(to = "INIT", from = "COMPLETED")
+  public void onBecomeInitFromCompleted(Message msg, NotificationContext context)
+  {
+    _taskRunner = null;
+  }
+
+  @Transition(to = "INIT", from = "STOPPED")
+  public void onBecomeInitFromStopped(Message msg, NotificationContext context)
+  {
+    _taskRunner = null;
+  }
+
+  @Transition(to = "INIT", from = "TIMED_OUT")
+  public void onBecomeInitFromTimedOut(Message msg, NotificationContext context)
+  {
+    _taskRunner = null;
+  }
+
+  @Transition(to = "INIT", from = "TASK_ERROR")
+  public void onBecomeInitFromTaskError(Message msg, NotificationContext context)
+  {
+    _taskRunner = null;
+  }
+
+  @Override
+  public void reset()
+  {
+    if (_taskRunner != null)
+    {
+      _taskRunner.cancel();
+    }
+  }
+
+  private void startTask(Message msg, String taskPartition)
+  {
+    TaskConfig cfg = TaskUtil.getTaskCfg(_manager, msg.getResourceName());
+    TaskFactory taskFactory = _taskFactoryRegistry.get(cfg.getCommand());
+    Task task = taskFactory.createNewTask(cfg.getCommandConfig());
+
+    _taskRunner = new TaskRunner(task,
+                                 msg.getResourceName(),
+                                 taskPartition,
+                                 msg.getTgtName(),
+                                 _manager,
+                                 msg.getTgtSessionId());
+    _taskExecutor.submit(_taskRunner);
+    _taskRunner.waitTillStarted();
+
+    // Set up a timer to cancel the task when its time out expires.
+    _timer.schedule(new TimerTask()
+    {
+      @Override
+      public void run()
+      {
+        if (_taskRunner != null)
+        {
+          _taskRunner.timeout();
+        }
+      }
+    }, cfg.getTimeoutPerPartition());
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/helix-core/src/main/java/org/apache/helix/task/TaskStateModelFactory.java
----------------------------------------------------------------------
diff --git a/helix-core/src/main/java/org/apache/helix/task/TaskStateModelFactory.java b/helix-core/src/main/java/org/apache/helix/task/TaskStateModelFactory.java
new file mode 100644
index 0000000..8aa3868
--- /dev/null
+++ b/helix-core/src/main/java/org/apache/helix/task/TaskStateModelFactory.java
@@ -0,0 +1,34 @@
+/*
+ * $Id$
+ */
+package org.apache.helix.task;
+
+
+import java.util.Map;
+import org.apache.helix.HelixManager;
+import org.apache.helix.participant.statemachine.StateModelFactory;
+
+
+/**
+ * Factory class for {@link TaskStateModel}.
+ *
+ * @author Abe <as...@linkedin.com>
+ * @version $Revision$
+ */
+public class TaskStateModelFactory extends StateModelFactory<TaskStateModel>
+{
+  private final HelixManager _manager;
+  private final Map<String, TaskFactory> _taskFactoryRegistry;
+
+  public TaskStateModelFactory(HelixManager manager, Map<String, TaskFactory> taskFactoryRegistry)
+  {
+    _manager = manager;
+    _taskFactoryRegistry = taskFactoryRegistry;
+  }
+
+  @Override
+  public TaskStateModel createNewStateModel(String partitionName)
+  {
+    return new TaskStateModel(_manager, _taskFactoryRegistry);
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/helix-core/src/main/java/org/apache/helix/task/TaskUtil.java
----------------------------------------------------------------------
diff --git a/helix-core/src/main/java/org/apache/helix/task/TaskUtil.java b/helix-core/src/main/java/org/apache/helix/task/TaskUtil.java
new file mode 100644
index 0000000..d7b235e
--- /dev/null
+++ b/helix-core/src/main/java/org/apache/helix/task/TaskUtil.java
@@ -0,0 +1,161 @@
+/*
+ * $Id$
+ */
+package org.apache.helix.task;
+
+
+import com.google.common.base.Joiner;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import org.apache.helix.AccessOption;
+import org.apache.helix.ConfigAccessor;
+import org.apache.helix.HelixDataAccessor;
+import org.apache.helix.HelixManager;
+import org.apache.helix.PropertyKey;
+import org.apache.helix.ZNRecord;
+import org.apache.helix.model.CurrentState;
+import org.apache.helix.model.HelixConfigScope;
+import org.apache.helix.model.ResourceAssignment;
+import org.apache.helix.model.builder.HelixConfigScopeBuilder;
+import org.apache.log4j.Logger;
+
+
+/**
+ * Static utility methods.
+ *
+ * @author Abe <as...@linkedin.com>
+ * @version $Revision$
+ */
+public class TaskUtil
+{
+  private static final Logger LOG = Logger.getLogger(TaskUtil.class);
+  private static final String CONTEXT_NODE = "Context";
+  private static final String PREV_RA_NODE = "PreviousResourceAssignment";
+
+  /**
+   * Parses task resource configurations in Helix into a {@link TaskConfig} object.
+   *
+   * @param manager      HelixManager object used to connect to Helix.
+   * @param taskResource The name of the task resource.
+   *
+   * @return A {@link TaskConfig} object if Helix contains valid configurations for the task, null otherwise.
+   */
+  public static TaskConfig getTaskCfg(HelixManager manager, String taskResource)
+  {
+    Map<String, String> taskCfg = getResourceConfigMap(manager, taskResource);
+    TaskConfig.Builder b = TaskConfig.Builder.fromMap(taskCfg);
+
+    return b.build();
+  }
+
+  public static WorkflowConfig getWorkflowCfg(HelixManager manager, String workflowResource)
+  {
+    Map<String, String> workflowCfg = getResourceConfigMap(manager, workflowResource);
+    WorkflowConfig.Builder b = WorkflowConfig.Builder.fromMap(workflowCfg);
+
+    return b.build();
+  }
+  public static boolean setRequestedState(HelixDataAccessor accessor,
+                                          String instance,
+                                          String sessionId,
+                                          String resource,
+                                          String partition,
+                                          TaskPartitionState state)
+  {
+    LOG.debug(String.format("Requesting a state transition to %s for partition %s.", state, partition));
+    try
+    {
+      PropertyKey.Builder keyBuilder = accessor.keyBuilder();
+      PropertyKey key = keyBuilder.currentState(instance, sessionId, resource);
+      CurrentState currStateDelta = new CurrentState(resource);
+      currStateDelta.setRequestedState(partition, state.name());
+
+      return accessor.updateProperty(key, currStateDelta);
+    }
+    catch (Exception e)
+    {
+      LOG.error(String.format("Error when requesting a state transition to %s for partition %s.", state, partition), e);
+      return false;
+    }
+  }
+
+  public static HelixConfigScope getResourceConfigScope(String clusterName, String resource)
+  {
+    return new HelixConfigScopeBuilder(HelixConfigScope.ConfigScopeProperty.RESOURCE)
+          .forCluster(clusterName).forResource(resource).build();
+  }
+
+  public static ResourceAssignment getPrevResourceAssignment(HelixManager manager, String resourceName)
+  {
+    ZNRecord r = manager.getHelixPropertyStore().get(Joiner.on("/").join(TaskConstants.REBALANCER_CONTEXT_ROOT,
+                                                                         resourceName, PREV_RA_NODE), null, AccessOption.PERSISTENT);
+    return r != null ? new ResourceAssignment(r) : null;
+  }
+
+  public static void setPrevResourceAssignment(HelixManager manager, String resourceName, ResourceAssignment ra)
+  {
+    manager.getHelixPropertyStore().set(Joiner.on("/").join(TaskConstants.REBALANCER_CONTEXT_ROOT,
+                                                            resourceName, PREV_RA_NODE), ra.getRecord(), AccessOption.PERSISTENT);
+  }
+
+  public static TaskContext getTaskContext(HelixManager manager, String taskResource)
+  {
+    ZNRecord r = manager.getHelixPropertyStore().get(Joiner.on("/").join(TaskConstants.REBALANCER_CONTEXT_ROOT,
+                                                                         taskResource,
+                                                                         CONTEXT_NODE), null, AccessOption.PERSISTENT);
+    return r != null ? new TaskContext(r) : null;
+  }
+
+  public static void setTaskContext(HelixManager manager, String taskResource, TaskContext ctx)
+  {
+    manager.getHelixPropertyStore().set(Joiner.on("/").join(TaskConstants.REBALANCER_CONTEXT_ROOT,
+                                                            taskResource,
+                                                            CONTEXT_NODE), ctx.getRecord(), AccessOption.PERSISTENT);
+  }
+
+  public static WorkflowContext getWorkflowContext(HelixManager manager, String workflowResource)
+  {
+    ZNRecord r = manager.getHelixPropertyStore().get(Joiner.on("/").join(TaskConstants.REBALANCER_CONTEXT_ROOT,
+            workflowResource,
+            CONTEXT_NODE), null, AccessOption.PERSISTENT);
+    return r != null ? new WorkflowContext(r) : null;
+  }
+
+  public static void setWorkflowContext(HelixManager manager, String workflowResource, WorkflowContext ctx)
+  {
+    manager.getHelixPropertyStore().set(Joiner.on("/").join(TaskConstants.REBALANCER_CONTEXT_ROOT,
+            workflowResource,
+            CONTEXT_NODE), ctx.getRecord(), AccessOption.PERSISTENT);
+  }
+
+  public static String getNamespacedTaskName(String singleTaskWorkflow)
+  {
+    return getNamespacedTaskName(singleTaskWorkflow, singleTaskWorkflow);
+  }
+
+  public static String getNamespacedTaskName(String workflowResource, String taskName)
+  {
+    return workflowResource + "_" + taskName;
+  }
+
+  private static Map<String, String> getResourceConfigMap(HelixManager manager, String resource)
+  {
+    HelixConfigScope scope = getResourceConfigScope(manager.getClusterName(), resource);
+    ConfigAccessor configAccessor = manager.getConfigAccessor();
+
+    Map<String, String> taskCfg = new HashMap<String, String>();
+    List<String> cfgKeys = configAccessor.getKeys(scope);
+    if (cfgKeys == null || cfgKeys.isEmpty())
+    {
+      return null;
+    }
+
+    for (String cfgKey : cfgKeys)
+    {
+      taskCfg.put(cfgKey, configAccessor.get(scope, cfgKey));
+    }
+
+    return taskCfg;
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/helix-core/src/main/java/org/apache/helix/task/Workflow.java
----------------------------------------------------------------------
diff --git a/helix-core/src/main/java/org/apache/helix/task/Workflow.java b/helix-core/src/main/java/org/apache/helix/task/Workflow.java
new file mode 100644
index 0000000..0e73e3f
--- /dev/null
+++ b/helix-core/src/main/java/org/apache/helix/task/Workflow.java
@@ -0,0 +1,261 @@
+package org.apache.helix.task;
+
+
+import com.google.common.base.Joiner;
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.Reader;
+import java.io.StringReader;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.TreeMap;
+import org.apache.helix.task.beans.TaskBean;
+import org.apache.helix.task.beans.WorkflowBean;
+import org.yaml.snakeyaml.Yaml;
+import org.yaml.snakeyaml.constructor.Constructor;
+
+
+/**
+ * Houses a task dag and config set to fully describe a task workflow
+ *
+ * @author Chris Beavers <cb...@linkedin.com>
+ */
+public class Workflow
+{
+  /** Default workflow name, useful constant for single-node workflows */
+  public static final String UNSPECIFIED = "UNSPECIFIED";
+
+  /** Workflow name */
+  private String _name;
+
+  /** Holds workflow-level configurations */
+  private WorkflowConfig _workflowConfig;
+
+  /** Contains the per-task configurations for all tasks specified in the provided dag */
+  private Map<String, Map<String, String>> _taskConfigs;
+
+  /** Constructs and validates a workflow against a provided dag and config set */
+  private Workflow(String name, WorkflowConfig workflowConfig, Map<String, Map<String, String>> taskConfigs)
+  {
+    _name = name;
+    _workflowConfig = workflowConfig;
+    _taskConfigs = taskConfigs;
+
+    validate();
+  }
+
+  public String getName()
+  {
+    return _name;
+  }
+
+  public Map<String, Map<String, String>> getTaskConfigs()
+  {
+    return _taskConfigs;
+  }
+
+  public Map<String, String> getResourceConfigMap() throws Exception
+  {
+    Map<String, String> cfgMap = new HashMap<String,String>();
+    cfgMap.put(WorkflowConfig.DAG, _workflowConfig.getTaskDag().toJson());
+    cfgMap.put(WorkflowConfig.EXPIRY, String.valueOf(_workflowConfig.getExpiry()));
+    cfgMap.put(WorkflowConfig.TARGET_STATE, _workflowConfig.getTargetState().name());
+
+    return cfgMap;
+  }
+
+  /**
+   * Parses the YAML description from a file into a {@link Workflow} object.
+   *
+   * @param file An abstract path name to the file containing the workflow description.
+   *
+   * @return A {@link Workflow} object.
+   *
+   * @throws Exception
+   */
+  public static Workflow parse(File file)
+      throws Exception
+  {
+    BufferedReader br = new BufferedReader(new FileReader(file));
+    return parse(br);
+  }
+
+  /**
+   * Parses a YAML description of the workflow into a {@link Workflow} object. The YAML string is of the following
+   * form:
+   * <p/>
+   * <pre>
+   * name: MyFlow
+   * tasks:
+   *   - name : TaskA
+   *     command : SomeTask
+   *     ...
+   *   - name : TaskB
+   *     parents : [TaskA]
+   *     command : SomeOtherTask
+   *     ...
+   *   - name : TaskC
+   *     command : AnotherTask
+   *     ...
+   *   - name : TaskD
+   *     parents : [TaskB, TaskC]
+   *     command : AnotherTask
+   *     ...
+   * </pre>
+   *
+   * @param yaml A YAML string of the above form
+   *
+   * @return A {@link Workflow} object.
+   */
+  public static Workflow parse(String yaml)
+      throws Exception
+  {
+    return parse(new StringReader(yaml));
+  }
+
+  /** Helper function to parse workflow from a generic {@link Reader} */
+  private static Workflow parse(Reader reader) throws Exception
+  {
+    Yaml yaml = new Yaml(new Constructor(WorkflowBean.class));
+    WorkflowBean wf = (WorkflowBean) yaml.load(reader);
+    Builder builder = new Builder(wf.name);
+
+    for (TaskBean task : wf.tasks)
+    {
+      if (task.name == null)
+      {
+        throw new IllegalArgumentException("A task must have a name.");
+      }
+
+      if (task.parents != null)
+      {
+        for (String parent : task.parents)
+        {
+          builder.addParentChildDependency(parent, task.name);
+        }
+      }
+
+      builder.addConfig(task.name, TaskConfig.WORKFLOW_ID, wf.name);
+      builder.addConfig(task.name, TaskConfig.COMMAND, task.command);
+      if (task.commandConfig != null)
+      {
+        builder.addConfig(task.name, TaskConfig.COMMAND_CONFIG, task.commandConfig.toString());
+      }
+      builder.addConfig(task.name, TaskConfig.TARGET_RESOURCE, task.targetResource);
+      if (task.targetPartitionStates != null)
+      {
+        builder.addConfig(task.name, TaskConfig.TARGET_PARTITION_STATES, Joiner.on(",").join(task.targetPartitionStates));
+      }
+      if (task.targetPartitions != null)
+      {
+        builder.addConfig(task.name, TaskConfig.TARGET_PARTITIONS, Joiner.on(",").join(task.targetPartitions));
+      }
+      builder.addConfig(task.name, TaskConfig.MAX_ATTEMPTS_PER_PARTITION, String.valueOf(task.maxAttemptsPerPartition));
+      builder.addConfig(task.name, TaskConfig.NUM_CONCURRENT_TASKS_PER_INSTANCE, String.valueOf(task.numConcurrentTasksPerInstance));
+      builder.addConfig(task.name, TaskConfig.TIMEOUT_PER_PARTITION, String.valueOf(task.timeoutPerPartition));
+    }
+
+    return builder.build();
+  }
+
+  /**
+   * Verifies that all nodes in provided dag have accompanying config and vice-versa.
+   * Also checks dag for cycles and unreachable nodes, and ensures configs are valid.
+   * */
+  public void validate()
+  {
+    // validate dag and configs
+    if(!_taskConfigs.keySet().containsAll(_workflowConfig.getTaskDag().getAllNodes()))
+    {
+      throw new IllegalArgumentException("Nodes specified in DAG missing from config");
+    }
+    else if(!_workflowConfig.getTaskDag().getAllNodes().containsAll(_taskConfigs.keySet()))
+    {
+      throw new IllegalArgumentException("Given DAG lacks nodes with supplied configs");
+    }
+
+    _workflowConfig.getTaskDag().validate();
+
+    for(String node : _taskConfigs.keySet())
+    {
+      buildConfig(node);
+    }
+  }
+
+  /** Builds a TaskConfig from config map. Useful for validating configs */
+  private TaskConfig buildConfig(String task)
+  {
+    return TaskConfig.Builder.fromMap(_taskConfigs.get(task)).build();
+  }
+
+  /** Build a workflow incrementally from dependencies and single configs, validate at build time */
+  public static class Builder
+  {
+    private String _name;
+    private TaskDag _dag;
+    private Map<String, Map<String, String>> _taskConfigs;
+    private long _expiry;
+
+    public Builder(String name)
+    {
+      _name = name;
+      _dag = new TaskDag();
+      _taskConfigs = new TreeMap<String, Map<String, String>>();
+      _expiry = -1;
+    }
+
+    public Builder addConfig(String node, String key, String val)
+    {
+      node = namespacify(node);
+      _dag.addNode(node);
+
+      if(!_taskConfigs.containsKey(node))
+      {
+        _taskConfigs.put(node, new TreeMap<String, String>());
+      }
+      _taskConfigs.get(node).put(key, val);
+
+      return this;
+    }
+
+    public Builder addParentChildDependency(String parent, String child)
+    {
+      parent = namespacify(parent);
+      child = namespacify(child);
+      _dag.addParentToChild(parent, child);
+
+      return this;
+    }
+
+    public Builder setExpiry(long expiry)
+    {
+      _expiry = expiry;
+      return this;
+    }
+
+    public String namespacify(String task)
+    {
+      return TaskUtil.getNamespacedTaskName(_name, task);
+    }
+
+    public Workflow build()
+    {
+      for(String task : _taskConfigs.keySet())
+      {
+        //addConfig(task, TaskConfig.WORKFLOW_ID, _name);
+        _taskConfigs.get(task).put(TaskConfig.WORKFLOW_ID, _name);
+      }
+
+      WorkflowConfig.Builder builder = new WorkflowConfig.Builder();
+      builder.setTaskDag(_dag);
+      builder.setTargetState(TargetState.START);
+      if(_expiry > 0)
+      {
+        builder.setExpiry(_expiry);
+      }
+
+      return new Workflow(_name, builder.build(), _taskConfigs); // calls validate internally
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/helix-core/src/main/java/org/apache/helix/task/WorkflowConfig.java
----------------------------------------------------------------------
diff --git a/helix-core/src/main/java/org/apache/helix/task/WorkflowConfig.java b/helix-core/src/main/java/org/apache/helix/task/WorkflowConfig.java
new file mode 100644
index 0000000..547a291
--- /dev/null
+++ b/helix-core/src/main/java/org/apache/helix/task/WorkflowConfig.java
@@ -0,0 +1,116 @@
+package org.apache.helix.task;
+
+import java.util.Map;
+
+/**
+ * Provides a typed interface to workflow level configurations. Validates the configurations.
+ *
+ * @author Chris Beavers <cb...@linkedin.com>
+ */
+public class WorkflowConfig
+{
+  /* Config field names, as stored in the workflow's resource config. */
+  public static final String DAG = "Dag";
+  public static final String TARGET_STATE = "TargetState";
+  public static final String EXPIRY = "Expiry";
+
+  /* Default expiry: 24 hours, in milliseconds. */
+  public static final long DEFAULT_EXPIRY = 24 * 60 * 60 * 1000;
+
+  /* Member variables. Final: a WorkflowConfig is immutable once built. */
+  private final TaskDag _taskDag;
+  private final TargetState _targetState;
+  private final long _expiry;
+
+  private WorkflowConfig(TaskDag taskDag,
+          TargetState targetState,
+          long expiry)
+  {
+    _taskDag = taskDag;
+    _targetState = targetState;
+    _expiry = expiry;
+  }
+
+  /** @return the DAG of tasks that make up this workflow */
+  public TaskDag getTaskDag()
+  {
+    return _taskDag;
+  }
+
+  /** @return the desired run state of the workflow (e.g. START) */
+  public TargetState getTargetState()
+  {
+    return _targetState;
+  }
+
+  /** @return how long (ms) workflow state is retained; see DEFAULT_EXPIRY */
+  public long getExpiry()
+  {
+    return _expiry;
+  }
+
+  /** Builder with validation; every field has a sensible default. */
+  public static class Builder
+  {
+    private TaskDag _taskDag = TaskDag.EMPTY_DAG;
+    private TargetState _targetState = TargetState.START;
+    private long _expiry = DEFAULT_EXPIRY;
+
+    public Builder()
+    {
+      // Nothing to do
+    }
+
+    /**
+     * Validates the configured values and constructs an immutable
+     * WorkflowConfig.
+     *
+     * @throws IllegalArgumentException if a configured value is invalid
+     */
+    public WorkflowConfig build()
+    {
+      validate();
+
+      return new WorkflowConfig(_taskDag,
+              _targetState,
+              _expiry);
+    }
+
+    public Builder setTaskDag(TaskDag v)
+    {
+      _taskDag = v;
+      return this;
+    }
+
+    public Builder setExpiry(long v)
+    {
+      _expiry = v;
+      return this;
+    }
+
+    public Builder setTargetState(TargetState v)
+    {
+      _targetState = v;
+      return this;
+    }
+
+    /**
+     * Populates a Builder from a raw string map; keys absent from the map
+     * keep their defaults.
+     */
+    public static Builder fromMap(Map<String, String> cfg)
+    {
+      Builder b = new Builder();
+
+      if (cfg.containsKey(EXPIRY))
+      {
+        b.setExpiry(Long.parseLong(cfg.get(EXPIRY)));
+      }
+      if (cfg.containsKey(DAG))
+      {
+        b.setTaskDag(TaskDag.fromJson(cfg.get(DAG)));
+      }
+      if (cfg.containsKey(TARGET_STATE))
+      {
+        b.setTargetState(TargetState.valueOf(cfg.get(TARGET_STATE)));
+      }
+
+      return b;
+    }
+
+    /** @throws IllegalArgumentException if the expiry is negative */
+    private void validate()
+    {
+      if (_expiry < 0)
+      {
+        throw new IllegalArgumentException(String.format("%s has invalid value %s", EXPIRY, _expiry));
+      }
+    }
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/helix-core/src/main/java/org/apache/helix/task/WorkflowContext.java
----------------------------------------------------------------------
diff --git a/helix-core/src/main/java/org/apache/helix/task/WorkflowContext.java b/helix-core/src/main/java/org/apache/helix/task/WorkflowContext.java
new file mode 100644
index 0000000..6840a5a
--- /dev/null
+++ b/helix-core/src/main/java/org/apache/helix/task/WorkflowContext.java
@@ -0,0 +1,110 @@
+package org.apache.helix.task;
+
+import org.apache.helix.HelixProperty;
+import org.apache.helix.ZNRecord;
+
+import java.util.Map;
+import java.util.TreeMap;
+
+/**
+ * Typed interface to the workflow context information stored by {@link TaskRebalancer} in the Helix property store
+ *
+ * @author Chris Beavers <cb...@linkedin.com>
+ */
+public class WorkflowContext extends HelixProperty
+{
+  public static final String WORKFLOW_STATE = "STATE";
+  public static final String START_TIME = "START_TIME";
+  public static final String FINISH_TIME = "FINISH_TIME";
+  public static final String TASK_STATES = "TASK_STATES";
+  public static final int    UNFINISHED = -1;
+
+  public WorkflowContext(ZNRecord record)
+  {
+    super(record);
+  }
+
+  public void setWorkflowState(TaskState s)
+  {
+    if(_record.getSimpleField(WORKFLOW_STATE) == null)
+    {
+      _record.setSimpleField(WORKFLOW_STATE, s.name());
+    }
+    else if(!_record.getSimpleField(WORKFLOW_STATE).equals(TaskState.FAILED.name())
+         && !_record.getSimpleField(WORKFLOW_STATE).equals(TaskState.COMPLETED.name()))
+    {
+      _record.setSimpleField(WORKFLOW_STATE, s.name());
+    }
+  }
+
+  public TaskState getWorkflowState()
+  {
+    String s = _record.getSimpleField(WORKFLOW_STATE);
+    if(s == null)
+    {
+      return null;
+    }
+
+    return TaskState.valueOf(s);
+  }
+
+  public void setTaskState(String taskResource, TaskState s)
+  {
+    Map<String, String> states = _record.getMapField(TASK_STATES);
+    if(states == null)
+    {
+      states = new TreeMap<String, String>();
+      _record.setMapField(TASK_STATES, states);
+    }
+    states.put(taskResource, s.name());
+  }
+
+  public TaskState getTaskState(String taskResource)
+  {
+    Map<String, String> states =  _record.getMapField(TASK_STATES);
+    if(states == null)
+    {
+      return null;
+    }
+
+    String s = states.get(taskResource);
+    if (s == null)
+    {
+      return null;
+    }
+
+    return TaskState.valueOf(s);
+  }
+
+  public void setStartTime(long t)
+  {
+    _record.setSimpleField(START_TIME, String.valueOf(t));
+  }
+
+  public long getStartTime()
+  {
+    String tStr = _record.getSimpleField(START_TIME);
+    if (tStr == null)
+    {
+      return -1;
+    }
+
+    return Long.parseLong(tStr);
+  }
+
+  public void setFinishTime(long t)
+  {
+    _record.setSimpleField(FINISH_TIME, String.valueOf(t));
+  }
+
+  public long getFinishTime()
+  {
+    String tStr = _record.getSimpleField(FINISH_TIME);
+    if (tStr == null)
+    {
+      return UNFINISHED;
+    }
+
+    return Long.parseLong(tStr);
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/helix-core/src/main/java/org/apache/helix/task/beans/TaskBean.java
----------------------------------------------------------------------
diff --git a/helix-core/src/main/java/org/apache/helix/task/beans/TaskBean.java b/helix-core/src/main/java/org/apache/helix/task/beans/TaskBean.java
new file mode 100644
index 0000000..2fe2f6f
--- /dev/null
+++ b/helix-core/src/main/java/org/apache/helix/task/beans/TaskBean.java
@@ -0,0 +1,30 @@
+/*
+ * $Id$
+ */
+package org.apache.helix.task.beans;
+
+
+import java.util.List;
+import java.util.Map;
+import org.apache.helix.task.TaskConfig;
+
+
+/**
+ * Bean class used for parsing task definitions from YAML.
+ *
+ * @author Abe <as...@linkedin.com>
+ * @version $Revision$
+ */
+public class TaskBean
+{
+  // Task name; presumably unique within the workflow - verify against parser
+  public String name;
+  // Names of tasks this task depends on (parents in the workflow DAG)
+  public List<String> parents;
+  // Helix resource this task targets
+  public String targetResource;
+  // Partition states of the target resource on which the task may run
+  public List<String> targetPartitionStates;
+  // Explicit partition ids to run on; assumed to mean "all" when absent - confirm
+  public List<Integer> targetPartitions;
+  // Command to execute for each partition
+  public String command;
+  // Free-form configuration passed to the command
+  public Map<String, Object> commandConfig;
+  // Per-partition limits; defaults taken from TaskConfig
+  public long timeoutPerPartition = TaskConfig.DEFAULT_TIMEOUT_PER_PARTITION;
+  public int numConcurrentTasksPerInstance = TaskConfig.DEFAULT_NUM_CONCURRENT_TASKS_PER_INSTANCE;
+  public int maxAttemptsPerPartition = TaskConfig.DEFAULT_MAX_ATTEMPTS_PER_PARTITION;
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/helix-core/src/main/java/org/apache/helix/task/beans/WorkflowBean.java
----------------------------------------------------------------------
diff --git a/helix-core/src/main/java/org/apache/helix/task/beans/WorkflowBean.java b/helix-core/src/main/java/org/apache/helix/task/beans/WorkflowBean.java
new file mode 100644
index 0000000..e8fcd88
--- /dev/null
+++ b/helix-core/src/main/java/org/apache/helix/task/beans/WorkflowBean.java
@@ -0,0 +1,21 @@
+/*
+ * $Id$
+ */
+package org.apache.helix.task.beans;
+
+
+import java.util.List;
+
+
+/**
+ * Bean class used for parsing workflow definitions from YAML.
+ *
+ * @author Abe <as...@linkedin.com>
+ * @version $Revision$
+ */
+public class WorkflowBean
+{
+  // Workflow name
+  public String name;
+  // Expiry as a string; presumably parsed to milliseconds by the caller - confirm
+  public String expiry;
+  // Definitions of the tasks making up this workflow
+  public List<TaskBean> tasks;
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/helix-core/src/main/java/org/apache/helix/tools/ClusterSetup.java
----------------------------------------------------------------------
diff --git a/helix-core/src/main/java/org/apache/helix/tools/ClusterSetup.java b/helix-core/src/main/java/org/apache/helix/tools/ClusterSetup.java
index a39e571..2131c3c 100644
--- a/helix-core/src/main/java/org/apache/helix/tools/ClusterSetup.java
+++ b/helix-core/src/main/java/org/apache/helix/tools/ClusterSetup.java
@@ -164,6 +164,8 @@ public class ClusterSetup {
         StateModelConfigGenerator.generateConfigForOnlineOffline()));
     addStateModelDef(clusterName, "ScheduledTask", new StateModelDefinition(
         StateModelConfigGenerator.generateConfigForScheduledTaskQueue()));
+    addStateModelDef(clusterName, "Task",
+        new StateModelDefinition(StateModelConfigGenerator.generateConfigForTaskStateModel()));
   }
 
   public void activateCluster(String clusterName, String grandCluster, boolean enable) {

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/helix-core/src/main/java/org/apache/helix/tools/StateModelConfigGenerator.java
----------------------------------------------------------------------
diff --git a/helix-core/src/main/java/org/apache/helix/tools/StateModelConfigGenerator.java b/helix-core/src/main/java/org/apache/helix/tools/StateModelConfigGenerator.java
index 508e447..b8b3aeb 100644
--- a/helix-core/src/main/java/org/apache/helix/tools/StateModelConfigGenerator.java
+++ b/helix-core/src/main/java/org/apache/helix/tools/StateModelConfigGenerator.java
@@ -23,13 +23,15 @@ import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
-
 import org.apache.helix.ZNRecord;
 import org.apache.helix.manager.zk.DefaultSchedulerMessageHandlerFactory;
 import org.apache.helix.manager.zk.ZNRecordSerializer;
-import org.apache.helix.model.Transition;
 import org.apache.helix.model.StateModelDefinition.StateModelDefinitionProperty;
+import org.apache.helix.model.Transition;
 import org.apache.helix.model.builder.StateTransitionTableBuilder;
+import org.apache.helix.task.TaskPartitionState;
+import org.apache.helix.task.TaskConstants;
+
 
 // TODO refactor to use StateModelDefinition.Builder
 public class StateModelConfigGenerator {
@@ -348,4 +350,94 @@ public class StateModelConfigGenerator {
         stateTransitionPriorityList);
     return record;
   }
+
+  public static ZNRecord generateConfigForTaskStateModel()
+  {
+    ZNRecord record = new ZNRecord(TaskConstants.STATE_MODEL_NAME);
+
+    record.setSimpleField(StateModelDefinitionProperty.INITIAL_STATE.toString(), TaskPartitionState.INIT.name());
+    List<String> statePriorityList = new ArrayList<String>();
+    statePriorityList.add(TaskPartitionState.INIT.name());
+    statePriorityList.add(TaskPartitionState.RUNNING.name());
+    statePriorityList.add(TaskPartitionState.STOPPED.name());
+    statePriorityList.add(TaskPartitionState.COMPLETED.name());
+    statePriorityList.add(TaskPartitionState.TIMED_OUT.name());
+    statePriorityList.add(TaskPartitionState.TASK_ERROR.name());
+    statePriorityList.add(TaskPartitionState.DROPPED.name());
+    record.setListField(StateModelDefinitionProperty.STATE_PRIORITY_LIST.toString(), statePriorityList);
+    for (String state : statePriorityList)
+    {
+      String key = state + ".meta";
+      Map<String, String> metadata = new HashMap<String, String>();
+      metadata.put("count", "-1");
+      record.setMapField(key, metadata);
+    }
+
+    List<String> states = new ArrayList<String>();
+    states.add(TaskPartitionState.INIT.name());
+    states.add(TaskPartitionState.RUNNING.name());
+    states.add(TaskPartitionState.STOPPED.name());
+    states.add(TaskPartitionState.COMPLETED.name());
+    states.add(TaskPartitionState.TIMED_OUT.name());
+    states.add(TaskPartitionState.TASK_ERROR.name());
+    states.add(TaskPartitionState.DROPPED.name());
+
+    List<Transition> transitions = new ArrayList<Transition>();
+    transitions.add(new Transition(TaskPartitionState.INIT.name(), TaskPartitionState.RUNNING.name()));
+    transitions.add(new Transition(TaskPartitionState.RUNNING.name(), TaskPartitionState.STOPPED.name()));
+    transitions.add(new Transition(TaskPartitionState.RUNNING.name(), TaskPartitionState.COMPLETED.name()));
+    transitions.add(new Transition(TaskPartitionState.RUNNING.name(), TaskPartitionState.TIMED_OUT.name()));
+    transitions.add(new Transition(TaskPartitionState.RUNNING.name(), TaskPartitionState.TASK_ERROR.name()));
+    transitions.add(new Transition(TaskPartitionState.STOPPED.name(), TaskPartitionState.RUNNING.name()));
+
+    // All states have a transition to DROPPED.
+    transitions.add(new Transition(TaskPartitionState.INIT.name(), TaskPartitionState.DROPPED.name()));
+    transitions.add(new Transition(TaskPartitionState.RUNNING.name(), TaskPartitionState.DROPPED.name()));
+    transitions.add(new Transition(TaskPartitionState.COMPLETED.name(), TaskPartitionState.DROPPED.name()));
+    transitions.add(new Transition(TaskPartitionState.STOPPED.name(), TaskPartitionState.DROPPED.name()));
+    transitions.add(new Transition(TaskPartitionState.TIMED_OUT.name(), TaskPartitionState.DROPPED.name()));
+    transitions.add(new Transition(TaskPartitionState.TASK_ERROR.name(), TaskPartitionState.DROPPED.name()));
+
+    // All states, except DROPPED, have a transition to INIT.
+    transitions.add(new Transition(TaskPartitionState.RUNNING.name(), TaskPartitionState.INIT.name()));
+    transitions.add(new Transition(TaskPartitionState.COMPLETED.name(), TaskPartitionState.INIT.name()));
+    transitions.add(new Transition(TaskPartitionState.STOPPED.name(), TaskPartitionState.INIT.name()));
+    transitions.add(new Transition(TaskPartitionState.TIMED_OUT.name(), TaskPartitionState.INIT.name()));
+    transitions.add(new Transition(TaskPartitionState.TASK_ERROR.name(), TaskPartitionState.INIT.name()));
+
+    StateTransitionTableBuilder builder = new StateTransitionTableBuilder();
+    Map<String, Map<String, String>> next = builder.buildTransitionTable(states, transitions);
+
+    for (String state : statePriorityList)
+    {
+      String key = state + ".next";
+      record.setMapField(key, next.get(state));
+    }
+
+    List<String> stateTransitionPriorityList = new ArrayList<String>();
+    stateTransitionPriorityList.add(String.format("%s-%s", TaskPartitionState.INIT.name(), TaskPartitionState.RUNNING.name()));
+    stateTransitionPriorityList.add(String.format("%s-%s", TaskPartitionState.RUNNING.name(), TaskPartitionState.STOPPED.name()));
+    stateTransitionPriorityList.add(String.format("%s-%s", TaskPartitionState.RUNNING.name(), TaskPartitionState.COMPLETED.name()));
+    stateTransitionPriorityList.add(String.format("%s-%s", TaskPartitionState.RUNNING.name(), TaskPartitionState.TIMED_OUT.name()));
+    stateTransitionPriorityList.add(String.format("%s-%s", TaskPartitionState.RUNNING.name(), TaskPartitionState.TASK_ERROR.name()));
+    stateTransitionPriorityList.add(String.format("%s-%s", TaskPartitionState.STOPPED.name(), TaskPartitionState.RUNNING.name()));
+
+    stateTransitionPriorityList.add(String.format("%s-%s", TaskPartitionState.INIT.name(), TaskPartitionState.DROPPED.name()));
+    stateTransitionPriorityList.add(String.format("%s-%s", TaskPartitionState.RUNNING.name(), TaskPartitionState.DROPPED.name()));
+    stateTransitionPriorityList.add(String.format("%s-%s", TaskPartitionState.COMPLETED.name(), TaskPartitionState.DROPPED.name()));
+    stateTransitionPriorityList.add(String.format("%s-%s", TaskPartitionState.STOPPED.name(), TaskPartitionState.DROPPED.name()));
+    stateTransitionPriorityList.add(String.format("%s-%s", TaskPartitionState.TIMED_OUT.name(), TaskPartitionState.DROPPED.name()));
+    stateTransitionPriorityList.add(String.format("%s-%s", TaskPartitionState.TASK_ERROR.name(), TaskPartitionState.DROPPED.name()));
+
+    stateTransitionPriorityList.add(String.format("%s-%s", TaskPartitionState.RUNNING.name(), TaskPartitionState.INIT.name()));
+    stateTransitionPriorityList.add(String.format("%s-%s", TaskPartitionState.COMPLETED.name(), TaskPartitionState.INIT.name()));
+    stateTransitionPriorityList.add(String.format("%s-%s", TaskPartitionState.STOPPED.name(), TaskPartitionState.INIT.name()));
+    stateTransitionPriorityList.add(String.format("%s-%s", TaskPartitionState.TIMED_OUT.name(), TaskPartitionState.INIT.name()));
+    stateTransitionPriorityList.add(String.format("%s-%s", TaskPartitionState.TASK_ERROR.name(), TaskPartitionState.INIT.name()));
+
+    record.setListField(StateModelDefinitionProperty.STATE_TRANSITION_PRIORITYLIST.toString(),
+                        stateTransitionPriorityList);
+
+    return record;
+  }
 }

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/helix-core/src/test/java/org/apache/helix/DummyProcessThread.java
----------------------------------------------------------------------
diff --git a/helix-core/src/test/java/org/apache/helix/DummyProcessThread.java b/helix-core/src/test/java/org/apache/helix/DummyProcessThread.java
index f51aa1d..fbe20d5 100644
--- a/helix-core/src/test/java/org/apache/helix/DummyProcessThread.java
+++ b/helix-core/src/test/java/org/apache/helix/DummyProcessThread.java
@@ -27,9 +27,8 @@ import org.apache.log4j.Logger;
 
 public class DummyProcessThread implements Runnable {
   private static final Logger LOG = Logger.getLogger(DummyProcessThread.class);
-
-  HelixManager _manager;
-  String _instanceName;
+  private final HelixManager _manager;
+  private final String _instanceName;
 
   public DummyProcessThread(HelixManager manager, String instanceName) {
     _manager = manager;
@@ -40,8 +39,6 @@ public class DummyProcessThread implements Runnable {
   public void run() {
     try {
       DummyStateModelFactory stateModelFactory = new DummyStateModelFactory(0);
-      // StateMachineEngine genericStateMachineHandler =
-      // new StateMachineEngine();
       StateMachineEngine stateMach = _manager.getStateMachineEngine();
       stateMach.registerStateModelFactory("MasterSlave", stateModelFactory);
 
@@ -51,9 +48,6 @@ public class DummyProcessThread implements Runnable {
           new DummyOnlineOfflineStateModelFactory(10);
       stateMach.registerStateModelFactory("LeaderStandby", stateModelFactory1);
       stateMach.registerStateModelFactory("OnlineOffline", stateModelFactory2);
-      // _manager.getMessagingService()
-      // .registerMessageHandlerFactory(MessageType.STATE_TRANSITION.toString(),
-      // genericStateMachineHandler);
 
       _manager.connect();
       Thread.currentThread().join();
@@ -61,9 +55,7 @@ public class DummyProcessThread implements Runnable {
       String msg =
           "participant:" + _instanceName + ", " + Thread.currentThread().getName() + " interrupted";
       LOG.info(msg);
-      // System.err.println(msg);
     } catch (Exception e) {
-      // TODO Auto-generated catch block
       e.printStackTrace();
     }
   }

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/helix-core/src/test/java/org/apache/helix/integration/ZkIntegrationTestBase.java
----------------------------------------------------------------------
diff --git a/helix-core/src/test/java/org/apache/helix/integration/ZkIntegrationTestBase.java b/helix-core/src/test/java/org/apache/helix/integration/ZkIntegrationTestBase.java
index 2ab0aaf..fbf0601 100644
--- a/helix-core/src/test/java/org/apache/helix/integration/ZkIntegrationTestBase.java
+++ b/helix-core/src/test/java/org/apache/helix/integration/ZkIntegrationTestBase.java
@@ -79,8 +79,7 @@ public class ZkIntegrationTestBase {
   }
 
   protected String getShortClassName() {
-    String className = this.getClass().getName();
-    return className.substring(className.lastIndexOf('.') + 1);
+    return this.getClass().getSimpleName();
   }
 
   protected String getCurrentLeader(ZkClient zkClient, String clusterName) {


[04/15] Adding Helix-task-framework and Yarn integration modules

Posted by ki...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/ProviderProcess.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/ProviderProcess.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/ProviderProcess.java
new file mode 100644
index 0000000..9ea713c
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/ProviderProcess.java
@@ -0,0 +1,82 @@
+package org.apache.helix.metamanager.provider;
+
+import java.util.Properties;
+
+import org.apache.helix.HelixAdmin;
+import org.apache.helix.HelixManager;
+import org.apache.helix.HelixManagerFactory;
+import org.apache.helix.InstanceType;
+import org.apache.helix.manager.zk.ZKHelixAdmin;
+import org.apache.helix.metamanager.ClusterAdmin;
+import org.apache.helix.metamanager.ContainerProvider;
+import org.apache.helix.metamanager.HelixClusterAdmin;
+import org.apache.helix.metamanager.Service;
+import org.apache.helix.model.InstanceConfig;
+import org.apache.log4j.Logger;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * Helix participant for ContainerProvider. Configurable via ProviderProperties
+ * and runnable service.
+ * 
+ */
+public class ProviderProcess implements Service {
+    static final Logger log = Logger.getLogger(ProviderProcess.class);
+
+    ClusterAdmin        admin;
+
+    ProviderProperties  properties;
+    ContainerProvider   provider;
+    HelixAdmin          helixAdmin;
+    HelixManager        participantManager;
+
+    /**
+     * Validates the given properties and stores them as this process's
+     * configuration.
+     *
+     * @throws IllegalArgumentException if required provider properties are missing
+     */
+    @Override
+    public void configure(Properties properties) throws Exception {
+        Preconditions.checkNotNull(properties);
+        ProviderProperties providerProperties = new ProviderProperties();
+        providerProperties.putAll(properties);
+        Preconditions.checkArgument(providerProperties.isValid());
+
+        this.properties = providerProperties;
+    }
+
+    /** Sets the container provider backing this participant. Must be called before start(). */
+    public void setContainerProvider(ContainerProvider provider) {
+        this.provider = provider;
+    }
+
+    /**
+     * @deprecated misspelled; use {@link #setContainerProvider(ContainerProvider)}.
+     *             Retained for backward compatibility with existing callers.
+     */
+    @Deprecated
+    public void setConteinerProvider(ContainerProvider provider) {
+        setContainerProvider(provider);
+    }
+
+    /**
+     * Registers this provider as an instance of the meta cluster and connects
+     * it as a Helix participant using the OnlineOffline state model.
+     */
+    @Override
+    public void start() throws Exception {
+        Preconditions.checkNotNull(provider);
+
+        log.info(String.format("Registering provider '%s' at '%s/%s'", properties.getName(), properties.getMetaAddress(), properties.getMetaCluster()));
+        HelixAdmin metaHelixAdmin = new ZKHelixAdmin(properties.getMetaAddress());
+        metaHelixAdmin.addInstance(properties.getMetaCluster(), new InstanceConfig(properties.getName()));
+        metaHelixAdmin.close();
+
+        log.info(String.format("Starting provider '%s'", properties.getName()));
+        helixAdmin = new ZKHelixAdmin(properties.getAddress());
+        admin = new HelixClusterAdmin(properties.getCluster(), helixAdmin);
+
+        participantManager = HelixManagerFactory.getZKHelixManager(properties.getMetaCluster(), properties.getName(), InstanceType.PARTICIPANT,
+                properties.getMetaAddress());
+        participantManager.getStateMachineEngine().registerStateModelFactory("OnlineOffline", new ProviderStateModelFactory(provider, admin));
+        participantManager.connect();
+
+        log.info(String.format("Successfully started provider '%s'", properties.getName()));
+    }
+
+    /** Disconnects the participant and releases Helix admin resources. Safe to call repeatedly. */
+    @Override
+    public void stop() {
+        log.info(String.format("Stopping provider '%s'", properties.getName()));
+        if (participantManager != null) {
+            participantManager.disconnect();
+            participantManager = null;
+        }
+        if (helixAdmin != null) {
+            helixAdmin.close();
+            helixAdmin = null;
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/ProviderProperties.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/ProviderProperties.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/ProviderProperties.java
new file mode 100644
index 0000000..098592a
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/ProviderProperties.java
@@ -0,0 +1,97 @@
+package org.apache.helix.metamanager.provider;
+
+import java.util.Collections;
+import java.util.Map;
+import java.util.Properties;
+import java.util.Set;
+
+import org.apache.helix.metamanager.bootstrapper.BootUtils;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * Base configuration for {@link ProviderProcess}. 
+ *
+ */
+public class ProviderProperties extends Properties {
+	/**
+	 * 
+	 */
+	private static final long serialVersionUID = -2209509977839674160L;
+	
+	/* Required property keys for a container provider. */
+	public final static String ADDRESS = "address";
+	public final static String CLUSTER = "cluster";
+    public final static String METAADDRESS = "metaaddress";
+    public final static String METACLUSTER = "metacluster";
+	public final static String NAME = "name";
+	
+	/* Namespace prefix under which per-container configurations are nested. */
+	public final static String CONTAINER_NAMESPACE = "containers";
+	
+	/** @return true if all required keys are present */
+	public boolean isValid() {
+		return(containsKey(ADDRESS) &&
+		       containsKey(CLUSTER) &&
+		       containsKey(METAADDRESS) &&
+               containsKey(METACLUSTER) &&
+               containsKey(NAME));
+	}
+	
+	/** ZooKeeper address of the managed cluster. */
+	public String getAddress() {
+		return getProperty(ADDRESS);
+	}
+	
+	/** Name of the managed cluster. */
+	public String getCluster() {
+	    return getProperty(CLUSTER);
+	}
+	
+    /** ZooKeeper address of the meta cluster. */
+    public String getMetaAddress() {
+        return getProperty(METAADDRESS);
+    }
+    
+    /** Name of the meta cluster. */
+    public String getMetaCluster() {
+        return getProperty(METACLUSTER);
+    }
+    
+	/** Instance name of this provider. */
+	public String getName() {
+	    return getProperty(NAME);
+	}
+	
+	/** @return ids of all configured container types, or an empty set if none */
+	public Set<String> getContainers() {
+        if(!BootUtils.hasNamespace(this, CONTAINER_NAMESPACE))
+            return Collections.emptySet();
+	    return BootUtils.getNamespaces(BootUtils.getNamespace(this, CONTAINER_NAMESPACE));
+	}
+	
+	/** @return true if a container config with the given id exists */
+	public boolean hasContainer(String id) {
+	    if(!BootUtils.hasNamespace(this, CONTAINER_NAMESPACE)) return false;
+	    if(!BootUtils.hasNamespace(BootUtils.getNamespace(this, CONTAINER_NAMESPACE), id)) return false;
+	    return true;
+	}
+	
+	/**
+	 * Returns the configuration for the container with the given id.
+	 *
+	 * @throws IllegalArgumentException if the container is not configured
+	 */
+	public Properties getContainer(String id) {
+	    Preconditions.checkArgument(BootUtils.hasNamespace(this, CONTAINER_NAMESPACE), "no container namespace");
+        Preconditions.checkArgument(BootUtils.hasNamespace(BootUtils.getNamespace(this, CONTAINER_NAMESPACE), id), "container %s not configured", id);
+	    return BootUtils.getNamespace(BootUtils.getNamespace(this, CONTAINER_NAMESPACE), id);
+	}
+	
+	/**
+	 * Registers a new container type by flattening its properties under the
+	 * container namespace.
+	 *
+	 * @throws IllegalArgumentException if the id is already registered
+	 */
+	public void addContainer(String id, Properties properties) {
+	    Preconditions.checkArgument(!getContainers().contains(id), "Already contains container type %s", id);
+	    
+	    // add container config
+        for(Map.Entry<Object, Object> entry : properties.entrySet()) {
+            this.put(CONTAINER_NAMESPACE + "." + id + "." + entry.getKey(), entry.getValue());
+        }
+	}
+
+    /**
+     * NOTE(review): unlike java.util.Properties, this throws IllegalStateException
+     * for a missing key instead of returning null - callers must check first.
+     */
+    @Override
+    public Object get(Object key) {
+        Preconditions.checkState(containsKey(key));
+        return super.get(key);
+    }
+    
+    /**
+     * NOTE(review): same contract deviation as {@link #get(Object)} - throws
+     * rather than returning null for an absent key.
+     */
+    @Override
+    public String getProperty(String key) {
+        Preconditions.checkState(containsKey(key));
+        return super.getProperty(key);
+    }
+    
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/ProviderRebalancer.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/ProviderRebalancer.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/ProviderRebalancer.java
new file mode 100644
index 0000000..4be1a05
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/ProviderRebalancer.java
@@ -0,0 +1,352 @@
+package org.apache.helix.metamanager.provider;
+
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.SortedSet;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.helix.HelixManager;
+import org.apache.helix.controller.rebalancer.Rebalancer;
+import org.apache.helix.controller.stages.ClusterDataCache;
+import org.apache.helix.controller.stages.CurrentStateOutput;
+import org.apache.helix.metamanager.StatusProvider;
+import org.apache.helix.metamanager.TargetProvider;
+import org.apache.helix.model.IdealState;
+import org.apache.helix.model.LiveInstance;
+import org.apache.helix.model.Partition;
+import org.apache.helix.model.Resource;
+import org.apache.helix.model.ResourceAssignment;
+import org.apache.log4j.Logger;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.Sets;
+
+/**
+ * Rebalancer for meta cluster. Polls {@link TargetProvider} and
+ * {@link StatusProvider} and reads and sets IdealState of meta cluster participants (
+ * {@link ProviderProcess}). The number of active container is set to the target
+ * count. Failed containers are shut down and restarted on any available
+ * provider. Also, container counts are balanced across multiple providers.<br/>
+ * <b>NOTE:</b> status and target provider are injected via
+ * {@link ProviderRebalancerSingleton}<br/>
+ * <br/>
+ * <b>IdealState mapping:</b><br/>
+ * resource = container type<br/>
+ * partition = logical container instance<br/>
+ * instance = container provider<br/>
+ * status = physical container instance presence<br/>
+ */
+public class ProviderRebalancer implements Rebalancer {
+
+    static final Logger log                 = Logger.getLogger(ProviderRebalancer.class);
+
+    // NOTE(review): UPDATE_INTERVAL_MIN, lock and nextUpdate are not referenced anywhere in
+    // this class — presumably intended for throttling rebalance passes; confirm before removing.
+    static final long   UPDATE_INTERVAL_MIN = 1500;
+
+    static final Object lock                = new Object();
+    static long         nextUpdate          = 0;
+
+    TargetProvider      targetProvider;
+    StatusProvider      statusProvider;
+    HelixManager        manager;
+
+    /**
+     * Wires in the target and status providers. These are injected statically via
+     * {@link ProviderRebalancerSingleton} rather than through Helix configuration.
+     */
+    @Override
+    public void init(HelixManager manager) {
+        this.targetProvider = ProviderRebalancerSingleton.getTargetProvider();
+        this.statusProvider = ProviderRebalancerSingleton.getStatusProvider();
+        this.manager = manager;
+    }
+
+    /**
+     * Computes the container-to-provider assignment for one resource (container type).
+     * Keeps healthy, correctly placed containers where they are; sends failed or
+     * over-assigned containers OFFLINE; places unassigned containers on the least
+     * loaded provider until the target count is reached.
+     */
+    @Override
+    public ResourceAssignment computeResourceMapping(Resource resource, IdealState idealState, CurrentStateOutput currentStateOutput,
+            ClusterDataCache clusterData) {
+
+        final String resourceName = resource.getResourceName();
+        final String containerType = resourceName;
+
+        final SortedSet<String> allContainers = Sets.newTreeSet(new IndexedNameComparator());
+        allContainers.addAll(idealState.getPartitionSet());
+
+        final SortedSet<String> allProviders = Sets.newTreeSet(new IndexedNameComparator());
+        for (LiveInstance instance : clusterData.getLiveInstances().values()) {
+            allProviders.add(instance.getId());
+        }
+
+        final ResourceState currentState = new ResourceState(resourceName, currentStateOutput);
+
+        // target container count
+        log.debug(String.format("Retrieving target container count for type '%s'", containerType));
+        int targetCount = -1;
+        try {
+            targetCount = targetProvider.getTargetContainerCount(containerType);
+        } catch (Exception e) {
+            // without a target count there is nothing to assign; return an empty assignment
+            log.error(String.format("Could not retrieve target count for '%s'", containerType), e);
+            return new ResourceAssignment(resourceName);
+        }
+
+        // provider sanity check
+        if (allProviders.isEmpty()) {
+            log.warn(String.format("Could not find any providers"));
+            return new ResourceAssignment(resourceName);
+        }
+
+        // all containers
+        SortedSet<String> assignedContainers = getAssignedContainers(currentState, allContainers);
+        SortedSet<String> failedContainers = getFailedContainers(currentState, allContainers);
+
+        log.info(String.format("Rebalancing '%s' (target=%d, active=%d, failures=%d)", resourceName, targetCount, assignedContainers.size(),
+                failedContainers.size()));
+
+        if (log.isDebugEnabled()) {
+            log.debug(String.format("%s: assigned containers %s", resourceName, assignedContainers));
+            log.debug(String.format("%s: failed containers %s", resourceName, failedContainers));
+        }
+
+        // assignment
+        // ceil(target / #providers): upper bound on containers hosted by any single provider
+        int maxCountPerProvider = (int) Math.ceil(targetCount / (float) allProviders.size());
+
+        ResourceAssignment assignment = new ResourceAssignment(resourceName);
+        CountMap counts = new CountMap(allProviders);
+        int assignmentCount = 0;
+
+        // currently assigned
+        // NOTE(review): failed and misassigned containers are sent OFFLINE but still increment
+        // the provider's count and assignmentCount below — confirm this is intended.
+        for (String containerName : assignedContainers) {
+            String providerName = getProvider(currentState, containerName);
+            Partition partition = new Partition(containerName);
+
+            if (failedContainers.contains(containerName)) {
+                log.warn(String.format("Container '%s:%s' failed, going offline", providerName, containerName));
+                assignment.addReplicaMap(partition, Collections.singletonMap(providerName, "OFFLINE"));
+
+            } else if (counts.get(providerName) >= maxCountPerProvider) {
+                log.warn(String.format("Container '%s:%s' misassigned, going offline", providerName, containerName));
+                assignment.addReplicaMap(partition, Collections.singletonMap(providerName, "OFFLINE"));
+
+            } else {
+                assignment.addReplicaMap(partition, Collections.singletonMap(providerName, "ONLINE"));
+            }
+
+            counts.increment(providerName);
+            assignmentCount++;
+        }
+
+        // currently unassigned
+        SortedSet<String> unassignedContainers = Sets.newTreeSet(new IndexedNameComparator());
+        unassignedContainers.addAll(allContainers);
+        unassignedContainers.removeAll(assignedContainers);
+
+        // place remaining containers on the least loaded provider until the target is met
+        for (String containerName : unassignedContainers) {
+            if (assignmentCount >= targetCount)
+                break;
+
+            String providerName = counts.getMinKey();
+            Partition partition = new Partition(containerName);
+
+            if (failedContainers.contains(containerName)) {
+                log.warn(String.format("Container '%s:%s' failed and unassigned, going offline", providerName, containerName));
+                assignment.addReplicaMap(partition, Collections.singletonMap(providerName, "OFFLINE"));
+
+            } else {
+                assignment.addReplicaMap(partition, Collections.singletonMap(providerName, "ONLINE"));
+            }
+
+            counts.increment(providerName);
+            assignmentCount++;
+        }
+
+        if (log.isDebugEnabled()) {
+            log.debug(String.format("assignment counts: %s", counts));
+            log.debug(String.format("assignment: %s", assignment));
+        }
+
+        return assignment;
+    }
+
+    /** True if some provider reports or is transitioning to ONLINE for this container. */
+    boolean hasProvider(ResourceState state, String containerName) {
+        Map<String, String> currentStateMap = state.getCurrentStateMap(containerName);
+        Map<String, String> pendingStateMap = state.getPendingStateMap(containerName);
+        return hasInstance(currentStateMap, "ONLINE") || hasInstance(pendingStateMap, "ONLINE");
+    }
+
+    /**
+     * Provider currently hosting (or acquiring) the container; current state takes
+     * precedence over pending. Throws if no ONLINE instance exists in either map.
+     */
+    String getProvider(ResourceState state, String containerName) {
+        Map<String, String> currentStateMap = state.getCurrentStateMap(containerName);
+        if (hasInstance(currentStateMap, "ONLINE"))
+            return getInstance(currentStateMap, "ONLINE");
+
+        Map<String, String> pendingStateMap = state.getPendingStateMap(containerName);
+        return getInstance(pendingStateMap, "ONLINE");
+    }
+
+    /**
+     * Containers considered failed: those in ERROR state, plus those designated ONLINE
+     * (and not transitioning OFFLINE) whose physical process the status provider
+     * reports as unhealthy.
+     */
+    SortedSet<String> getFailedContainers(ResourceState state, Collection<String> containers) {
+        SortedSet<String> failedContainers = Sets.newTreeSet(new IndexedNameComparator());
+        for (String containerName : containers) {
+            Map<String, String> currentStateMap = state.getCurrentStateMap(containerName);
+            Map<String, String> pendingStateMap = state.getPendingStateMap(containerName);
+
+            if (hasInstance(currentStateMap, "ERROR")) {
+                failedContainers.add(containerName);
+                continue;
+            }
+
+            // skip containers that are not ONLINE or are already being taken OFFLINE
+            if (!hasInstance(currentStateMap, "ONLINE") || hasInstance(pendingStateMap, "OFFLINE"))
+                continue;
+
+            // container listed online and not in transition, but not active
+            if (!statusProvider.isHealthy(containerName)) {
+                log.warn(String.format("Container '%s' designated ONLINE, but is not active", containerName));
+                failedContainers.add(containerName);
+            }
+        }
+        return failedContainers;
+    }
+
+    /** Subset of the given containers that currently have a hosting provider. */
+    SortedSet<String> getAssignedContainers(ResourceState state, Collection<String> containers) {
+        SortedSet<String> assignedContainers = Sets.newTreeSet(new IndexedNameComparator());
+        for (String containerName : containers) {
+
+            if (!hasProvider(state, containerName))
+                continue;
+
+            assignedContainers.add(containerName);
+        }
+        return assignedContainers;
+    }
+
+    /** True if any instance in the map is in the given state. */
+    boolean hasInstance(Map<String, String> stateMap, String state) {
+        if (!stateMap.isEmpty()) {
+            for (Map.Entry<String, String> entry : stateMap.entrySet()) {
+                if (entry.getValue().equals(state)) {
+                    return true;
+                }
+            }
+        }
+        return false;
+    }
+
+    /** First instance found in the given state; throws IllegalArgumentException if none. */
+    String getInstance(Map<String, String> stateMap, String state) {
+        if (!stateMap.isEmpty()) {
+            for (Map.Entry<String, String> entry : stateMap.entrySet()) {
+                if (entry.getValue().equals(state)) {
+                    return entry.getKey();
+                }
+            }
+        }
+        throw new IllegalArgumentException(String.format("Could not find instance with state '%s'", state));
+    }
+
+    /**
+     * Orders names with a trailing numeric index by (prefix, numeric index) so that
+     * e.g. "container_9" sorts before "container_10"; names without a numeric
+     * suffix fall back to natural string order and sort before indexed names.
+     */
+    class IndexedNameComparator implements Comparator<String> {
+        Pattern pattern = Pattern.compile("^(.*)([0-9]+)$");
+
+        @Override
+        public int compare(String o1, String o2) {
+            Matcher m1 = pattern.matcher(o1);
+            Matcher m2 = pattern.matcher(o2);
+
+            boolean find1 = m1.find();
+            boolean find2 = m2.find();
+
+            if (!find1 && !find2)
+                return o1.compareTo(o2);
+
+            if (!find1 && find2)
+                return -1;
+
+            if (find1 && !find2)
+                return 1;
+
+            String name1 = m1.group(1);
+            String name2 = m2.group(1);
+
+            int name_comp = name1.compareTo(name2);
+            if (name_comp != 0)
+                return name_comp;
+
+            int index1 = Integer.valueOf(m1.group(2));
+            int index2 = Integer.valueOf(m2.group(2));
+
+            return (int) Math.signum(index1 - index2);
+        }
+    }
+
+    /** Map of provider name to assigned-container count, pre-initialized to zero for all keys. */
+    class CountMap extends HashMap<String, Integer> {
+        /**
+         * 
+         */
+        private static final long serialVersionUID = 3954138748385337978L;
+
+        public CountMap(Collection<String> keys) {
+            super();
+            for (String key : keys) {
+                put(key, 0);
+            }
+        }
+
+        @Override
+        public Integer get(Object key) {
+            Preconditions.checkArgument(containsKey(key), "Key %s not found", key);
+            return super.get(key);
+        }
+
+        public int increment(String key) {
+            int newValue = get(key) + 1;
+            // NOTE(review): redundant — get(key) above already fails on a missing key
+            Preconditions.checkArgument(containsKey(key), "Key %s not found", key);
+            put(key, newValue);
+            return newValue;
+        }
+
+        /** Key with the smallest count; ties broken by iteration order. Requires a non-empty map. */
+        public String getMinKey() {
+            Preconditions.checkState(size() > 0, "Must contain at least one item");
+
+            String minKey = null;
+            int minValue = Integer.MAX_VALUE;
+
+            for (String key : keySet()) {
+                int value = get(key);
+                if (value < minValue) {
+                    minValue = value;
+                    minKey = key;
+                }
+            }
+
+            return minKey;
+        }
+
+        /** Key with the largest count; ties broken by iteration order. Requires a non-empty map. */
+        public String getMaxKey() {
+            Preconditions.checkState(size() > 0, "Must contain at least one item");
+
+            String maxKey = null;
+            int maxValue = Integer.MIN_VALUE;
+
+            for (String key : keySet()) {
+                int value = get(key);
+                if (value > maxValue) {
+                    maxValue = value;
+                    maxKey = key;
+                }
+            }
+
+            return maxKey;
+        }
+    }
+
+    /** Read-only view of current and pending partition state maps for a single resource. */
+    class ResourceState {
+        final String             resourceName;
+        final CurrentStateOutput state;
+
+        public ResourceState(String resourceName, CurrentStateOutput state) {
+            this.resourceName = resourceName;
+            this.state = state;
+        }
+
+        Map<String, String> getCurrentStateMap(String partitionName) {
+            return state.getCurrentStateMap(resourceName, new Partition(partitionName));
+        }
+
+        Map<String, String> getPendingStateMap(String partitionName) {
+            return state.getPendingStateMap(resourceName, new Partition(partitionName));
+        }
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/ProviderRebalancerSingleton.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/ProviderRebalancerSingleton.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/ProviderRebalancerSingleton.java
new file mode 100644
index 0000000..c46f5f5
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/ProviderRebalancerSingleton.java
@@ -0,0 +1,38 @@
+package org.apache.helix.metamanager.provider;
+
+import org.apache.helix.metamanager.StatusProvider;
+import org.apache.helix.metamanager.TargetProvider;
+import org.apache.log4j.Logger;
+
+/**
+ * Utility for dependency injection into ProviderRebalancer.
+ * 
+ */
+public class ProviderRebalancerSingleton {
+
+    static final Logger   log = Logger.getLogger(ProviderRebalancerSingleton.class);
+
+    // statically injected collaborators, read by ProviderRebalancer.init()
+    static TargetProvider targetProvider;
+    static StatusProvider statusProvider;
+
+    private ProviderRebalancerSingleton() {
+        // static holder; never instantiated
+    }
+
+    /** Injects the target provider to be picked up by newly initialized rebalancers. */
+    public static void setTargetProvider(TargetProvider targetProvider) {
+        ProviderRebalancerSingleton.targetProvider = targetProvider;
+    }
+
+    /** Returns the statically injected target provider, or null if none was set. */
+    public static TargetProvider getTargetProvider() {
+        return targetProvider;
+    }
+
+    /** Injects the status provider to be picked up by newly initialized rebalancers. */
+    public static void setStatusProvider(StatusProvider statusProvider) {
+        ProviderRebalancerSingleton.statusProvider = statusProvider;
+    }
+
+    /** Returns the statically injected status provider, or null if none was set. */
+    public static StatusProvider getStatusProvider() {
+        return statusProvider;
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/ProviderStateModel.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/ProviderStateModel.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/ProviderStateModel.java
new file mode 100644
index 0000000..090f807
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/ProviderStateModel.java
@@ -0,0 +1,114 @@
+package org.apache.helix.metamanager.provider;
+
+import org.apache.helix.NotificationContext;
+import org.apache.helix.metamanager.ClusterAdmin;
+import org.apache.helix.metamanager.ContainerProvider;
+import org.apache.helix.model.Message;
+import org.apache.helix.participant.statemachine.StateModel;
+import org.apache.helix.participant.statemachine.StateModelInfo;
+import org.apache.helix.participant.statemachine.Transition;
+import org.apache.log4j.Logger;
+
+/**
+ * Helix state model implementation for {@link ContainerProvider}s. Updates
+ * configuration of managed Helix cluster and spawns and destroys container
+ * instances.
+ * 
+ */
+@StateModelInfo(initialState = "OFFLINE", states = { "OFFLINE", "ONLINE" })
+public class ProviderStateModel extends StateModel {
+
+    static final Logger log = Logger.getLogger(ProviderStateModel.class);
+
+    ContainerProvider   provider;
+    ClusterAdmin        admin;
+
+    public ProviderStateModel(ContainerProvider provider, ClusterAdmin admin) {
+        this.provider = provider;
+        this.admin = admin;
+    }
+
+    /**
+     * Acquires a container: clears any stale remnants, registers the instance with
+     * the managed cluster, spawns the container and triggers a rebalance.
+     */
+    @Transition(from = "OFFLINE", to = "ONLINE")
+    public void acquire(Message m, NotificationContext context) throws Exception {
+        final String containerType = m.getResourceName();
+        final String containerId = m.getPartitionName();
+        final String instanceId = context.getManager().getInstanceName();
+
+        log.trace(String.format("%s:%s transitioning from OFFLINE to ONLINE", containerId, instanceId));
+
+        // wipe any leftover container/instance from a previous incarnation first
+        bestEffortRemove(containerId);
+
+        // register the logical instance with the managed cluster, then spawn the container
+        admin.addInstance(containerId, containerType);
+        provider.create(containerId, containerType);
+
+        tryRebalance();
+
+        log.info(String.format("%s acquired container '%s' (type='%s')", instanceId, containerId, containerType));
+    }
+
+    /**
+     * Releases a container: tears down the physical container and its cluster
+     * registration, then triggers a rebalance.
+     */
+    @Transition(from = "ONLINE", to = "OFFLINE")
+    public void release(Message m, NotificationContext context) {
+        final String containerId = m.getPartitionName();
+        final String instanceId = context.getManager().getInstanceName();
+
+        log.trace(String.format("%s:%s transitioning from ONLINE to OFFLINE", containerId, instanceId));
+
+        bestEffortRemove(containerId);
+        tryRebalance();
+
+        log.info(String.format("%s destroyed container '%s'", instanceId, containerId));
+
+    }
+
+    /** Recovers from ERROR by performing the same teardown as a regular release. */
+    @Transition(from = "ERROR", to = "OFFLINE")
+    public void recover(Message m, NotificationContext context) {
+        final String containerId = m.getPartitionName();
+        final String instanceId = context.getManager().getInstanceName();
+
+        log.trace(String.format("%s:%s transitioning from ERROR to OFFLINE", containerId, instanceId));
+
+        release(m, context);
+    }
+
+    /** Drops the partition; nothing to clean up beyond what OFFLINE already did. */
+    @Transition(from = "OFFLINE", to = "DROPPED")
+    public void drop(Message m, NotificationContext context) {
+        final String containerId = m.getPartitionName();
+        final String instanceId = context.getManager().getInstanceName();
+
+        log.trace(String.format("%s:%s transitioning from OFFLINE to DROPPED", containerId, instanceId));
+    }
+
+    /** Triggers a rebalance of the managed cluster, logging (not propagating) failures. */
+    private void tryRebalance() {
+        try {
+            admin.rebalance();
+        } catch (Exception e) {
+            // ignore
+            log.warn(String.format("rebalancing cluster failed (error='%s')", e.getMessage()));
+        }
+    }
+
+    /**
+     * Destroys the container and removes its cluster instance, tolerating either
+     * being absent. Destruction happens before deregistration.
+     */
+    private void bestEffortRemove(String containerId) {
+        log.debug(String.format("Best effort removal of container '%s'", containerId));
+
+        try {
+            provider.destroy(containerId);
+            log.debug(String.format("Container '%s' destroyed", containerId));
+        } catch (Exception e) {
+            log.debug(String.format("Container '%s' does not exist", containerId));
+        }
+
+        try {
+            admin.removeInstance(containerId);
+            log.debug(String.format("Instance '%s' removed", containerId));
+        } catch (Exception e) {
+            log.debug(String.format("Instance '%s' does not exist", containerId));
+        }
+
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/ProviderStateModelFactory.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/ProviderStateModelFactory.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/ProviderStateModelFactory.java
new file mode 100644
index 0000000..36a071a
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/ProviderStateModelFactory.java
@@ -0,0 +1,27 @@
+package org.apache.helix.metamanager.provider;
+
+import org.apache.helix.metamanager.ClusterAdmin;
+import org.apache.helix.metamanager.ContainerProvider;
+import org.apache.helix.participant.statemachine.StateModelFactory;
+
+/**
+ * Factory for {@link ProviderStateModel}. Injects {@link ClusterAdmin} for
+ * managed cluster and {@link ContainerProvider}.
+ * 
+ */
+class ProviderStateModelFactory extends StateModelFactory<ProviderStateModel> {
+
+    final ContainerProvider provider;
+    final ClusterAdmin      admin;
+
+    public ProviderStateModelFactory(ContainerProvider provider, ClusterAdmin admin) {
+        this.provider = provider;
+        this.admin = admin;
+    }
+
+    @Override
+    public ProviderStateModel createNewStateModel(String partitionName) {
+        // every partition shares the same provider and admin; partitionName is not needed
+        return new ProviderStateModel(provider, admin);
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/local/LocalContainerProvider.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/local/LocalContainerProvider.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/local/LocalContainerProvider.java
new file mode 100644
index 0000000..b63d760
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/local/LocalContainerProvider.java
@@ -0,0 +1,75 @@
+package org.apache.helix.metamanager.provider.local;
+
+import java.util.HashSet;
+import java.util.Map;
+
+import org.apache.helix.metamanager.ClusterContainerProvider;
+import org.apache.helix.metamanager.managed.ContainerProcess;
+import org.apache.helix.metamanager.provider.local.LocalContainerSingleton.LocalProcess;
+import org.apache.log4j.Logger;
+
+/**
+ * Container provider that runs {@link ContainerProcess} instances in-process,
+ * tracked in the shared {@link LocalContainerSingleton} registry.
+ */
+public class LocalContainerProvider implements ClusterContainerProvider {
+
+	static final Logger log = Logger.getLogger(LocalContainerProvider.class);
+	
+	static final String REQUIRED_TYPE = "container";
+	
+	final String zkAddress;
+	final String clusterName;
+	final String providerName;
+	
+	public LocalContainerProvider(String zkAddress, String clusterName, String providerName) {
+		this.zkAddress = zkAddress;
+		this.clusterName = clusterName;
+		this.providerName = providerName;
+	}
+
+	@Override
+	public void create(String id, String type) throws Exception {
+		Map<String, LocalProcess> registry = LocalContainerSingleton.getProcesses();
+		
+		synchronized (registry) {
+			// reject duplicates and unsupported types before starting anything
+			if(registry.containsKey(id))
+				throw new IllegalArgumentException(String.format("Process '%s' already exists", id));
+			
+			if(!type.equals(REQUIRED_TYPE))
+				throw new IllegalArgumentException(String.format("Type '%s' not supported", type));
+			
+			log.info(String.format("Running container '%s' (zkAddress='%s', clusterName='%s')", id, zkAddress, clusterName));
+			
+			// spawn the in-process container and record it in the shared registry
+			ContainerProcess container = new ContainerProcess(clusterName, zkAddress, id);
+			container.start();
+			registry.put(id, new LocalProcess(id, providerName, container));
+		}
+	}
+	
+	@Override
+	public void destroy(String id) throws Exception {
+		Map<String, LocalProcess> registry = LocalContainerSingleton.getProcesses();
+		
+		synchronized (registry) {
+			if(!registry.containsKey(id))
+				throw new IllegalArgumentException(String.format("Process '%s' does not exists", id));
+			
+			log.info(String.format("Destroying container '%s'", id));
+			
+			// deregister first, then stop the container
+			registry.remove(id).process.stop();
+		}
+	}
+	
+	@Override
+	public void destroyAll() {
+		Map<String, LocalProcess> registry = LocalContainerSingleton.getProcesses();
+		
+		synchronized (registry) {
+			log.info("Destroying all processes");
+			// iterate a snapshot of the keys since destroy() mutates the registry
+			for(String id : new HashSet<String>(registry.keySet())) {
+				try { destroy(id); } catch (Exception ignore) {}
+			}
+		}
+	}
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/local/LocalContainerSingleton.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/local/LocalContainerSingleton.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/local/LocalContainerSingleton.java
new file mode 100644
index 0000000..d25d3ba
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/local/LocalContainerSingleton.java
@@ -0,0 +1,40 @@
+package org.apache.helix.metamanager.provider.local;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.helix.metamanager.managed.ContainerProcess;
+
+/**
+ * Process-wide registry of in-process containers, shared between
+ * {@link LocalContainerProvider} instances. Callers synchronize on the map.
+ */
+public class LocalContainerSingleton {
+	final static Map<String, LocalProcess> processes = new HashMap<String, LocalProcess>();
+
+	private LocalContainerSingleton() {
+		// static registry; never instantiated
+	}
+	
+	/** Shared registry; synchronize on the returned map for compound operations. */
+	public static Map<String, LocalProcess> getProcesses() {
+		return processes;
+	}
+	
+	/** Stops every registered container and empties the registry (test cleanup). */
+	public static void reset() {
+		synchronized (processes) {
+			for(LocalProcess entry : processes.values()) {
+				entry.process.stop();
+			}
+			processes.clear();
+		}
+	}
+	
+	/** Immutable record of a running container and the provider that owns it. */
+	static class LocalProcess {
+		final String id;
+		final String owner;
+		final ContainerProcess process;
+		
+		public LocalProcess(String id, String owner, ContainerProcess process) {
+			this.id = id;
+			this.owner = owner;
+			this.process = process;
+		}
+	}
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/local/LocalContainerStatusProvider.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/local/LocalContainerStatusProvider.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/local/LocalContainerStatusProvider.java
new file mode 100644
index 0000000..383a0d7
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/local/LocalContainerStatusProvider.java
@@ -0,0 +1,37 @@
+package org.apache.helix.metamanager.provider.local;
+
+import java.util.Map;
+
+import org.apache.helix.metamanager.ClusterContainerStatusProvider;
+import org.apache.helix.metamanager.provider.local.LocalContainerSingleton.LocalProcess;
+
+/**
+ * Status provider backed by the {@link LocalContainerSingleton} registry.
+ * All lookups synchronize on the shared process map.
+ */
+public class LocalContainerStatusProvider implements ClusterContainerStatusProvider {
+
+	/** Returns true if a container with the given id is registered. */
+	@Override
+	public boolean exists(String id) {
+		Map<String, LocalProcess> processes = LocalContainerSingleton.getProcesses();
+		
+		synchronized (processes) {
+			return processes.containsKey(id);
+		}
+	}
+
+	/**
+	 * Returns true if the container is registered with a live process.
+	 * Fix: the original dereferenced processes.get(id) unconditionally and threw
+	 * NullPointerException for unknown ids; unknown ids now report false.
+	 */
+	@Override
+	public boolean isActive(String id) {
+		Map<String, LocalProcess> processes = LocalContainerSingleton.getProcesses();
+		
+		synchronized (processes) {
+			LocalProcess local = processes.get(id);
+			// NOTE(review): LocalProcess.process is final and constructor-assigned, so for a
+			// registered id this is effectively always true — confirm intended semantics.
+			return local != null && local.process != null;
+		}
+	}
+
+	/**
+	 * Returns true if the container is registered but its process is gone.
+	 * Fix: null-guard for unknown ids (previously NullPointerException); unknown
+	 * ids now report false rather than failed.
+	 */
+	@Override
+	public boolean isFailed(String id) {
+		Map<String, LocalProcess> processes = LocalContainerSingleton.getProcesses();
+		
+		synchronized (processes) {
+			LocalProcess local = processes.get(id);
+			return local != null && local.process == null;
+		}
+	}
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/shell/ShellContainerProvider.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/shell/ShellContainerProvider.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/shell/ShellContainerProvider.java
new file mode 100644
index 0000000..eef730a
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/shell/ShellContainerProvider.java
@@ -0,0 +1,81 @@
+package org.apache.helix.metamanager.provider.shell;
+
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+
+import org.apache.helix.metamanager.ClusterContainerProvider;
+import org.apache.helix.metamanager.provider.shell.ShellContainerSingleton.ShellProcess;
+import org.apache.log4j.Logger;
+
+/**
+ * Container provider that launches containers as external shell processes,
+ * tracked in the shared {@link ShellContainerSingleton} registry.
+ */
+public class ShellContainerProvider implements ClusterContainerProvider {
+
+	static final Logger log = Logger.getLogger(ShellContainerProvider.class);
+	
+	static final String REQUIRED_TYPE = "container";
+	static final String RUN_COMMAND = "/bin/sh";
+	
+	// global view of processes required
+	// NOTE(review): these two static fields appear unreferenced in this class — the shared
+	// registry actually used is ShellContainerSingleton.getProcesses(); confirm before removing.
+	static final Object staticLock = new Object();
+	static final Map<String, ShellProcess> processes = new HashMap<String, ShellProcess>();
+
+	final String zkAddress;
+	final String clusterName;
+	final String command;
+	final String providerName;
+	
+	public ShellContainerProvider(String zkAddress, String clusterName, String providerName, String command) {
+		this.zkAddress = zkAddress;
+		this.clusterName = clusterName;
+		this.command = command;
+		this.providerName = providerName;
+	}
+
+	@Override
+	public void create(String id, String type) throws Exception {
+		Map<String, ShellProcess> registry = ShellContainerSingleton.getProcesses();
+		
+		synchronized (registry) {
+			// reject duplicates and unsupported types before spawning anything
+			if(registry.containsKey(id))
+				throw new IllegalArgumentException(String.format("Process '%s' already exists", id));
+			
+			if(!type.equals(REQUIRED_TYPE))
+				throw new IllegalArgumentException(String.format("Type '%s' not supported", type));
+			
+			log.info(String.format("Running container '%s' (zkAddress='%s', clusterName='%s', command='%s')", id, zkAddress, clusterName, command));
+			
+			// launch "/bin/sh <command> <zkAddress> <clusterName> <id>" and register it
+			Process child = new ProcessBuilder(RUN_COMMAND, command, zkAddress, clusterName, id).start();
+			registry.put(id, new ShellProcess(id, providerName, child));
+		}
+	}
+	
+	@Override
+	public void destroy(String id) throws Exception {
+		Map<String, ShellProcess> registry = ShellContainerSingleton.getProcesses();
+		
+		synchronized (registry) {
+			if(!registry.containsKey(id))
+				throw new IllegalArgumentException(String.format("Process '%s' does not exists", id));
+			
+			log.info(String.format("Destroying container '%s'", id));
+			
+			// deregister, then kill the child process and wait for it to exit
+			ShellProcess shell = registry.remove(id);
+			shell.process.destroy();
+			shell.process.waitFor();
+		}
+	}
+	
+	@Override
+	public void destroyAll() {
+		Map<String, ShellProcess> registry = ShellContainerSingleton.getProcesses();
+		
+		synchronized (registry) {
+			log.info("Destroying all processes");
+			// iterate a snapshot since destroy() mutates the registry
+			for(ShellProcess shell : new HashSet<ShellProcess>(registry.values())) {
+				try { destroy(shell.id); } catch (Exception ignore) {}
+			}
+		}
+	}
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/shell/ShellContainerSingleton.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/shell/ShellContainerSingleton.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/shell/ShellContainerSingleton.java
new file mode 100644
index 0000000..ae7f3c1
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/shell/ShellContainerSingleton.java
@@ -0,0 +1,38 @@
+package org.apache.helix.metamanager.provider.shell;
+
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * JVM-wide registry of locally spawned shell container processes, shared
+ * between the shell container provider and status provider. Callers must
+ * synchronize on the map returned by getProcesses(), as done in this package.
+ */
+public class ShellContainerSingleton {
+	static final Map<String, ShellProcess> processes = new HashMap<String, ShellProcess>();
+
+	private ShellContainerSingleton() {
+		// left blank
+	}
+	
+	// NOTE: exposes the internal mutable map; callers synchronize on it
+	public static Map<String, ShellProcess> getProcesses() {
+		return processes;
+	}
+	
+	/** Kills all registered processes, waits for termination, clears the registry. */
+	public static void reset() {
+		synchronized (processes) {
+			for(ShellProcess local : processes.values()) {
+				local.process.destroy();
+				try { local.process.waitFor(); } catch(Exception ignore) {}
+			}
+			processes.clear();
+		}
+	}
+	
+	/** Ties a container id and its owning provider to the spawned OS process. */
+	static class ShellProcess {
+		final String id;
+		final String owner;
+		final Process process;
+
+		public ShellProcess(String id, String owner, Process process) {
+			this.id = id;
+			this.owner = owner;
+			this.process = process;
+		}		
+	}
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/shell/ShellContainerStatusProvider.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/shell/ShellContainerStatusProvider.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/shell/ShellContainerStatusProvider.java
new file mode 100644
index 0000000..0030c2d
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/shell/ShellContainerStatusProvider.java
@@ -0,0 +1,52 @@
+package org.apache.helix.metamanager.provider.shell;
+
+import java.util.Map;
+
+import org.apache.helix.metamanager.ClusterContainerStatusProvider;
+import org.apache.helix.metamanager.provider.shell.ShellContainerSingleton.ShellProcess;
+
+/**
+ * Status provider backed by the shared in-JVM registry of locally spawned
+ * shell container processes (see ShellContainerSingleton).
+ */
+public class ShellContainerStatusProvider implements ClusterContainerStatusProvider {
+
+	@Override
+	public boolean exists(String id) {
+		Map<String, ShellProcess> processes = ShellContainerSingleton.getProcesses();
+
+		synchronized (processes) {
+			return processes.containsKey(id);
+		}
+	}
+
+	/** A container is active while its OS process is registered and still running. */
+	@Override
+	public boolean isActive(String id) {
+		Map<String, ShellProcess> processes = ShellContainerSingleton.getProcesses();
+
+		synchronized (processes) {
+			ShellProcess shell = processes.get(id);
+			
+			// unknown ids previously caused an NPE here; report not active instead
+			if (shell == null)
+				return false;
+			
+			try {
+				// exitValue() throws while the process is alive
+				shell.process.exitValue();
+				return false;
+			} catch (IllegalThreadStateException e) {
+				// still running
+				return true;
+			}
+		}
+	}
+
+	/** A container has failed if its process terminated with a non-zero exit code. */
+	@Override
+	public boolean isFailed(String id) {
+		Map<String, ShellProcess> processes = ShellContainerSingleton.getProcesses();
+
+		synchronized (processes) {
+			ShellProcess shell = processes.get(id);
+			
+			// unknown ids previously caused an NPE here; report not failed instead
+			if (shell == null)
+				return false;
+			
+			try {
+				return (shell.process.exitValue() != 0);
+			} catch (IllegalThreadStateException e) {
+				// still running
+				return false;
+			}
+		}
+	}
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/ApplicationConfig.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/ApplicationConfig.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/ApplicationConfig.java
new file mode 100644
index 0000000..4c8f303
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/ApplicationConfig.java
@@ -0,0 +1,32 @@
+package org.apache.helix.metamanager.provider.yarn;
+
+/**
+ * Immutable configuration handle passed between the meta cluster manager,
+ * the YARN application master and spawned container processes.
+ */
+public class ApplicationConfig {
+	final String clusterAddress;
+	final String clusterName;
+	final String metadataAddress;
+	final String providerName;
+
+	public ApplicationConfig(String clusterAddress, String clusterName,
+			String metadataAddress, String providerName) {
+		this.providerName = providerName;
+		this.metadataAddress = metadataAddress;
+		this.clusterName = clusterName;
+		this.clusterAddress = clusterAddress;
+	}
+
+	/** Logical name of the container provider owning this application. */
+	public String getProviderName() {
+		return providerName;
+	}
+
+	/** Address of the container metadata store. */
+	public String getMetadataAddress() {
+		return metadataAddress;
+	}
+
+	/** Name of the managed Helix cluster. */
+	public String getClusterName() {
+		return clusterName;
+	}
+
+	/** Address of the managed Helix cluster. */
+	public String getClusterAddress() {
+		return clusterAddress;
+	}
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/ContainerMetadata.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/ContainerMetadata.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/ContainerMetadata.java
new file mode 100644
index 0000000..73d1a1b
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/ContainerMetadata.java
@@ -0,0 +1,50 @@
+package org.apache.helix.metamanager.provider.yarn;
+
+
+/**
+ * Mutable metadata record describing the life-cycle state of a single managed
+ * container. Serialized to JSON (see Utils) for storage in the metadata store.
+ */
+class ContainerMetadata {
+
+	/** Life-cycle states a container moves through, from ACQUIRE to FINALIZE. */
+	static enum ContainerState {
+		ACQUIRE,
+		CONNECTING,
+		ACTIVE,
+		TEARDOWN,
+		FAILED,
+		HALTED,
+		FINALIZE
+	}
+	
+	String id;
+	ContainerState state;
+	int yarnId;
+	String command;
+	String owner;
+
+	/** No-arg constructor required for JSON deserialization. */
+	public ContainerMetadata() {
+		// left blank
+	}
+	
+	/** Creates metadata for a newly requested container: state ACQUIRE, no YARN id yet (-1). */
+	public ContainerMetadata(String id, String command, String owner) {
+		this.id = id;
+		this.state = ContainerState.ACQUIRE;
+		this.yarnId = -1;
+		this.command = command;
+		this.owner = owner;
+	}
+	
+	/** Copy constructor with a new state; delegates to avoid duplicated copy logic. */
+	public ContainerMetadata(ContainerMetadata node, ContainerState state) {
+		this(node, state, node.yarnId);
+	}
+	
+	/** Copy constructor with a new state and YARN container id. */
+	public ContainerMetadata(ContainerMetadata node, ContainerState state, int yarnId) {
+		this.id = node.id;
+		this.state = state;
+		this.yarnId = yarnId;
+		this.command = node.command;
+		this.owner = node.owner;
+	}
+}
+

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/MetadataService.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/MetadataService.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/MetadataService.java
new file mode 100644
index 0000000..dc6c060
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/MetadataService.java
@@ -0,0 +1,42 @@
+package org.apache.helix.metamanager.provider.yarn;
+
+import java.util.Collection;
+
+/**
+ * CRUD-style abstraction over the store holding container metadata records,
+ * shared between the container provider and the YARN-side services.
+ */
+public interface MetadataService {
+
+	/** Returns true if a metadata record with the given id exists. */
+	public boolean exists(String id);
+
+	/** Creates the metadata record for a container. */
+	public void create(ContainerMetadata meta) throws MetadataServiceException;
+
+	/** Reads the metadata record with the given id. */
+	public ContainerMetadata read(String id) throws MetadataServiceException;
+
+	/** Reads all metadata records currently in the store. */
+	public Collection<ContainerMetadata> readAll() throws MetadataServiceException;
+
+	/** Updates an existing metadata record. */
+	public void update(ContainerMetadata meta) throws MetadataServiceException;
+
+	/** Deletes the metadata record with the given id. */
+	public void delete(String id) throws MetadataServiceException;
+
+	/** Wrapper for failures of the underlying metadata store. */
+	public static class MetadataServiceException extends Exception {
+
+		private static final long serialVersionUID = -2846997013918977056L;
+
+		public MetadataServiceException() {
+			super();
+		}
+
+		public MetadataServiceException(String message, Throwable cause) {
+			super(message, cause);
+		}
+
+		public MetadataServiceException(String message) {
+			super(message);
+		}
+
+		public MetadataServiceException(Throwable cause) {
+			super(cause);
+		}	
+	}
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/Utils.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/Utils.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/Utils.java
new file mode 100644
index 0000000..82871f1
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/Utils.java
@@ -0,0 +1,94 @@
+package org.apache.helix.metamanager.provider.yarn;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.yarn.api.records.LocalResource;
+import org.apache.hadoop.yarn.api.records.LocalResourceType;
+import org.apache.hadoop.yarn.api.records.LocalResourceVisibility;
+import org.apache.hadoop.yarn.util.ConverterUtils;
+import org.apache.hadoop.yarn.util.Records;
+import org.apache.helix.metamanager.provider.yarn.ContainerMetadata.ContainerState;
+import org.apache.log4j.Logger;
+
+import com.google.gson.Gson;
+import com.google.gson.GsonBuilder;
+import com.google.gson.TypeAdapter;
+import com.google.gson.stream.JsonReader;
+import com.google.gson.stream.JsonToken;
+import com.google.gson.stream.JsonWriter;
+
+/**
+ * Helpers for JSON (de-)serialization of container metadata and for building
+ * the local resources required by YARN container launch contexts.
+ */
+public class Utils {
+	
+	static final Logger log = Logger.getLogger(Utils.class);
+	
+	// shared Gson instance; serializes ContainerState by enum name (see adapter below)
+	static Gson gson;
+	static {
+		GsonBuilder builder = new GsonBuilder();
+		builder.registerTypeAdapter(ContainerState.class, new ContainerStateAdapter());
+		builder.setPrettyPrinting();
+		gson = builder.create();
+	}
+	
+	// created once at class load time; see createDummyResources()
+	static Map<String, LocalResource>  dummyResources = createDummyResources();
+	
+	/** Serializes container metadata to pretty-printed JSON. */
+	static String toJson(ContainerMetadata meta) {
+		return gson.toJson(meta);
+	}
+	
+	/** Deserializes container metadata from JSON. */
+	static ContainerMetadata fromJson(String json) {
+		return gson.fromJson(json, ContainerMetadata.class);
+	}
+	
+	/** Returns the shared placeholder resource map (YARN workaround, see below). */
+	static Map<String, LocalResource> getDummyResources() {
+		return dummyResources;
+	}
+
+	// YARN workaround: registers a dummy file at /tmp/dummy as a local resource
+	// for launch contexts (see usage in YarnApplication).
+	private static Map<String, LocalResource> createDummyResources() {
+		File dummy = new File("/tmp/dummy");
+		
+		if(!dummy.exists()) {
+	    	try {
+	    		dummy.createNewFile();
+	    	} catch(Exception e) {
+	    		// NOTE(review): System.exit() from a static-initializer path is drastic;
+	    		// consider throwing instead — confirm no caller relies on the exit
+	    		log.error("could not create dummy file", e);
+	    		System.exit(1);
+	    	}
+		}
+	    
+	    Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();
+	    Path path = new Path(dummy.toURI());
+	    LocalResource localResource = Records.newRecord(LocalResource.class);
+	    localResource.setType(LocalResourceType.FILE);
+	    localResource.setVisibility(LocalResourceVisibility.APPLICATION);          
+	    localResource.setResource(ConverterUtils.getYarnUrlFromPath(path)); 
+	    localResource.setTimestamp(dummy.lastModified());
+	    localResource.setSize(dummy.length());
+	    localResources.put("dummy", localResource);
+		return localResources;
+	}
+	
+	/** Gson adapter mapping ContainerState to/from its enum name, tolerating nulls. */
+	static class ContainerStateAdapter extends TypeAdapter<ContainerState> {
+		@Override
+		public ContainerState read(JsonReader reader) throws IOException {
+			if (reader.peek() == JsonToken.NULL) {
+				reader.nextNull();
+				return null;
+			}
+			return ContainerState.valueOf(reader.nextString());
+		}
+
+		@Override
+		public void write(JsonWriter writer, ContainerState value) throws IOException {
+			if (value == null) {
+				writer.nullValue();
+				return;
+			}
+			writer.value(value.name());
+		}
+	}
+	
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/YarnApplication.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/YarnApplication.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/YarnApplication.java
new file mode 100644
index 0000000..c4f3668
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/YarnApplication.java
@@ -0,0 +1,125 @@
+package org.apache.helix.metamanager.provider.yarn;
+
+import java.net.InetSocketAddress;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.net.NetUtils;
+import org.apache.hadoop.yarn.api.ClientRMProtocol;
+import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationResponse;
+import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationRequest;
+import org.apache.hadoop.yarn.api.records.ApplicationId;
+import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
+import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
+import org.apache.hadoop.yarn.api.records.LocalResource;
+import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.exceptions.YarnRemoteException;
+import org.apache.hadoop.yarn.ipc.YarnRPC;
+import org.apache.hadoop.yarn.util.Records;
+import org.apache.log4j.Logger;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * Client-side handle for submitting and force-killing the YARN application
+ * that hosts the container provider's application master.
+ */
+public class YarnApplication {
+
+	static final Logger log = Logger.getLogger(YarnApplication.class);
+	
+	// environment variable names handed to the application master process
+	static final String ENV_CLUSTER_ADDRESS = "YA_CLUSTER_ADDRESS";
+	static final String ENV_CLUSTER_NAME = "YA_CLUSTER_NAME";
+	static final String ENV_METADATA_ADDRESS = "YA_METADATA_ADDRESS";
+	static final String ENV_PROVIDER_NAME = "YA_PROVIDER_NAME";
+
+	// NOTE(review): hard-coded, user-specific install path; must come from
+	// configuration before this can run outside the original dev machine
+	static final String MASTER_COMMAND = "/bin/sh /home/apucher/incubator-helix/recipes/meta-cluster-manager/target/meta-cluster-manager-pkg/bin/yarn-master-process.sh 1>%s/stdout 2>%s/stderr";
+
+	Configuration conf;
+	YarnRPC rpc;
+	ClientRMProtocol rmClient;
+	ApplicationId appId;
+	
+	final ApplicationConfig appConfig;
+
+	public YarnApplication(ApplicationConfig appConfig) {
+		this.appConfig = appConfig;
+		configure(new YarnConfiguration());
+	}
+
+	/**
+	 * Connects to the resource manager, acquires a new application id and
+	 * submits the application master container (command, 256MB limit,
+	 * environment, dummy local resources).
+	 */
+	public void start() throws Exception {
+		connect();
+		
+		String command = String.format(MASTER_COMMAND, "/tmp/" + appConfig.providerName, "/tmp/" + appConfig.providerName); 
+				//ApplicationConstants.LOG_DIR_EXPANSION_VAR, ApplicationConstants.LOG_DIR_EXPANSION_VAR);
+
+		log.info(String.format("Starting application '%s' provider '%s' (masterCommand='%s')", appConfig.metadataAddress, appConfig.providerName, command));
+
+		// app id
+		GetNewApplicationRequest appRequest = Records.newRecord(GetNewApplicationRequest.class);
+		GetNewApplicationResponse appResponse = rmClient.getNewApplication(appRequest);
+
+		this.appId = appResponse.getApplicationId();
+
+		log.info(String.format("Acquired app id '%s' for '%s'", appId.toString(), appConfig.providerName));
+		
+		// command
+		ContainerLaunchContext launchContext = Records.newRecord(ContainerLaunchContext.class);
+		launchContext.setCommands(Collections.singletonList(command));
+
+		// resource limit
+		Resource resource = Records.newRecord(Resource.class);
+		resource.setMemory(256); // TODO make dynamic
+		launchContext.setResource(resource);
+		
+	    // environment
+	    Map<String, String> env = new HashMap<String, String>();
+	    env.put(ENV_CLUSTER_ADDRESS, appConfig.clusterAddress);
+	    env.put(ENV_CLUSTER_NAME, appConfig.clusterName);
+	    env.put(ENV_METADATA_ADDRESS, appConfig.metadataAddress);
+	    env.put(ENV_PROVIDER_NAME, appConfig.providerName);
+	    launchContext.setEnvironment(env);
+	    
+	    // local resources
+	    // YARN workaround: create dummy resource 
+	    Map<String, LocalResource> localResources = Utils.getDummyResources();
+	    launchContext.setLocalResources(localResources);
+	    
+	    // app submission
+	    ApplicationSubmissionContext subContext = Records.newRecord(ApplicationSubmissionContext.class);
+		subContext.setApplicationId(appId);
+		subContext.setApplicationName(appConfig.providerName);
+		subContext.setAMContainerSpec(launchContext);
+
+		SubmitApplicationRequest subRequest = Records.newRecord(SubmitApplicationRequest.class);
+		subRequest.setApplicationSubmissionContext(subContext);
+		
+		log.info(String.format("Starting app id '%s'", appId.toString()));
+
+		rmClient.submitApplication(subRequest);
+		
+	}
+
+	/** Force-kills the previously submitted application; requires start() to have run. */
+	public void stop() throws YarnRemoteException {
+		log.info(String.format("Stopping app id '%s'", appId.toString()));
+		KillApplicationRequest killRequest = Records.newRecord(KillApplicationRequest.class);
+		killRequest.setApplicationId(appId);
+
+		rmClient.forceKillApplication(killRequest);
+	}
+
+	/** Stores the configuration and creates the YARN RPC factory from it. */
+	void configure(Configuration conf) {
+		this.conf = Preconditions.checkNotNull(conf);
+		this.rpc = YarnRPC.create(conf);
+	}
+
+	/** Opens the RPC proxy to the resource manager address from the YARN configuration. */
+	void connect() {
+		YarnConfiguration yarnConf = new YarnConfiguration(conf);
+		InetSocketAddress rmAddress = NetUtils.createSocketAddr(yarnConf.get(
+				YarnConfiguration.RM_ADDRESS,
+				YarnConfiguration.DEFAULT_RM_ADDRESS));
+		log.info("Connecting to ResourceManager at: " + rmAddress);
+		this.rmClient = ((ClientRMProtocol) rpc.getProxy(ClientRMProtocol.class, rmAddress, conf));
+	}
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/YarnContainerProcess.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/YarnContainerProcess.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/YarnContainerProcess.java
new file mode 100644
index 0000000..0d997bf
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/YarnContainerProcess.java
@@ -0,0 +1,60 @@
+package org.apache.helix.metamanager.provider.yarn;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import org.apache.log4j.Logger;
+
+/**
+ * Main entry point of a single YARN-launched container process. Wires up the
+ * Zookeeper-backed metadata service and the container life-cycle service, then
+ * blocks until ENTER is received or the JVM shuts down.
+ */
+public class YarnContainerProcess {
+	static final Logger log = Logger.getLogger(YarnContainerProcess.class);
+
+	public static void main(String[] args) throws Exception {
+		log.trace("BEGIN YarnProcess.main()");
+		  
+		// positional arguments as passed by the container launch command
+		final String clusterAddress = args[0];
+		final String clusterName = args[1];
+		final String metadataAddress = args[2];
+		final String providerName = args[3];
+		final String containerId = args[4];
+		
+		final ApplicationConfig appConfig = new ApplicationConfig(clusterAddress, clusterName, metadataAddress, providerName);
+		
+		log.debug("Launching metadata service");
+		final ZookeeperMetadataService metaService = new ZookeeperMetadataService(metadataAddress);
+		metaService.startService();
+		
+		log.debug("Launching yarn container service");
+		final YarnContainerService yarnProcess = new YarnContainerService(appConfig, metaService, containerId);
+		yarnProcess.startService();
+		
+		log.debug("Installing shutdown hooks");
+		Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() {
+			@Override
+			public void run() {
+				// stop the container service before its metadata backend
+				yarnProcess.stopService();
+				metaService.stopService();
+			}
+		}));
+		
+		System.out.println("Press ENTER to stop container process");
+		// blocks the main thread; services keep running in the background
+		System.in.read();
+		
+		log.trace("END YarnProcess.main()");
+	}
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/YarnContainerProvider.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/YarnContainerProvider.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/YarnContainerProvider.java
new file mode 100644
index 0000000..9f09d46
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/YarnContainerProvider.java
@@ -0,0 +1,108 @@
+package org.apache.helix.metamanager.provider.yarn;
+
+import java.util.concurrent.TimeoutException;
+
+import org.apache.helix.metamanager.ClusterContainerProvider;
+import org.apache.helix.metamanager.provider.yarn.ContainerMetadata.ContainerState;
+import org.apache.helix.metamanager.provider.yarn.MetadataService.MetadataServiceException;
+import org.apache.log4j.Logger;
+
+/**
+ * Container provider that manages YARN-based containers indirectly through
+ * shared metadata records: create/destroy write desired-state transitions to
+ * the metadata store and poll until the YARN side reports the target state.
+ */
+public class YarnContainerProvider implements ClusterContainerProvider {
+	
+	static final Logger log = Logger.getLogger(YarnContainerProvider.class);
+
+	/** Polling interval for state transitions, in ms. */
+	static final long POLL_INTERVAL = 1000;
+	
+	static final String REQUIRED_TYPE = "container";
+	
+	/** Maximum time to wait for a container state transition, in ms. */
+	static final long CONTAINER_TIMEOUT = 10000;
+	
+	/*
+	 * CONTAINERS
+	 *   A (A, READY)
+	 *   B (B, RUNNING)
+	 */
+	
+	final ApplicationConfig appConfig;
+	final String command;
+	
+	// NOTE(review): currently unused; kept for compatibility
+	final Object notifier = new Object();
+	
+	ZookeeperMetadataService metaService;
+	
+	public YarnContainerProvider(ApplicationConfig appConfig, String command) {
+		this.appConfig = appConfig;
+		this.command = command;
+	}
+
+	/**
+	 * Registers a new container in state ACQUIRE and blocks until it reports ACTIVE.
+	 *
+	 * @throws IllegalArgumentException if type is not "container"
+	 */
+	@Override
+	public void create(final String id, final String type) throws Exception {
+		if(!REQUIRED_TYPE.equals(type)) {
+			throw new IllegalArgumentException(String.format("Type '%s' not supported", type));
+		}
+		
+		metaService.create(new ContainerMetadata(id, command, appConfig.providerName));
+		waitForState(id, ContainerState.ACTIVE);
+	}
+
+	/**
+	 * Requests teardown of a container, blocks until it reaches FINALIZE and
+	 * then deletes its metadata record.
+	 *
+	 * @throws IllegalStateException if the container is in none of ACTIVE, FAILED, FINALIZE
+	 */
+	@Override
+	public void destroy(final String id) throws Exception {
+		ContainerMetadata meta = metaService.read(id);
+
+		if(meta.state == ContainerState.ACTIVE) {
+			log.info(String.format("Destroying active container, going to teardown"));
+			metaService.update(new ContainerMetadata(meta, ContainerState.TEARDOWN));
+			
+		} else if(meta.state == ContainerState.FAILED) {
+			log.info(String.format("Destroying failed container, going to halted"));
+			metaService.update(new ContainerMetadata(meta, ContainerState.HALTED));
+			
+		} else if(meta.state == ContainerState.FINALIZE) {
+			log.info(String.format("Destroying finalized container, skipping"));
+			
+		} else {
+			throw new IllegalStateException(String.format("Container '%s' must be active, failed or finalized", id));
+		}
+		
+		waitForState(id, ContainerState.FINALIZE);
+		metaService.delete(id);
+	}
+
+	/** Best-effort destruction of all known containers; failures are logged, never propagated. */
+	@Override
+	public void destroyAll() {
+		try {
+			for(ContainerMetadata meta : metaService.readAll()) {
+				try {
+					destroy(meta.id);
+				} catch (Exception e) {
+					// best effort: log and continue with the remaining containers
+					log.warn(String.format("Could not destroy container '%s'", meta.id), e);
+				}
+			}
+		} catch (Exception e) {
+			log.warn("Could not read container metadata", e);
+		}
+	}
+
+	public void startService() {
+		log.debug("Starting yarn container provider service");
+		metaService = new ZookeeperMetadataService(appConfig.metadataAddress);
+		metaService.startService();
+	}
+	
+	public void stopService() {
+		log.debug("Stopping yarn container provider service");
+		if(metaService != null) {
+			metaService.stopService();
+			metaService = null;
+		}
+	}
+	
+	/**
+	 * Polls the metadata store until the container reaches the given state.
+	 *
+	 * @throws TimeoutException if the state is not reached within CONTAINER_TIMEOUT ms
+	 */
+	void waitForState(String id, ContainerState state) throws MetadataServiceException, InterruptedException, TimeoutException {
+		long limit = System.currentTimeMillis() + CONTAINER_TIMEOUT;
+		ContainerMetadata meta = metaService.read(id);
+		while(meta.state != state) {
+			if(System.currentTimeMillis() >= limit) {
+				throw new TimeoutException(String.format("Container '%s' failed to reach state '%s' (currently is '%s')", id, state, meta.state));
+			}
+			Thread.sleep(POLL_INTERVAL);
+			meta = metaService.read(id);
+		}
+	}
+	
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/YarnContainerService.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/YarnContainerService.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/YarnContainerService.java
new file mode 100644
index 0000000..8abd8df
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/YarnContainerService.java
@@ -0,0 +1,129 @@
+package org.apache.helix.metamanager.provider.yarn;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import java.util.concurrent.Executors;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.helix.HelixManager;
+import org.apache.helix.HelixManagerFactory;
+import org.apache.helix.InstanceType;
+import org.apache.helix.metamanager.managed.ManagedFactory;
+import org.apache.helix.metamanager.provider.yarn.ContainerMetadata.ContainerState;
+import org.apache.log4j.Logger;
+
+/**
+ * Runs inside a YARN container: periodically reads this container's metadata
+ * record and drives the Helix participant through its life-cycle
+ * (CONNECTING -> ACTIVE, TEARDOWN -> HALTED, start failures -> FAILED).
+ */
+public class YarnContainerService {
+	static final Logger log = Logger.getLogger(YarnContainerService.class);
+
+	/** Interval between status updates, in ms. */
+	static final long CONTAINERSERVICE_INTERVAL = 1000;
+
+	final ApplicationConfig appConfig;
+	final String containerId;
+	
+	HelixManager participantManager;
+
+	MetadataService metaService;
+	ScheduledExecutorService executor;
+
+	public YarnContainerService(ApplicationConfig appConfig, MetadataService metaService, String containerId) {
+		this.appConfig = appConfig;
+		this.metaService = metaService;
+		this.containerId = containerId;
+	}
+
+	/** Starts the periodic status-update task. */
+	public void startService() {
+		log.debug("starting yarn container service");
+
+		executor = Executors.newSingleThreadScheduledExecutor();
+		executor.scheduleAtFixedRate(new ContainerService(), 0, CONTAINERSERVICE_INTERVAL, TimeUnit.MILLISECONDS);
+	}
+
+	/**
+	 * Stops the status-update task and blocks until it has terminated.
+	 * NOTE(review): does not stop a running participant; the TEARDOWN
+	 * transition appears to be responsible for that — confirm.
+	 */
+	public void stopService() {
+		log.debug("stopping yarn container service");
+		
+		if(executor != null) {
+			executor.shutdown();
+			try {
+				// awaitTermination replaces the former sleep-poll busy loop
+				while(!executor.awaitTermination(100, TimeUnit.MILLISECONDS)) {
+					// keep waiting for the in-flight status update to finish
+				}
+			} catch (InterruptedException e) {
+				// preserve the interrupt status for the caller
+				Thread.currentThread().interrupt();
+			}
+			executor = null;
+		}
+	}
+	
+	/** Connects this container as a Helix participant of the managed cluster. */
+	public void startParticipant() throws Exception {
+		log.info("STARTING " + containerId);
+		participantManager = HelixManagerFactory.getZKHelixManager(appConfig.clusterName,
+				containerId, InstanceType.PARTICIPANT, appConfig.clusterAddress);
+		participantManager.getStateMachineEngine().registerStateModelFactory(
+				"MasterSlave", new ManagedFactory());
+		participantManager.connect();
+		log.info("STARTED " + containerId);
+	}
+
+	/** Disconnects the Helix participant, if connected. */
+	public void stopParticipant() {
+		if (participantManager != null) {
+			participantManager.disconnect();
+			participantManager = null;
+		}
+	}
+	
+	/** Periodic task translating metadata state transitions into participant actions. */
+	class ContainerService implements Runnable {
+		@Override
+		public void run() {
+			log.info("updating container status");
+			
+			try {
+				ContainerMetadata meta = metaService.read(containerId);
+				
+				if(meta.state == ContainerState.CONNECTING) {
+					log.info("container connecting, going to active");
+					try {
+						startParticipant();
+						metaService.update(new ContainerMetadata(meta, ContainerState.ACTIVE));
+					} catch (Exception e) {
+						log.error("Failed to start participant, going to failed", e);
+						stopParticipant();
+						metaService.update(new ContainerMetadata(meta, ContainerState.FAILED));
+					}
+				}
+				
+				if(meta.state == ContainerState.ACTIVE) {
+					// do something
+					// and go to failed on error
+				}
+				
+				if(meta.state == ContainerState.TEARDOWN) {
+					log.info("container teardown, going to halted");
+					stopParticipant();
+					metaService.update(new ContainerMetadata(meta, ContainerState.HALTED));
+				}
+				
+			} catch(Exception e) {
+				// never let the scheduled task die; log and retry on the next tick
+				log.error(String.format("Error while updating container '%s' status", containerId), e);
+			}
+		}
+	}
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/YarnContainerStatusProvider.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/YarnContainerStatusProvider.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/YarnContainerStatusProvider.java
new file mode 100644
index 0000000..54aa3da
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/YarnContainerStatusProvider.java
@@ -0,0 +1,52 @@
+package org.apache.helix.metamanager.provider.yarn;
+
+import org.apache.helix.metamanager.ClusterContainerStatusProvider;
+import org.apache.helix.metamanager.provider.yarn.ContainerMetadata.ContainerState;
+import org.apache.helix.metamanager.provider.yarn.MetadataService.MetadataServiceException;
+
+/**
+ * Status provider reading container states from the Zookeeper-backed metadata
+ * store. NOTE(review): the service created in the constructor is not started;
+ * callers appear to be expected to invoke startService() first — confirm.
+ */
+public class YarnContainerStatusProvider implements ClusterContainerStatusProvider {
+
+	final String metadataAddress;
+	
+	ZookeeperMetadataService metaService;
+	
+	public YarnContainerStatusProvider(String metadataAddress) {
+		this.metadataAddress = metadataAddress;
+		this.metaService = new ZookeeperMetadataService(metadataAddress);
+	}
+
+	@Override
+	public boolean exists(String id) {
+		// guard against use after stopService()
+		return metaService != null && metaService.exists(id);
+	}
+
+	@Override
+	public boolean isActive(String id) {
+		if(metaService == null)
+			return false;
+		try {
+			return metaService.read(id).state == ContainerState.ACTIVE;
+		} catch (MetadataServiceException e) {
+			// unreadable metadata is reported as not active
+			return false;
+		}
+	}
+
+	@Override
+	public boolean isFailed(String id) {
+		if(metaService == null)
+			return false;
+		try {
+			return metaService.read(id).state == ContainerState.FAILED;
+		} catch (MetadataServiceException e) {
+			// consistent with isActive(): unreadable metadata is not "failed"
+			return false;
+		}
+	}
+
+	public void startService() {
+		metaService = new ZookeeperMetadataService(metadataAddress);
+		metaService.startService();
+	}
+	
+	public void stopService() {
+		if(metaService != null) {
+			metaService.stopService();
+			metaService = null;
+		}
+	}
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/YarnMaster.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/YarnMaster.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/YarnMaster.java
new file mode 100644
index 0000000..f43bb67
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/YarnMaster.java
@@ -0,0 +1,134 @@
+package org.apache.helix.metamanager.provider.yarn;
+
+import java.net.InetSocketAddress;
+import java.util.Map;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.net.NetUtils;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+import org.apache.hadoop.yarn.api.AMRMProtocol;
+import org.apache.hadoop.yarn.api.ApplicationConstants;
+import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationMasterRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterRequest;
+import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.ipc.YarnRPC;
+import org.apache.hadoop.yarn.util.ConverterUtils;
+import org.apache.hadoop.yarn.util.Records;
+import org.apache.log4j.Logger;
+
+/**
+ * YARN application master entry point ({@link Tool}-based). Registers with the
+ * ResourceManager, reads the application config from environment variables set
+ * by {@link YarnApplication}, starts a {@link YarnMasterService}, then blocks
+ * until the JVM terminates; a shutdown hook stops the service and sends the
+ * finish request to the RM.
+ */
+public class YarnMaster extends Configured implements Tool {
+
+	static final Logger log = Logger.getLogger(YarnMaster.class);
+	
+	AMRMProtocol resourceManager;
+	ApplicationAttemptId appAttemptId;
+	
+	YarnMasterService service;
+	
+	@Override
+	public int run(String[] args) throws Exception {
+		log.trace("BEGIN YarnMaster.run()");
+			
+		Configuration conf = getConf();
+		
+		this.appAttemptId = getApplicationAttemptId();
+		log.info(String.format("Got application attempt id '%s'", appAttemptId.toString()));
+		
+		log.debug("Getting resource manager");
+		this.resourceManager = getResourceManager(conf);
+
+	    // register the AM with the RM
+		log.debug("Registering application master");
+	    RegisterApplicationMasterRequest appMasterRequest = 
+	        Records.newRecord(RegisterApplicationMasterRequest.class);
+	    appMasterRequest.setApplicationAttemptId(appAttemptId);     
+	    appMasterRequest.setHost("");
+	    appMasterRequest.setRpcPort(0);
+	    appMasterRequest.setTrackingUrl("");
+
+	    resourceManager.registerApplicationMaster(appMasterRequest);
+
+	    // application config is passed via environment variables set by the launcher
+	    String clusterAddress = getEnv(YarnApplication.ENV_CLUSTER_ADDRESS);
+	    String clusterName = getEnv(YarnApplication.ENV_CLUSTER_NAME);
+	    String metadataAddress = getEnv(YarnApplication.ENV_METADATA_ADDRESS);
+	    String providerName = getEnv(YarnApplication.ENV_PROVIDER_NAME);
+	    ApplicationConfig appConfig = new ApplicationConfig(clusterAddress, clusterName, metadataAddress, providerName);
+	    
+	    service = new YarnMasterService(resourceManager, conf, appAttemptId, appConfig);
+	    service.startService();
+	    
+	    // cleanup runs on JVM shutdown (e.g. SIGTERM from YARN)
+	    Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() {
+	    	@Override
+	    	public void run() {
+
+	    		service.stopService();
+	    		
+	    		// finish application
+	    	    log.debug("Sending finish request");
+	    	    FinishApplicationMasterRequest finishReq = 
+	    	    	Records.newRecord(FinishApplicationMasterRequest.class);
+	    	    
+	    	    finishReq.setAppAttemptId(getApplicationAttemptId());
+	    	    finishReq.setFinishApplicationStatus(FinalApplicationStatus.SUCCEEDED);
+	    	    
+	    	    // best effort: the RM may already consider the attempt gone
+	    	    try { resourceManager.finishApplicationMaster(finishReq); } catch(Exception ignore) {}
+	    	}
+	    }));
+	    
+	    // block forever; shutdown is driven entirely by the hook above
+	    try { Thread.currentThread().join(); } catch(Exception ignore) {}
+	    
+		log.trace("END YarnMaster.run()");
+		
+		return 0;
+	}
+
+	// Builds an RPC proxy to the ResourceManager scheduler interface.
+	private AMRMProtocol getResourceManager(Configuration conf) {
+		// Connect to the Scheduler of the ResourceManager.
+	    YarnConfiguration yarnConf = new YarnConfiguration(conf);
+	    YarnRPC rpc = YarnRPC.create(yarnConf);
+	    InetSocketAddress rmAddress = 
+	        NetUtils.createSocketAddr(yarnConf.get(
+	            YarnConfiguration.RM_SCHEDULER_ADDRESS,
+	            YarnConfiguration.DEFAULT_RM_SCHEDULER_ADDRESS));           
+	    log.info("Connecting to ResourceManager at " + rmAddress);
+	    AMRMProtocol resourceManager = 
+	        (AMRMProtocol) rpc.getProxy(AMRMProtocol.class, rmAddress, conf);
+		return resourceManager;
+	}
+
+	// Derives the attempt id from the container id YARN places in the environment.
+	private ApplicationAttemptId getApplicationAttemptId() {
+	    ContainerId containerId = ConverterUtils.toContainerId(getEnv(ApplicationConstants.AM_CONTAINER_ID_ENV));
+	    ApplicationAttemptId appAttemptID = containerId.getApplicationAttemptId();
+		return appAttemptID;
+	}
+	
+	// Reads a required environment variable; throws if it is not set.
+	private String getEnv(String key) {
+		Map<String, String> envs = System.getenv();
+	    String clusterName = envs.get(key);
+	    if (clusterName == null) {
+	      // container id should always be set in the env by the framework 
+	      throw new IllegalArgumentException(
+	          String.format("%s not set in the environment", key));
+	    }
+	    return clusterName;
+	}
+
+	public static void main(String[] args) throws Exception {
+		log.trace("BEGIN YarnMaster.main()");
+
+		try {
+			int rc = ToolRunner.run(new Configuration(), new YarnMaster(), args);
+			System.exit(rc);
+		} catch (Exception e) {
+			System.err.println(e);
+			System.exit(1);
+		}
+
+		log.trace("END YarnMaster.main()");
+	}
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/YarnMasterProcess.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/YarnMasterProcess.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/YarnMasterProcess.java
new file mode 100644
index 0000000..bd4fb3d
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/YarnMasterProcess.java
@@ -0,0 +1,119 @@
+package org.apache.helix.metamanager.provider.yarn;
+
+import java.net.InetSocketAddress;
+import java.util.Map;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.net.NetUtils;
+import org.apache.hadoop.yarn.api.AMRMProtocol;
+import org.apache.hadoop.yarn.api.ApplicationConstants;
+import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationMasterRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterRequest;
+import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.ipc.YarnRPC;
+import org.apache.hadoop.yarn.util.ConverterUtils;
+import org.apache.hadoop.yarn.util.Records;
+import org.apache.log4j.Logger;
+
+/**
+ * Standalone application-master host process. Registers the AM with the
+ * ResourceManager, starts the metadata and master services, and waits for
+ * console input; a shutdown hook stops both services and finishes the
+ * application. Config is read from environment variables set by
+ * {@link YarnApplication}.
+ */
+public class YarnMasterProcess {
+
+	static final Logger log = Logger.getLogger(YarnMasterProcess.class);
+	
+	public static void main(String[] args) throws Exception {
+		log.trace("BEGIN YarnMaster.main()");
+
+		final ApplicationAttemptId appAttemptId = getApplicationAttemptId();
+		log.info(String.format("Got application attempt id '%s'", appAttemptId.toString()));
+		
+		log.debug("Connecting to resource manager");
+		Configuration conf = new YarnConfiguration();
+		
+		final AMRMProtocol resourceManager = getResourceManager(conf);
+
+	    // register the AM with the RM
+		log.debug("Registering application master");
+	    RegisterApplicationMasterRequest appMasterRequest = 
+	        Records.newRecord(RegisterApplicationMasterRequest.class);
+	    appMasterRequest.setApplicationAttemptId(appAttemptId);     
+	    appMasterRequest.setHost("");
+	    appMasterRequest.setRpcPort(0);
+	    appMasterRequest.setTrackingUrl("");
+
+	    resourceManager.registerApplicationMaster(appMasterRequest);
+
+	    // application config is passed via environment variables set by the launcher
+	    String clusterAddress = getEnv(YarnApplication.ENV_CLUSTER_ADDRESS);
+	    String clusterName = getEnv(YarnApplication.ENV_CLUSTER_NAME);
+	    String metadataAddress = getEnv(YarnApplication.ENV_METADATA_ADDRESS);
+	    String providerName = getEnv(YarnApplication.ENV_PROVIDER_NAME);
+	    ApplicationConfig appConfig = new ApplicationConfig(clusterAddress, clusterName, metadataAddress, providerName);
+	    
+		log.debug("Launching metadata service");
+	    final ZookeeperMetadataService metaService = new ZookeeperMetadataService(metadataAddress);
+	    metaService.startService();
+	    
+		log.debug("Launching yarn master service");
+	    final YarnMasterService service = new YarnMasterService(resourceManager, conf, appAttemptId, appConfig, metaService);
+	    service.startService();
+	    
+		log.debug("Installing shutdown hooks");
+	    Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() {
+	    	@Override
+	    	public void run() {
+
+	    		// stop in reverse start order: master service first, then metadata
+	    		service.stopService();
+	    		
+	    		metaService.stopService();
+	    		
+	    		// finish application
+	    	    log.debug("Sending finish request");
+	    	    FinishApplicationMasterRequest finishReq = 
+	    	    	Records.newRecord(FinishApplicationMasterRequest.class);
+	    	    
+	    	    finishReq.setAppAttemptId(getApplicationAttemptId());
+	    	    finishReq.setFinishApplicationStatus(FinalApplicationStatus.SUCCEEDED);
+	    	    
+	    	    // best effort: the RM may already consider the attempt gone
+	    	    try { resourceManager.finishApplicationMaster(finishReq); } catch(Exception ignore) {}
+	    	}
+	    }));
+	    
+	    // NOTE(review): interactive gate — blocks on stdin, so this process expects a console; verify for detached deployment
+	    System.out.println("Press ENTER to stop master service");
+	    System.in.read();
+	    
+		log.trace("END YarnMaster.main()");
+	}
+	
+	// Builds an RPC proxy to the ResourceManager scheduler interface.
+	static AMRMProtocol getResourceManager(Configuration conf) {
+		// Connect to the Scheduler of the ResourceManager.
+	    YarnConfiguration yarnConf = new YarnConfiguration(conf);
+	    YarnRPC rpc = YarnRPC.create(yarnConf);
+	    InetSocketAddress rmAddress = 
+	        NetUtils.createSocketAddr(yarnConf.get(
+	            YarnConfiguration.RM_SCHEDULER_ADDRESS,
+	            YarnConfiguration.DEFAULT_RM_SCHEDULER_ADDRESS));
+	    log.info("Connecting to ResourceManager at " + rmAddress);
+	    AMRMProtocol resourceManager = 
+	        (AMRMProtocol) rpc.getProxy(AMRMProtocol.class, rmAddress, conf);
+		return resourceManager;
+	}
+
+	// Derives the attempt id from the container id YARN places in the environment.
+	static ApplicationAttemptId getApplicationAttemptId() {
+	    ContainerId containerId = ConverterUtils.toContainerId(getEnv(ApplicationConstants.AM_CONTAINER_ID_ENV));
+	    ApplicationAttemptId appAttemptID = containerId.getApplicationAttemptId();
+		return appAttemptID;
+	}
+	
+	// Reads a required environment variable; throws if it is not set.
+	static String getEnv(String key) {
+		Map<String, String> envs = System.getenv();
+	    String clusterName = envs.get(key);
+	    if (clusterName == null) {
+	      // container id should always be set in the env by the framework 
+	      throw new IllegalArgumentException(
+	          String.format("%s not set in the environment", key));
+	    }
+	    return clusterName;
+	}
+
+}


[05/15] Adding Helix-task-framework and Yarn integration modules

Posted by ki...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnContainerStatusProvider.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnContainerStatusProvider.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnContainerStatusProvider.java
new file mode 100644
index 0000000..f7e3076
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnContainerStatusProvider.java
@@ -0,0 +1,52 @@
+package org.apache.helix.metamanager.impl.yarn;
+
+import org.apache.helix.metamanager.ContainerStatusProvider;
+import org.apache.helix.metamanager.impl.yarn.ContainerMetadata.ContainerState;
+import org.apache.helix.metamanager.impl.yarn.MetadataService.MetadataServiceException;
+
+/**
+ * Container status provider backed by YARN container metadata. Delegates all
+ * lookups to a {@link ZookeeperMetadataService} reachable at the given address.
+ */
+public class YarnContainerStatusProvider implements ContainerStatusProvider {
+
+	// address of the Zookeeper-based metadata store, kept so startService() can reconnect
+	final String metadataAddress;
+	
+	ZookeeperMetadataService metaService;
+	
+	public YarnContainerStatusProvider(String metadataAddress) {
+		this.metadataAddress = metadataAddress;
+		this.metaService = new ZookeeperMetadataService(metadataAddress);
+	}
+
+	@Override
+	public boolean exists(String id) {
+		return metaService.exists(id);
+	}
+
+	@Override
+	public boolean isActive(String id) {
+		try {
+			return metaService.read(id).state == ContainerState.ACTIVE;
+		} catch (MetadataServiceException e) {
+			// missing/unreadable metadata is reported as not active
+			return false;
+		}
+	}
+
+	@Override
+	public boolean isFailed(String id) {
+		try {
+			return metaService.read(id).state == ContainerState.FAILED;
+		} catch (Exception e) {
+			// missing/unreadable metadata is reported as not failed
+			return false;
+		}
+	}
+
+	// (Re)creates the metadata connection; discards any instance created by the constructor.
+	public void startService() {
+		metaService = new ZookeeperMetadataService(metadataAddress);
+		metaService.startService();
+	}
+	
+	// Idempotent: safe to call when the service is already stopped.
+	public void stopService() {
+		if(metaService != null) {
+			metaService.stopService();
+			metaService = null;
+		}
+	}
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnDataProvider.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnDataProvider.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnDataProvider.java
new file mode 100644
index 0000000..8bd80b5
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnDataProvider.java
@@ -0,0 +1,73 @@
+package org.apache.helix.metamanager.impl.yarn;
+
+import java.util.Collection;
+
+/**
+ * Abstraction for a (remote) repository of yarn container meta data. Meta data
+ * is read and updated by {@link YarnContainerProvider}
+ * {@link YarnMasterProcess}, {@link YarnContainerProcess}.<br/>
+ * <b>NOTE:</b> Each operation is assumed to be atomic.
+ * 
+ */
+interface YarnDataProvider {
+
+    /**
+     * Checks for existence of meta data about container instance
+     * 
+     * @param id
+     *            unique container id
+     * @return true, if meta data exists
+     */
+    public boolean exists(String id);
+
+    /**
+     * Create meta data entry. Check for non-existence of meta data for given
+     * container id and create node.
+     * 
+     * @param data
+     *            container meta data with unique id
+     * @throws Exception
+     *             if meta data entry already exist
+     */
+    public void create(YarnContainerData data) throws Exception;
+
+    /**
+     * Read meta data for given container id.
+     * 
+     * @param id
+     *            unique container id
+     * @return yarn container data
+     * @throws Exception
+     *             if meta data entry for given id does not exist
+     */
+    public YarnContainerData read(String id) throws Exception;
+
+    /**
+     * Read all meta data stored for this domain space of yarn providers and
+     * containers.
+     * 
+     * @return collection of meta data entries, empty if none
+     * @throws Exception
+     */
+    public Collection<YarnContainerData> readAll() throws Exception;
+
+    /**
+     * Write meta data entry.
+     * 
+     * @param data
+     *            yarn container meta data
+     * @throws Exception
+     *             if meta data entry for given id does not exist
+     */
+    public void update(YarnContainerData data) throws Exception;
+
+    /**
+     * Delete meta data entry. Frees up unique id to be reused. May throw an
+     * exception on non-existence or be idempotent.
+     * 
+     * @param id
+     *            unique container id
+     * @throws Exception
+     */
+    public void delete(String id) throws Exception;
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnMasterProcess.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnMasterProcess.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnMasterProcess.java
new file mode 100644
index 0000000..d4447ee
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnMasterProcess.java
@@ -0,0 +1,144 @@
+package org.apache.helix.metamanager.impl.yarn;
+
+import java.net.InetSocketAddress;
+import java.util.Map;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.net.NetUtils;
+import org.apache.hadoop.yarn.api.AMRMProtocol;
+import org.apache.hadoop.yarn.api.ApplicationConstants;
+import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationMasterRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterRequest;
+import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.ipc.YarnRPC;
+import org.apache.hadoop.yarn.util.ConverterUtils;
+import org.apache.hadoop.yarn.util.Records;
+import org.apache.helix.metamanager.provider.ProviderProcess;
+import org.apache.log4j.Logger;
+
+/**
+ * Host process for {@link YarnContainerProviderProcess}. Hasts application
+ * master in YARN and provider participant to Helix meta cluster. (Program entry
+ * point)
+ * 
+ */
+class YarnMasterProcess {
+
+    static final Logger log = Logger.getLogger(YarnMasterProcess.class);
+
+    public static void main(String[] args) throws Exception {
+        log.trace("BEGIN YarnMaster.main()");
+
+        final ApplicationAttemptId appAttemptId = getApplicationAttemptId();
+        log.info(String.format("Got application attempt id '%s'", appAttemptId.toString()));
+
+        log.debug("Reading master properties");
+        YarnMasterProperties properties = YarnUtils.createMasterProperties(YarnUtils.getPropertiesFromPath(YarnUtils.YARN_MASTER_PROPERTIES));
+
+        if (!properties.isValid())
+            throw new IllegalArgumentException(String.format("master properties not valid: %s", properties.toString()));
+
+        log.debug("Connecting to resource manager");
+        Configuration conf = new YarnConfiguration();
+        conf.set(YarnConfiguration.RM_ADDRESS, properties.getResourceManager());
+        conf.set(YarnConfiguration.RM_SCHEDULER_ADDRESS, properties.getScheduler());
+        conf.set(FileSystem.FS_DEFAULT_NAME_KEY, properties.getHdfs());
+
+        final AMRMProtocol resourceManager = getResourceManager(conf);
+
+        // register the AM with the RM
+        log.debug("Registering application master");
+        RegisterApplicationMasterRequest appMasterRequest = Records.newRecord(RegisterApplicationMasterRequest.class);
+        appMasterRequest.setApplicationAttemptId(appAttemptId);
+        appMasterRequest.setHost("");
+        appMasterRequest.setRpcPort(0);
+        appMasterRequest.setTrackingUrl("");
+
+        resourceManager.registerApplicationMaster(appMasterRequest);
+
+        log.debug("Starting yarndata service");
+        final ZookeeperYarnDataProvider yarnDataService = new ZookeeperYarnDataProvider(properties.getYarnData());
+        yarnDataService.start();
+
+        log.debug("Starting yarn master service");
+        final YarnMasterService service = new YarnMasterService();
+        service.configure(properties);
+        service.setAttemptId(appAttemptId);
+        service.setYarnDataProvider(yarnDataService);
+        service.setProtocol(resourceManager);
+        service.setYarnConfiguration(conf);
+        service.start();
+
+        log.debug("Starting provider");
+        final YarnContainerProvider provider = new YarnContainerProvider();
+        provider.configure(properties);
+        provider.start();
+
+        log.debug("Starting provider process");
+        final ProviderProcess process = new ProviderProcess();
+        process.configure(properties);
+        process.setConteinerProvider(provider);
+        process.start();
+
+        log.debug("Installing shutdown hooks");
+        Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() {
+            @Override
+            public void run() {
+                log.debug("Stopping provider process");
+                process.stop();
+
+                log.debug("Stopping provider");
+                try { provider.stop(); } catch (Exception ignore) {}
+
+                log.debug("Stopping yarn master service");
+                service.stop();
+
+                log.debug("Stopping yarndata service");
+                yarnDataService.stop();
+
+                // finish application
+                log.debug("Sending finish request");
+                FinishApplicationMasterRequest finishReq = Records.newRecord(FinishApplicationMasterRequest.class);
+
+                finishReq.setAppAttemptId(getApplicationAttemptId());
+                finishReq.setFinishApplicationStatus(FinalApplicationStatus.SUCCEEDED);
+	    	    
+	    	    try { resourceManager.finishApplicationMaster(finishReq); } catch(Exception ignore) {}
+	    	}
+	    }));
+	    
+		log.trace("END YarnMaster.main()");
+	}
+	
+    static AMRMProtocol getResourceManager(Configuration conf) {
+        // Connect to the Scheduler of the ResourceManager.
+        YarnConfiguration yarnConf = new YarnConfiguration(conf);
+        YarnRPC rpc = YarnRPC.create(yarnConf);
+        InetSocketAddress rmAddress = NetUtils.createSocketAddr(yarnConf.get(YarnConfiguration.RM_SCHEDULER_ADDRESS,
+                YarnConfiguration.DEFAULT_RM_SCHEDULER_ADDRESS));
+        log.info("Connecting to ResourceManager at " + rmAddress);
+        AMRMProtocol resourceManager = (AMRMProtocol) rpc.getProxy(AMRMProtocol.class, rmAddress, conf);
+        return resourceManager;
+    }
+
+    static ApplicationAttemptId getApplicationAttemptId() {
+        ContainerId containerId = ConverterUtils.toContainerId(getEnv(ApplicationConstants.AM_CONTAINER_ID_ENV));
+        ApplicationAttemptId appAttemptID = containerId.getApplicationAttemptId();
+        return appAttemptID;
+    }
+
+    static String getEnv(String key) {
+        Map<String, String> envs = System.getenv();
+        String clusterName = envs.get(key);
+        if (clusterName == null) {
+            // container id should always be set in the env by the framework
+            throw new IllegalArgumentException(String.format("%s not set in the environment", key));
+        }
+        return clusterName;
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnMasterProperties.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnMasterProperties.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnMasterProperties.java
new file mode 100644
index 0000000..abeb461
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnMasterProperties.java
@@ -0,0 +1,13 @@
+package org.apache.helix.metamanager.impl.yarn;
+
+/**
+ * Base configuration for {@link YarnMasterProcess}. Currently adds no fields
+ * beyond {@link YarnContainerProviderProperties}; exists as a distinct type
+ * for the master's configuration.
+ *
+ */
+public class YarnMasterProperties extends YarnContainerProviderProperties {
+	/**
+	 * Serialization id; bump if incompatible fields are added.
+	 */
+	private static final long serialVersionUID = -2209509980239674160L;
+	
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnMasterService.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnMasterService.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnMasterService.java
new file mode 100644
index 0000000..1e7aec3
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnMasterService.java
@@ -0,0 +1,414 @@
+package org.apache.helix.metamanager.impl.yarn;
+
+import java.io.File;
+import java.net.InetSocketAddress;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+import java.util.concurrent.Executors;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.net.NetUtils;
+import org.apache.hadoop.yarn.api.AMRMProtocol;
+import org.apache.hadoop.yarn.api.ApplicationConstants;
+import org.apache.hadoop.yarn.api.ContainerManager;
+import org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
+import org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest;
+import org.apache.hadoop.yarn.api.records.AMResponse;
+import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
+import org.apache.hadoop.yarn.api.records.Container;
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
+import org.apache.hadoop.yarn.api.records.ContainerStatus;
+import org.apache.hadoop.yarn.api.records.LocalResource;
+import org.apache.hadoop.yarn.api.records.LocalResourceType;
+import org.apache.hadoop.yarn.api.records.NodeId;
+import org.apache.hadoop.yarn.api.records.Priority;
+import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.api.records.ResourceRequest;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.exceptions.YarnRemoteException;
+import org.apache.hadoop.yarn.ipc.YarnRPC;
+import org.apache.hadoop.yarn.util.Records;
+import org.apache.helix.metamanager.Service;
+import org.apache.helix.metamanager.impl.yarn.YarnContainerData.ContainerState;
+import org.apache.log4j.Logger;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.Maps;
+
+/**
+ * Implements YARN application master. Continuously monitors container health in
+ * YARN and yarn meta data updates. Spawns and destroys containers.
+ * 
+ */
+class YarnMasterService implements Service {
+
+    static final Logger                     log                    = Logger.getLogger(YarnMasterService.class);
+
+    static final String                     REQUIRED_TYPE          = "container";
+
+    static final long                       ZOOKEEPER_TIMEOUT      = 5000;
+    static final long                       MASTERSERVICE_INTERVAL = 1000;
+
+    static final String                     CONTAINERS             = "CONTAINERS";
+
+    static final String                     YARN_CONTAINER_COMMAND = "/bin/sh %s 1>%s/stdout 2>%s/stderr";
+
+    YarnMasterProperties                    properties;
+    AMRMProtocol                            protocol;
+    ApplicationAttemptId                    attemptId;
+    Configuration                           yarnConfig;
+    YarnDataProvider                        yarnDataService;
+
+    final Map<ContainerId, Container>       unassignedContainers   = new HashMap<ContainerId, Container>();
+    final Map<ContainerId, Container>       activeContainers       = new HashMap<ContainerId, Container>();
+    final Map<ContainerId, ContainerStatus> completedContainers    = new HashMap<ContainerId, ContainerStatus>();
+    final Map<ContainerId, String>          yarn2meta              = new HashMap<ContainerId, String>();
+
+    ScheduledExecutorService                executor;
+
+    @Override
+    public void configure(Properties properties) throws Exception {
+        YarnMasterProperties yarnProperties = YarnUtils.createMasterProperties(properties);
+        Preconditions.checkArgument(yarnProperties.isValid());
+        this.properties = yarnProperties;
+    }
+
+    public void setProtocol(AMRMProtocol protocol) {
+        this.protocol = protocol;
+    }
+
+    public void setAttemptId(ApplicationAttemptId attemptId) {
+        this.attemptId = attemptId;
+    }
+
+    public void setYarnConfiguration(Configuration yarnConfig) {
+        this.yarnConfig = yarnConfig;
+    }
+
+    public void setYarnDataProvider(YarnDataProvider yarnDataService) {
+        this.yarnDataService = yarnDataService;
+    }
+
+    @Override
+    public void start() {
+        Preconditions.checkNotNull(properties);
+        Preconditions.checkNotNull(protocol);
+        Preconditions.checkNotNull(attemptId);
+        Preconditions.checkNotNull(yarnConfig);
+        Preconditions.checkNotNull(yarnDataService);
+
+        log.debug("starting yarn master service");
+
+        executor = Executors.newSingleThreadScheduledExecutor();
+        executor.scheduleAtFixedRate(new YarnService(), 0, MASTERSERVICE_INTERVAL, TimeUnit.MILLISECONDS);
+    }
+
+    @Override
+    public void stop() {
+        log.debug("stopping yarn master service");
+
+        if (executor != null) {
+            executor.shutdown();
+            while (!executor.isTerminated()) {
+                try {
+                    Thread.sleep(100);
+                } catch (InterruptedException e) {
+                    // ignore
+                }
+            }
+            executor = null;
+        }
+
+        destroyLocalMasterNamespace();
+    }
+
+    Collection<YarnContainerData> readOwnedYarnData() throws Exception {
+        log.debug("reading container data");
+
+        Collection<YarnContainerData> containers = new ArrayList<YarnContainerData>();
+        for (YarnContainerData meta : yarnDataService.readAll()) {
+            if (meta.owner.equals(properties.getName())) {
+                containers.add(meta);
+                log.debug(String.format("found container node '%s' (state=%s, yarnId=%s, owner=%s)", meta.id, meta.state, meta.yarnId, meta.owner));
+            }
+        }
+        return containers;
+    }
+
    /**
     * Periodic reconciliation task run by the application master. Compares the
     * desired container set from the meta data store ({@link YarnContainerData})
     * against the containers currently held from the YARN resource manager,
     * then in a single allocate round-trip: asks for additional containers,
     * releases destroyed or surplus ones, launches container processes on newly
     * granted containers and finalizes completed ones.
     */
    class YarnService implements Runnable {
        // monotonically increasing id sent with every AllocateRequest
        int responseId = 0;

        @Override
        public void run() {
            try {
                log.debug("running yarn service update cycle");

                Collection<YarnContainerData> yarndata = readOwnedYarnData();

                // active meta containers
                int numMetaActive = countActiveMeta(yarndata);

                // newly acquired meta containers
                int numMetaAcquire = countAcquireMeta(yarndata);

                // destroyed meta containers
                List<ContainerId> destroyedReleasedIds = createDestroyedReleaseList(yarndata);
                int numMetaCompleted = destroyedReleasedIds.size();

                int numMeta = numMetaAcquire + numMetaActive + numMetaCompleted;

                // yarn containers
                int numYarnUnassigned = unassignedContainers.size();
                int numYarnActive = activeContainers.size();
                int numYarnCompleted = completedContainers.size();
                int numYarn = numYarnUnassigned + numYarnActive + numYarnCompleted;

                // positive: containers still needed; negative: surplus to release
                int numYarnRequired = numMetaAcquire - numYarnUnassigned;

                // additionally required containers
                int numRequestAdditional = Math.max(0, numYarnRequired);

                // overstock containers
                List<ContainerId> unneededReleasedIds = createOverstockReleaseList(numYarnRequired);

                int numReleased = destroyedReleasedIds.size() + unneededReleasedIds.size();

                log.debug(String.format("meta containers (total=%d, acquire=%d, active=%d, completed=%d)", numMeta, numMetaAcquire, numMetaActive, numMetaCompleted));
                log.debug(String.format("yarn containers (total=%d, unassigned=%d, active=%d, completed=%d)", numYarn, numYarnUnassigned, numYarnActive, numYarnCompleted));
                log.debug(String.format("requesting %d new containers, releasing %d", numRequestAdditional, numReleased));

                Priority priority = Records.newRecord(Priority.class);
                priority.setPriority(0);

                Resource resource = Records.newRecord(Resource.class);
                resource.setMemory(256); // TODO make dynamic

                // single host-agnostic ("*") ask covering all additionally
                // required containers.
                // NOTE(review): the ask is re-sent each cycle; verify the RM
                // treats it as the absolute outstanding request rather than
                // accumulating it across cycles.
                ResourceRequest resourceRequest = Records.newRecord(ResourceRequest.class);
                resourceRequest.setHostName("*");
                resourceRequest.setNumContainers(numRequestAdditional);
                resourceRequest.setPriority(priority);
                resourceRequest.setCapability(resource);

                AllocateRequest request = Records.newRecord(AllocateRequest.class);
                request.setResponseId(responseId);
                request.setApplicationAttemptId(attemptId);
                request.addAsk(resourceRequest);
                request.addAllReleases(destroyedReleasedIds);
                request.addAllReleases(unneededReleasedIds);

                responseId++;

                AllocateResponse allocateResponse = null;
                try {
                    allocateResponse = protocol.allocate(request);
                } catch (YarnRemoteException e) {
                    // ignore: abort this cycle, retry on the next scheduled run
                    log.error("Error allocating containers", e);
                    return;
                }

                AMResponse response = allocateResponse.getAMResponse();

                // remove unassigned container about to be freed
                for (ContainerId id : unneededReleasedIds) {
                    log.info(String.format("Unassigned container '%s' about to be freed, removing", id));
                    unassignedContainers.remove(id);
                }

                // newly added containers
                for (Container container : response.getAllocatedContainers()) {
                    unassignedContainers.put(container.getId(), container);
                }

                log.info(String.format("%d new containers available, %d required", unassignedContainers.size(), numMetaAcquire));

                // pair unassigned yarn containers with meta containers awaiting
                // acquisition; metas with yarnId >= 0 are already bound and are
                // skipped without consuming a yarn container
                Iterator<Container> itYarn = unassignedContainers.values().iterator();
                Iterator<YarnContainerData> itMeta = yarndata.iterator();
                while (itYarn.hasNext() && itMeta.hasNext()) {
                    YarnContainerData meta = itMeta.next();

                    if (meta.yarnId >= 0)
                        continue;

                    Container containerYarn = itYarn.next();

                    log.debug(String.format("assigning yarn container '%s' to container node '%s'", containerYarn.getId(), meta.id));

                    String command = String.format(YARN_CONTAINER_COMMAND, YarnUtils.YARN_CONTAINER_PATH, ApplicationConstants.LOG_DIR_EXPANSION_VAR,
                            ApplicationConstants.LOG_DIR_EXPANSION_VAR);

                    log.debug(String.format("Running container command \"%s\"", command));

                    // configuration handed to the spawned container process
                    YarnContainerProcessProperties containerProp = meta.getProperties();
                    containerProp.setProperty(YarnContainerProcessProperties.ADDRESS, properties.getAddress());
                    containerProp.setProperty(YarnContainerProcessProperties.CLUSTER, properties.getCluster());
                    containerProp.setProperty(YarnContainerProcessProperties.YARNDATA, properties.getYarnData());
                    containerProp.setProperty(YarnContainerProcessProperties.NAME, meta.id);

                    File propertiesFile = YarnUtils.writePropertiesToTemp(containerProp);

                    // HDFS: stage the container package and its properties under
                    // a per-application, per-container namespace
                    final String namespace = attemptId.getApplicationId().toString() + "/" + meta.id;
                    final Path containerArchive = YarnUtils.copyToHdfs(YarnUtils.YARN_CONTAINER_STAGING, YarnUtils.YARN_CONTAINER_STAGING, namespace, yarnConfig);
                    final Path containerProperties = YarnUtils.copyToHdfs(propertiesFile.getCanonicalPath(), YarnUtils.YARN_CONTAINER_PROPERTIES, namespace, yarnConfig);

                    // local resources made available in the container's work dir
                    Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();
                    localResources.put(YarnUtils.YARN_CONTAINER_DESTINATION,
                            YarnUtils.createHdfsResource(containerArchive, LocalResourceType.ARCHIVE, yarnConfig));
                    localResources.put(YarnUtils.YARN_CONTAINER_PROPERTIES,
                            YarnUtils.createHdfsResource(containerProperties, LocalResourceType.FILE, yarnConfig));

                    ContainerLaunchContext context = Records.newRecord(ContainerLaunchContext.class);
                    context.setContainerId(containerYarn.getId());
                    context.setResource(containerYarn.getResource());
                    context.setEnvironment(Maps.<String, String> newHashMap());
                    context.setCommands(Collections.singletonList(command));
                    context.setLocalResources(localResources);
                    context.setUser(properties.getUser());

                    log.debug(String.format("container '%s' executing command '%s'", meta.id, command));

                    StartContainerRequest startReq = Records.newRecord(StartContainerRequest.class);
                    startReq.setContainerLaunchContext(context);

                    try {
                        getContainerManager(containerYarn).startContainer(startReq);

                    } catch (YarnRemoteException e) {
                        // NOTE(review): a single launch failure aborts the whole
                        // update cycle; remaining pairings are retried next cycle
                        log.error(String.format("Error starting container '%s'", meta.id), e);
                        return;
                    }

                    log.debug(String.format("container '%s' started, updating container node", meta.id));

                    // bind the meta container to the launched yarn container
                    meta.setProperties(containerProp);
                    meta.setState(ContainerState.CONNECTING);
                    meta.setYarnId(containerYarn.getId().getId());
                    yarnDataService.update(meta);

                    yarn2meta.put(containerYarn.getId(), meta.id);

                    log.debug(String.format("removing '%s' from unassigned yarn containers and adding to active list", containerYarn.getId()));

                    itYarn.remove();
                    activeContainers.put(containerYarn.getId(), containerYarn);

                    // cleanup (only deleted at JVM exit)
                    propertiesFile.deleteOnExit();

                }

                // containers reported completed by the resource manager
                for (ContainerStatus status : response.getCompletedContainersStatuses()) {
                    ContainerId id = status.getContainerId();

                    log.info(String.format("Container '%s' completed", id));

                    if (unassignedContainers.containsKey(id)) {
                        log.info(String.format("Unassigned container '%s' terminated, removing", id));
                        unassignedContainers.remove(id);
                    }

                    if (activeContainers.containsKey(id)) {
                        log.info(String.format("Active container '%s' terminated, removing", id));
                        activeContainers.remove(id);

                        // move the backing meta container to FINALIZE
                        String metaId = yarn2meta.get(id);
                        YarnContainerData meta = yarnDataService.read(metaId);

                        log.debug(String.format("container '%s' finalized, updating container node", meta.id));

                        yarnDataService.update(meta.setState(ContainerState.FINALIZE));
                    }

                    completedContainers.put(id, status);
                }

                log.debug("yarn service update cycle complete");

            } catch (Exception e) {
                log.error("Error while executing yarn update cycle", e);
            }
        }

        /**
         * Picks up to -numYarnRequired containers from the unassigned pool for
         * release and removes them from the pool. No-op when
         * numYarnRequired >= 0.
         */
        private List<ContainerId> createOverstockReleaseList(int numYarnRequired) {
            List<ContainerId> unneededReleasedIds = new ArrayList<ContainerId>();
            Iterator<Container> itUnassigned = unassignedContainers.values().iterator();
            if (numYarnRequired < 0) {
                for (int i = 0; i < -numYarnRequired && itUnassigned.hasNext(); i++) {
                    Container container = itUnassigned.next();
                    unneededReleasedIds.add(container.getId());
                    log.debug(String.format("Container '%s' no longer required, removing", container.getId()));
                    itUnassigned.remove();
                }
            }
            return unneededReleasedIds;
        }

        /** Builds the release list from meta containers in state HALTED. */
        private List<ContainerId> createDestroyedReleaseList(Collection<YarnContainerData> yarndata) {
            List<ContainerId> releasedIds = new ArrayList<ContainerId>();
            for (YarnContainerData meta : yarndata) {
                if (meta.state == ContainerState.HALTED) {
                    ContainerId containerId = Records.newRecord(ContainerId.class);
                    containerId.setApplicationAttemptId(attemptId);
                    containerId.setId(meta.yarnId);
                    releasedIds.add(containerId);
                    log.debug(String.format("releasing container '%s'", containerId));
                }
            }
            return releasedIds;
        }

        /** Counts meta containers in state ACQUIRE (awaiting a yarn container). */
        private int countAcquireMeta(Collection<YarnContainerData> yarndata) {
            int numMetaAcquire = 0;
            for (YarnContainerData meta : yarndata) {
                if (meta.state == ContainerState.ACQUIRE) {
                    numMetaAcquire++;
                }
            }
            return numMetaAcquire;
        }

        /** Counts meta containers in any state other than ACQUIRE, HALTED or FINALIZE. */
        private int countActiveMeta(Collection<YarnContainerData> yarndata) {
            int numMetaActive = 0;
            for (YarnContainerData meta : yarndata) {
                if (meta.state != ContainerState.ACQUIRE && meta.state != ContainerState.HALTED && meta.state != ContainerState.FINALIZE) {
                    numMetaActive++;
                }
            }
            return numMetaActive;
        }
    }
+
+    private ContainerManager getContainerManager(Container container) {
+        YarnConfiguration yarnConf = new YarnConfiguration(yarnConfig);
+        YarnRPC rpc = YarnRPC.create(yarnConf);
+        NodeId nodeId = container.getNodeId();
+        String containerIpPort = String.format("%s:%d", nodeId.getHost(), nodeId.getPort());
+        log.info("Connecting to ContainerManager at: " + containerIpPort);
+        InetSocketAddress addr = NetUtils.createSocketAddr(containerIpPort);
+        ContainerManager cm = (ContainerManager) rpc.getProxy(ContainerManager.class, addr, yarnConfig);
+        return cm;
+    }
+
+    public static void destroyLocalMasterNamespace() {
+        log.info("cleaning up master directory");
+        FileUtils.deleteQuietly(new File(YarnUtils.YARN_MASTER_DESTINATION));
+        FileUtils.deleteQuietly(new File(YarnUtils.YARN_MASTER_PROPERTIES));
+        FileUtils.deleteQuietly(new File(YarnUtils.YARN_CONTAINER_STAGING));
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnStatusProvider.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnStatusProvider.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnStatusProvider.java
new file mode 100644
index 0000000..b4a13b9
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnStatusProvider.java
@@ -0,0 +1,67 @@
+package org.apache.helix.metamanager.impl.yarn;
+
+import java.util.Properties;
+
+import org.apache.helix.metamanager.StatusProviderService;
+import org.apache.helix.metamanager.impl.yarn.YarnContainerData.ContainerState;
+import org.apache.log4j.Logger;
+
+/**
+ * StatusProvider for YARN-based containers spawned via
+ * {@link YarnContainerProvider}. Reads {@link YarnDataProvider} meta data.
+ * Runnable and configurable service.
+ * 
+ */
+public class YarnStatusProvider implements StatusProviderService {
+
+    static final Logger       log = Logger.getLogger(YarnStatusProvider.class);
+
+    String                    yarndata;
+
+    ZookeeperYarnDataProvider yarnDataService;
+
+    public YarnStatusProvider() {
+        // left blank
+    }
+
+    public YarnStatusProvider(String yarndata) {
+        this.yarndata = yarndata;
+        this.yarnDataService = new ZookeeperYarnDataProvider(yarndata);
+    }
+
+    @Override
+    public void configure(Properties properties) throws Exception {
+        this.yarndata = properties.getProperty("yarndata");
+        this.yarnDataService = new ZookeeperYarnDataProvider(yarndata);
+    }
+
+    @Override
+    public void start() throws Exception {
+        yarnDataService = new ZookeeperYarnDataProvider(yarndata);
+        yarnDataService.start();
+    }
+
+    @Override
+    public void stop() throws Exception {
+        if (yarnDataService != null) {
+            yarnDataService.stop();
+            yarnDataService = null;
+        }
+    }
+
+    @Override
+    public boolean exists(String id) {
+        return yarnDataService.exists(id);
+    }
+
+    @Override
+    public boolean isHealthy(String id) {
+        try {
+            return yarnDataService.read(id).state == ContainerState.ACTIVE;
+        } catch (Exception e) {
+            log.warn(String.format("Could not get activity data of %s", id));
+            return false;
+        }
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnUtils.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnUtils.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnUtils.java
new file mode 100644
index 0000000..fe093c8
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnUtils.java
@@ -0,0 +1,174 @@
+package org.apache.helix.metamanager.impl.yarn;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.Writer;
+import java.nio.charset.Charset;
+import java.util.Properties;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileContext;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.UnsupportedFileSystemException;
+import org.apache.hadoop.yarn.api.records.LocalResource;
+import org.apache.hadoop.yarn.api.records.LocalResourceType;
+import org.apache.hadoop.yarn.api.records.LocalResourceVisibility;
+import org.apache.hadoop.yarn.api.records.URL;
+import org.apache.hadoop.yarn.util.ConverterUtils;
+import org.apache.hadoop.yarn.util.Records;
+import org.apache.helix.metamanager.impl.yarn.YarnContainerData.ContainerState;
+import org.apache.log4j.Logger;
+
+import com.google.common.base.Preconditions;
+import com.google.common.io.Files;
+import com.google.gson.Gson;
+import com.google.gson.GsonBuilder;
+import com.google.gson.TypeAdapter;
+import com.google.gson.stream.JsonReader;
+import com.google.gson.stream.JsonToken;
+import com.google.gson.stream.JsonWriter;
+
+/**
+ * Utility for writing property files, transferring data via HDFS and
+ * serializing {@link YarnContainerData} for zookeeper.
+ * 
+ */
+class YarnUtils {
+
+    static final Logger log                         = Logger.getLogger(YarnUtils.class);
+
+    static final String YARN_MASTER_ARCHIVE_PATH    = "target/metamanager-assembly.tar.gz";
+    static final String YARN_MASTER_PATH            = "master/metamanager/bin/yarn-master-process.sh";
+    static final String YARN_MASTER_STAGING         = "master.tar.gz";
+    static final String YARN_MASTER_DESTINATION     = "master";
+    static final String YARN_MASTER_PROPERTIES      = "master.properties";
+    static final String YARN_CONTAINER_ARCHIVE_PATH = "target/metamanager-assembly.tar.gz";
+    static final String YARN_CONTAINER_STAGING      = "container.tar.gz";
+    static final String YARN_CONTAINER_PATH         = "container/metamanager/bin/yarn-container-process.sh";
+    static final String YARN_CONTAINER_DESTINATION  = "container";
+    static final String YARN_CONTAINER_PROPERTIES   = "container.properties";
+
+    static Gson         gson;
+    static {
+        GsonBuilder builder = new GsonBuilder();
+        builder.registerTypeAdapter(ContainerState.class, new ContainerStateAdapter());
+        builder.setPrettyPrinting();
+        gson = builder.create();
+    }
+
+    public static String toJson(YarnContainerData meta) {
+        return gson.toJson(meta);
+    }
+
+    public static YarnContainerData fromJson(String json) {
+        return gson.fromJson(json, YarnContainerData.class);
+    }
+
+    public static Properties getPropertiesFromPath(String path) throws IOException {
+        Properties properties = new Properties();
+        properties.load(new InputStreamReader(new FileInputStream(path)));
+        return properties;
+    }
+
+    public static File writePropertiesToTemp(Properties properties) throws IOException {
+        File tmpFile = File.createTempFile("provider", ".properties");
+        Writer writer = Files.newWriter(tmpFile, Charset.defaultCharset());
+        properties.store(writer, null);
+        writer.flush();
+        writer.close();
+        return tmpFile;
+    }
+
+    public static Path copyToHdfs(String source, String dest, String namespace, Configuration conf) throws IOException {
+        Path sourcePath = makeQualified(source);
+        Path destPath = makeQualified(conf.get(FileSystem.FS_DEFAULT_NAME_KEY) + "/" + namespace + "/" + dest);
+        log.debug(String.format("Copying '%s' to '%s'", sourcePath, destPath));
+
+        FileSystem fs = FileSystem.get(conf);
+        fs.copyFromLocalFile(false, true, sourcePath, destPath);
+        fs.close();
+        return destPath;
+    }
+
+    public static void destroyHdfsNamespace(String namespace, Configuration conf) throws IOException {
+        Path path = makeQualified(conf.get(FileSystem.FS_DEFAULT_NAME_KEY) + "/" + namespace);
+        log.debug(String.format("Deleting '%s'", path));
+
+        FileSystem fs = FileSystem.get(conf);
+        fs.delete(path, true);
+        fs.close();
+    }
+
+    public static LocalResource createHdfsResource(Path path, LocalResourceType type, Configuration conf) throws IOException {
+        FileSystem fs = FileSystem.get(conf);
+
+        URL url = ConverterUtils.getYarnUrlFromPath(path);
+
+        FileStatus status = fs.getFileStatus(path);
+
+        LocalResource resource = Records.newRecord(LocalResource.class);
+        resource.setResource(url);
+        resource.setSize(status.getLen());
+        resource.setTimestamp(status.getModificationTime());
+        resource.setType(type);
+        resource.setVisibility(LocalResourceVisibility.APPLICATION);
+
+        fs.close();
+
+        return resource;
+    }
+
+    static Path makeQualified(String path) throws UnsupportedFileSystemException {
+        return FileContext.getFileContext().makeQualified(new Path(path));
+    }
+
+    static class ContainerStateAdapter extends TypeAdapter<ContainerState> {
+        @Override
+        public ContainerState read(JsonReader reader) throws IOException {
+            if (reader.peek() == JsonToken.NULL) {
+                reader.nextNull();
+                return null;
+            }
+            return ContainerState.valueOf(reader.nextString());
+        }
+
+        @Override
+        public void write(JsonWriter writer, ContainerState value) throws IOException {
+            if (value == null) {
+                writer.nullValue();
+                return;
+            }
+            writer.value(value.name());
+        }
+    }
+
+    static YarnContainerProcessProperties createContainerProcessProperties(Properties properties) {
+        Preconditions.checkNotNull(properties);
+        YarnContainerProcessProperties yarnProp = new YarnContainerProcessProperties();
+        yarnProp.putAll(properties);
+        return yarnProp;
+    }
+
+    static YarnContainerProviderProperties createContainerProviderProperties(Properties properties) {
+        Preconditions.checkNotNull(properties);
+        YarnContainerProviderProperties yarnProp = new YarnContainerProviderProperties();
+        yarnProp.putAll(properties);
+        return yarnProp;
+    }
+
+    static YarnMasterProperties createMasterProperties(Properties properties) {
+        Preconditions.checkNotNull(properties);
+        YarnMasterProperties yarnProp = new YarnMasterProperties();
+        yarnProp.putAll(properties);
+        return yarnProp;
+    }
+
+    private YarnUtils() {
+        // left blank
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/ZookeeperMetadataProvider.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/ZookeeperMetadataProvider.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/ZookeeperMetadataProvider.java
new file mode 100644
index 0000000..79efd8c
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/ZookeeperMetadataProvider.java
@@ -0,0 +1,116 @@
+package org.apache.helix.metamanager.impl.yarn;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Properties;
+
+import org.I0Itec.zkclient.ZkClient;
+import org.I0Itec.zkclient.exception.ZkException;
+import org.apache.helix.metamanager.Service;
+import org.apache.log4j.Logger;
+
+public class ZookeeperMetadataProvider implements MetadataProvider, Service {
+
+    static final Logger log                 = Logger.getLogger(ZookeeperMetadataProvider.class);
+
+    static final String CONTAINER_NAMESPACE = "containers";
+
+    static final String BASE_PATH           = "/" + CONTAINER_NAMESPACE;
+
+    static final int    META_TIMEOUT        = 5000;
+    static final long   POLL_INTERVAL       = 100;
+
+    String              metadata;
+
+    ZkClient            client;
+
+    public ZookeeperMetadataProvider() {
+        // left blank
+    }
+
+    public ZookeeperMetadataProvider(String metadataAddress) {
+        this.metadata = metadataAddress;
+    }
+
+    @Override
+    public void configure(Properties properties) throws Exception {
+        this.metadata = properties.getProperty("metadata");
+    }
+
+    @Override
+    public void start() {
+        log.debug(String.format("starting metadata service for '%s'", metadata));
+
+        client = new ZkClient(metadata, META_TIMEOUT, META_TIMEOUT);
+
+        client.createPersistent(BASE_PATH, true);
+    }
+
+    @Override
+    public void stop() {
+        log.debug(String.format("stopping metadata service for '%s'", metadata));
+        if (client != null) {
+            client.close();
+            client = null;
+        }
+    }
+
+    @Override
+    public boolean exists(String id) {
+        return client.exists(makePath(id));
+    }
+
+    @Override
+    public void create(ContainerMetadata meta) throws MetadataException {
+        try {
+            client.createEphemeral(makePath(meta.id), YarnUtils.toJson(meta));
+        } catch (ZkException e) {
+            throw new MetadataException(e);
+        }
+    }
+
+    @Override
+    public ContainerMetadata read(String id) throws MetadataException {
+        try {
+            return YarnUtils.fromJson(client.<String> readData(makePath(id)));
+        } catch (ZkException e) {
+            throw new MetadataException(e);
+        }
+    }
+
+    @Override
+    public Collection<ContainerMetadata> readAll() throws MetadataException {
+        try {
+            Collection<ContainerMetadata> metadata = new ArrayList<ContainerMetadata>();
+            for (String id : client.getChildren(BASE_PATH)) {
+                metadata.add(YarnUtils.fromJson(client.<String> readData(makePath(id))));
+            }
+            return metadata;
+        } catch (ZkException e) {
+            throw new MetadataException(e);
+        }
+    }
+
+    @Override
+    public void update(ContainerMetadata meta) throws MetadataException {
+        try {
+            client.writeData(makePath(meta.id), YarnUtils.toJson(meta));
+        } catch (ZkException e) {
+            throw new MetadataException(e);
+        }
+    }
+
+    @Override
+    public void delete(String id) throws MetadataException {
+        try {
+            client.delete(makePath(id));
+        } catch (ZkException e) {
+            throw new MetadataException(e);
+        }
+    }
+
+    String makePath(String containerId) {
+        return BASE_PATH + "/" + containerId;
+    }
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/ZookeeperMetadataService.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/ZookeeperMetadataService.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/ZookeeperMetadataService.java
new file mode 100644
index 0000000..b0e150a
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/ZookeeperMetadataService.java
@@ -0,0 +1,102 @@
+package org.apache.helix.metamanager.impl.yarn;
+
+import java.util.ArrayList;
+import java.util.Collection;
+
+import org.I0Itec.zkclient.ZkClient;
+import org.I0Itec.zkclient.exception.ZkException;
+import org.apache.log4j.Logger;
+
+public class ZookeeperMetadataService implements MetadataService {
+
+    static final Logger log                 = Logger.getLogger(ZookeeperMetadataService.class);
+
+    static final String CONTAINER_NAMESPACE = "containers";
+
+    static final String BASE_PATH           = "/" + CONTAINER_NAMESPACE;
+
+    static final long   POLL_INTERVAL       = 100;
+
+    final String        metadataAddress;
+
+    ZkClient            client;
+
+    public ZookeeperMetadataService(String metadataAddress) {
+        this.metadataAddress = metadataAddress;
+    }
+
+    public void startService() {
+        log.debug(String.format("starting metadata service for '%s'", metadataAddress));
+
+        client = new ZkClient(metadataAddress);
+
+        client.createPersistent(BASE_PATH, true);
+    }
+
+    public void stopService() {
+        log.debug(String.format("stopping metadata service for '%s'", metadataAddress));
+        if (client != null) {
+            client.close();
+            client = null;
+        }
+    }
+
+    @Override
+    public boolean exists(String id) {
+        return client.exists(makePath(id));
+    }
+
+    @Override
+    public void create(ContainerMetadata meta) throws MetadataServiceException {
+        try {
+            client.createPersistent(makePath(meta.id), YarnUtils.toJson(meta));
+        } catch (ZkException e) {
+            throw new MetadataServiceException(e);
+        }
+    }
+
+    @Override
+    public ContainerMetadata read(String id) throws MetadataServiceException {
+        try {
+            return YarnUtils.fromJson(client.<String> readData(makePath(id)));
+        } catch (ZkException e) {
+            throw new MetadataServiceException(e);
+        }
+    }
+
+    @Override
+    public Collection<ContainerMetadata> readAll() throws MetadataServiceException {
+        try {
+            Collection<ContainerMetadata> metadata = new ArrayList<ContainerMetadata>();
+            for (String id : client.getChildren(BASE_PATH)) {
+                metadata.add(YarnUtils.fromJson(client.<String> readData(makePath(id))));
+            }
+            return metadata;
+        } catch (ZkException e) {
+            throw new MetadataServiceException(e);
+        }
+    }
+
+    @Override
+    public void update(ContainerMetadata meta) throws MetadataServiceException {
+        try {
+            client.writeData(makePath(meta.id), YarnUtils.toJson(meta));
+        } catch (ZkException e) {
+            throw new MetadataServiceException(e);
+        }
+    }
+
+    @Override
+    public void delete(String id) throws MetadataServiceException {
+        try {
+            client.delete(makePath(id));
+        } catch (ZkException e) {
+            throw new MetadataServiceException(e);
+        }
+    }
+
+    String makePath(String containerId) {
+        return BASE_PATH + "/" + containerId;
+    }
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/ZookeeperYarnDataProvider.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/ZookeeperYarnDataProvider.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/ZookeeperYarnDataProvider.java
new file mode 100644
index 0000000..32af837
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/ZookeeperYarnDataProvider.java
@@ -0,0 +1,100 @@
+package org.apache.helix.metamanager.impl.yarn;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Properties;
+
+import org.I0Itec.zkclient.ZkClient;
+import org.apache.helix.metamanager.Service;
+import org.apache.log4j.Logger;
+
+/**
+ * Configurable and runnable service for {@link YarnDataProvider} based on
+ * zookeeper.
+ * 
+ */
+public class ZookeeperYarnDataProvider implements YarnDataProvider, Service {
+
+    static final Logger log                 = Logger.getLogger(ZookeeperYarnDataProvider.class);
+
+    static final String CONTAINER_NAMESPACE = "containers";
+
+    static final String BASE_PATH           = "/" + CONTAINER_NAMESPACE;
+
+    static final int    META_TIMEOUT        = 5000;
+    static final long   POLL_INTERVAL       = 100;
+
+    String              yarndata;
+
+    ZkClient            client;
+
+    public ZookeeperYarnDataProvider() {
+        // left blank
+    }
+
+    public ZookeeperYarnDataProvider(String yarndataAddress) {
+        this.yarndata = yarndataAddress;
+    }
+
+    @Override
+    public void configure(Properties properties) throws Exception {
+        this.yarndata = properties.getProperty("yarndata");
+    }
+
+    @Override
+    public void start() {
+        log.debug(String.format("starting yarndata service for '%s'", yarndata));
+
+        client = new ZkClient(yarndata, META_TIMEOUT, META_TIMEOUT);
+
+        client.createPersistent(BASE_PATH, true);
+    }
+
+    @Override
+    public void stop() {
+        log.debug(String.format("stopping yarndata service for '%s'", yarndata));
+        if (client != null) {
+            client.close();
+            client = null;
+        }
+    }
+
+    @Override
+    public boolean exists(String id) {
+        return client.exists(makePath(id));
+    }
+
+    @Override
+    public void create(YarnContainerData meta) throws Exception {
+        client.createEphemeral(makePath(meta.id), YarnUtils.toJson(meta));
+    }
+
+    @Override
+    public YarnContainerData read(String id) throws Exception {
+        return YarnUtils.fromJson(client.<String> readData(makePath(id)));
+    }
+
+    @Override
+    public Collection<YarnContainerData> readAll() throws Exception {
+        Collection<YarnContainerData> yarndata = new ArrayList<YarnContainerData>();
+        for (String id : client.getChildren(BASE_PATH)) {
+            yarndata.add(YarnUtils.fromJson(client.<String> readData(makePath(id))));
+        }
+        return yarndata;
+    }
+
+    @Override
+    public void update(YarnContainerData meta) throws Exception {
+        client.writeData(makePath(meta.id), YarnUtils.toJson(meta));
+    }
+
+    @Override
+    public void delete(String id) throws Exception {
+        client.delete(makePath(id));
+    }
+
+    String makePath(String containerId) {
+        return BASE_PATH + "/" + containerId;
+    }
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/managed/ContainerProcess.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/managed/ContainerProcess.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/managed/ContainerProcess.java
new file mode 100644
index 0000000..11fb75d
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/managed/ContainerProcess.java
@@ -0,0 +1,85 @@
+package org.apache.helix.metamanager.managed;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import org.apache.helix.HelixManager;
+import org.apache.helix.HelixManagerFactory;
+import org.apache.helix.InstanceType;
+import org.apache.log4j.Logger;
+
+public class ContainerProcess
+{
+	static final Logger log = Logger.getLogger(ContainerProcess.class);
+	
+  private String clusterName;
+  private String zkAddress;
+  private String instanceName;
+  private HelixManager participantManager;
+
+ public ContainerProcess(String clusterName, String zkAddress, String instanceName)
+  {
+    this.clusterName = clusterName;
+    this.zkAddress = zkAddress;
+    this.instanceName = instanceName;
+
+  }
+
+  public void start() throws Exception
+  {
+    log.info("STARTING "+ instanceName);
+    participantManager = HelixManagerFactory.getZKHelixManager(clusterName,
+        instanceName, InstanceType.PARTICIPANT, zkAddress);
+    participantManager.getStateMachineEngine().registerStateModelFactory(
+        "MasterSlave", new ManagedFactory());
+    participantManager.connect();
+    log.info("STARTED "+ instanceName);
+
+  }
+
+  public void stop()
+  {
+    if (participantManager != null)
+    {
+      participantManager.disconnect();
+    }
+  }
+
+  public static void main(String[] args) throws Exception
+  {
+    final String zkAddress = args[0];
+    final String clusterName = args[1];
+    final String instanceName = args[2];
+
+    // Give a unique id to each process, most commonly used format hostname_port
+    final ContainerProcess managerProcess = new ContainerProcess(clusterName, zkAddress,
+        instanceName);
+    Runtime.getRuntime().addShutdownHook(new Thread()
+    {
+      @Override
+      public void run()
+      {
+    	  log.info("Shutting down " + instanceName);
+        managerProcess.stop();
+      }
+    });
+    managerProcess.start();
+    Thread.currentThread().join();
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/managed/HelixClusterAdmin.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/managed/HelixClusterAdmin.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/managed/HelixClusterAdmin.java
new file mode 100644
index 0000000..f33c09c
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/managed/HelixClusterAdmin.java
@@ -0,0 +1,42 @@
+package org.apache.helix.metamanager.managed;
+
+import org.apache.helix.HelixAdmin;
+import org.apache.helix.metamanager.ClusterAdmin;
+import org.apache.helix.model.InstanceConfig;
+import org.apache.log4j.Logger;
+
+public class HelixClusterAdmin implements ClusterAdmin {
+	
+	static final Logger log = Logger.getLogger(HelixClusterAdmin.class);
+
+	final String clusterName;
+	final String resourceName;
+	final int replica;
+	final HelixAdmin admin;
+	
+	public HelixClusterAdmin(String clusterName, String resourceName,
+			int replica, HelixAdmin admin) {
+		this.clusterName = clusterName;
+		this.resourceName = resourceName;
+		this.replica = replica;
+		this.admin = admin;
+	}
+
+	@Override
+	public synchronized void addInstance(String connection) {
+		log.debug(String.format("injecting instance %s in cluster %s", connection, clusterName));
+		admin.addInstance(clusterName, new InstanceConfig(connection));
+	}
+
+	@Override
+	public synchronized void removeInstance(String connection) {
+		log.debug(String.format("removing instance %s from cluster %s", connection, clusterName));
+		admin.dropInstance(clusterName, new InstanceConfig(connection));
+	}
+
+	@Override
+	public void rebalance() {
+		admin.rebalance(clusterName, resourceName, replica);
+	}
+	
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/managed/LocalClusterManager.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/managed/LocalClusterManager.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/managed/LocalClusterManager.java
new file mode 100644
index 0000000..2bb64de
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/managed/LocalClusterManager.java
@@ -0,0 +1,42 @@
+package org.apache.helix.metamanager.managed;
+
+import org.apache.helix.HelixAdmin;
+import org.apache.helix.metamanager.ClusterAdmin;
+import org.apache.helix.model.InstanceConfig;
+import org.apache.log4j.Logger;
+
+public class LocalClusterManager implements ClusterAdmin {
+	
+	static final Logger log = Logger.getLogger(LocalClusterManager.class);
+
+	final String clusterName;
+	final String resourceName;
+	final int replica;
+	final HelixAdmin admin;
+	
+	public LocalClusterManager(String clusterName, String resourceName,
+			int replica, HelixAdmin admin) {
+		this.clusterName = clusterName;
+		this.resourceName = resourceName;
+		this.replica = replica;
+		this.admin = admin;
+	}
+
+	@Override
+	public synchronized void addInstance(String connection) {
+		log.debug(String.format("injecting instance %s in cluster %s", connection, clusterName));
+		admin.addInstance(clusterName, new InstanceConfig(connection));
+	}
+
+	@Override
+	public synchronized void removeInstance(String connection) {
+		log.debug(String.format("removing instance %s from cluster %s", connection, clusterName));
+		admin.dropInstance(clusterName, new InstanceConfig(connection));
+	}
+
+	@Override
+	public void rebalance() {
+		admin.rebalance(clusterName, resourceName, replica);
+	}
+	
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/managed/LocalContainerProvider.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/managed/LocalContainerProvider.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/managed/LocalContainerProvider.java
new file mode 100644
index 0000000..6c8eec0
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/managed/LocalContainerProvider.java
@@ -0,0 +1,87 @@
+package org.apache.helix.metamanager.managed;
+
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+
+import org.apache.helix.metamanager.ClusterContainerProvider;
+import org.apache.log4j.Logger;
+
+public class LocalContainerProvider implements ClusterContainerProvider {
+
+	static final Logger log = Logger.getLogger(LocalContainerProvider.class);
+	
+	static final String REQUIRED_TYPE = "container";
+	
+	// global view of processes required
+	static final Object staticLock = new Object();
+	static final Map<String, LocalProcess> processes = new HashMap<String, LocalProcess>();
+	
+	int connectionCounter = 0;
+	
+	final String zkAddress;
+	final String clusterName;
+	final String providerName;
+	
+	public LocalContainerProvider(String zkAddress, String clusterName, String providerName) {
+		this.zkAddress = zkAddress;
+		this.clusterName = clusterName;
+		this.providerName = providerName;
+	}
+
+	@Override
+	public void create(String id, String type) throws Exception {
+		synchronized (staticLock) {	
+			if(processes.containsKey(id))
+				throw new IllegalArgumentException(String.format("Process '%s' already exists", id));
+			
+			if(!type.equals(REQUIRED_TYPE))
+				throw new IllegalArgumentException(String.format("Type '%s' not supported", type));
+			
+			log.info(String.format("Running container '%s' (zkAddress='%s', clusterName='%s')", id, zkAddress, clusterName));
+			
+			ManagedProcess process = new ManagedProcess(clusterName, zkAddress, id);
+			process.start();
+		
+			processes.put(id, new LocalProcess(id, providerName, process));
+			
+		}
+	}
+	
+	@Override
+	public void destroy(String id) throws Exception {
+		synchronized (staticLock) {	
+			if(!processes.containsKey(id))
+				throw new IllegalArgumentException(String.format("Process '%s' does not exists", id));
+			
+			log.info(String.format("Destroying container '%s'", id));
+			
+			LocalProcess local = processes.remove(id);
+			
+			local.process.stop();
+		}
+	}
+	
+	@Override
+	public void destroyAll() {
+		synchronized (staticLock) {	
+			log.info("Destroying all processes");
+			for(String id : new HashSet<String>(processes.keySet())) {
+				try { destroy(id); } catch (Exception ignore) {}
+			}
+		}
+	}
+	
+	static class LocalProcess {
+		final String id;
+		final String owner;
+		final ManagedProcess process;
+		
+		public LocalProcess(String id, String owner, ManagedProcess process) {
+			this.id = id;
+			this.owner = owner;
+			this.process = process;
+		}
+	}
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/managed/LocalProcessProvider.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/managed/LocalProcessProvider.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/managed/LocalProcessProvider.java
new file mode 100644
index 0000000..01e3ab6
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/managed/LocalProcessProvider.java
@@ -0,0 +1,100 @@
+package org.apache.helix.metamanager.managed;
+
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+
+import org.apache.helix.metamanager.ClusterContainerProvider;
+import org.apache.log4j.Logger;
+
+public class LocalProcessProvider implements ClusterContainerProvider {
+
+	static final Logger log = Logger.getLogger(LocalProcessProvider.class);
+	
+	static final String REQUIRED_TYPE = "container";
+	
+	Map<String, ManagedProcess> processes = new HashMap<String, ManagedProcess>();
+	Map<String, String> id2connection = new HashMap<String, String>();
+
+	int connectionCounter = 0;
+	
+	final String zkAddress;
+	final String clusterName;
+	final int basePort;
+	
+	public LocalProcessProvider(String zkAddress, String clusterName, int basePort) {
+		this.zkAddress = zkAddress;
+		this.clusterName = clusterName;
+		this.basePort = basePort;
+	}
+
+	@Override
+	public synchronized String create(String id, String type) throws Exception {
+		if(processes.containsKey(id))
+			throw new IllegalArgumentException(String.format("Process '%s' already exists", id));
+		
+		if(!type.equals(REQUIRED_TYPE))
+			throw new IllegalArgumentException(String.format("Type '%s' not supported", type));
+		
+		String connection = "localhost_" + (basePort + connectionCounter);
+		connectionCounter++;
+		
+		log.info(String.format("Running container '%s' (zkAddress='%s', clusterName='%s', connection='%s')", id, zkAddress, clusterName, connection));
+		
+		ManagedProcess p = new ManagedProcess(clusterName, zkAddress, connection);
+		
+		processes.put(id, p);
+		id2connection.put(id, connection);
+		
+		return connection;
+	}
+	
+	public synchronized void start(String id) throws Exception {
+		if(!processes.containsKey(id))
+			throw new IllegalArgumentException(String.format("Process '%s' does not exists", id));
+		
+		log.info(String.format("Starting container '%s'", id));
+		
+		ManagedProcess p = processes.get(id);
+		
+		p.start();
+	}
+	
+	public synchronized void stop(String id) throws Exception {
+		if(!processes.containsKey(id))
+			throw new IllegalArgumentException(String.format("Process '%s' does not exists", id));
+		
+		log.info(String.format("Stopping container '%s'", id));
+		
+		ManagedProcess p = processes.get(id);
+		
+		p.stop();
+	}
+
+	@Override
+	public synchronized String destroy(String id) throws Exception {
+		if(!processes.containsKey(id))
+			throw new IllegalArgumentException(String.format("Process '%s' does not exists", id));
+		
+		log.info(String.format("Destroying container '%s'", id));
+		
+		String connection = id2connection.get(id);
+
+		processes.remove(id);
+		id2connection.remove(id);
+		
+		return connection;
+	}
+	
+	public synchronized void destroyAll() {
+		log.info("Destroying all processes");
+		for(String id : new HashSet<String>(processes.keySet())) {
+			try {
+				destroy(id);
+			} catch (Exception ignore) {
+				// ignore
+			}
+		}
+	}
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/managed/LocalStatusProvider.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/managed/LocalStatusProvider.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/managed/LocalStatusProvider.java
new file mode 100644
index 0000000..54f040f
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/managed/LocalStatusProvider.java
@@ -0,0 +1,22 @@
+package org.apache.helix.metamanager.managed;
+
+import org.apache.helix.metamanager.ClusterStatusProvider;
+
+public class LocalStatusProvider implements ClusterStatusProvider {
+
+	int targetContainerCount;
+	
+	public LocalStatusProvider(int targetContainerCount) {
+		this.targetContainerCount = targetContainerCount;
+	}
+
+	@Override
+	public int getTargetContainerCount(String type) {
+		return targetContainerCount;
+	}
+
+	public void setTargetContainerCount(int targetContainerCount) {
+		this.targetContainerCount = targetContainerCount;
+	}
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/managed/Managed.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/managed/Managed.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/managed/Managed.java
new file mode 100644
index 0000000..1e03103
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/managed/Managed.java
@@ -0,0 +1,64 @@
+package org.apache.helix.metamanager.managed;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import org.apache.helix.NotificationContext;
+import org.apache.helix.model.Message;
+import org.apache.helix.participant.statemachine.StateModel;
+import org.apache.helix.participant.statemachine.StateModelInfo;
+import org.apache.helix.participant.statemachine.Transition;
+import org.apache.log4j.Logger;
+
+@StateModelInfo(initialState = "OFFLINE", states = { "OFFLINE", "SLAVE", "MASTER", "DROPPED" })
+public class Managed extends StateModel {
+	
+	static final Logger log = Logger.getLogger(Managed.class);
+	
+	@Transition(from = "OFFLINE", to = "SLAVE")
+	public void offlineToSlave(Message m, NotificationContext context) {
+		log.trace(String.format("%s transitioning from OFFLINE to SLAVE",
+				context.getManager().getInstanceName()));
+	}
+
+	@Transition(from = "SLAVE", to = "OFFLINE")
+	public void slaveToOffline(Message m, NotificationContext context) {
+		log.trace(String.format("%s transitioning from SLAVE to OFFLINE",
+				context.getManager().getInstanceName()));
+	}
+
+	@Transition(from = "SLAVE", to = "MASTER")
+	public void slaveToMaster(Message m, NotificationContext context) {
+		log.trace(String.format("%s transitioning from SLAVE to MASTER",
+				context.getManager().getInstanceName()));
+	}
+
+	@Transition(from = "MASTER", to = "SLAVE")
+	public void masterToSlave(Message m, NotificationContext context) {
+		log.trace(String.format("%s transitioning from MASTER to SLAVE",
+				context.getManager().getInstanceName()));
+	}
+
+	@Transition(from = "OFFLINE", to = "DROPPED")
+	public void offlineToDropped(Message m, NotificationContext context) {
+		log.trace(String.format("%s transitioning from OFFLINE to DROPPED",
+				context.getManager().getInstanceName()));
+	}
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/managed/ManagedFactory.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/managed/ManagedFactory.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/managed/ManagedFactory.java
new file mode 100644
index 0000000..f51d9c0
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/managed/ManagedFactory.java
@@ -0,0 +1,30 @@
+package org.apache.helix.metamanager.managed;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import org.apache.helix.participant.statemachine.StateModelFactory;
+
/** Factory producing a stateless {@link Managed} state model per partition. */
public class ManagedFactory extends StateModelFactory<Managed> {

	@Override
	public Managed createNewStateModel(String partitionName) {
		// Managed carries no per-partition state, so a fresh instance suffices
		return new Managed();
	}
}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/managed/ManagedProcess.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/managed/ManagedProcess.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/managed/ManagedProcess.java
new file mode 100644
index 0000000..387c459
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/managed/ManagedProcess.java
@@ -0,0 +1,85 @@
+package org.apache.helix.metamanager.managed;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import org.apache.helix.HelixManager;
+import org.apache.helix.HelixManagerFactory;
+import org.apache.helix.InstanceType;
+import org.apache.log4j.Logger;
+
+public class ManagedProcess
+{
+	static final Logger log = Logger.getLogger(ManagedProcess.class);
+	
+  private String clusterName;
+  private String zkAddress;
+  private String instanceName;
+  private HelixManager participantManager;
+
+ public ManagedProcess(String clusterName, String zkAddress, String instanceName)
+  {
+    this.clusterName = clusterName;
+    this.zkAddress = zkAddress;
+    this.instanceName = instanceName;
+
+  }
+
+  public void start() throws Exception
+  {
+    log.info("STARTING "+ instanceName);
+    participantManager = HelixManagerFactory.getZKHelixManager(clusterName,
+        instanceName, InstanceType.PARTICIPANT, zkAddress);
+    participantManager.getStateMachineEngine().registerStateModelFactory(
+        "MasterSlave", new ManagedFactory());
+    participantManager.connect();
+    log.info("STARTED "+ instanceName);
+
+  }
+
+  public void stop()
+  {
+    if (participantManager != null)
+    {
+      participantManager.disconnect();
+    }
+  }
+
+  public static void main(String[] args) throws Exception
+  {
+    final String zkAddress = args[0];
+    final String clusterName = args[1];
+    final String instanceName = args[2];
+
+    // Give a unique id to each process, most commonly used format hostname_port
+    final ManagedProcess managerProcess = new ManagedProcess(clusterName, zkAddress,
+        instanceName);
+    Runtime.getRuntime().addShutdownHook(new Thread()
+    {
+      @Override
+      public void run()
+      {
+    	  log.info("Shutting down " + instanceName);
+        managerProcess.stop();
+      }
+    });
+    managerProcess.start();
+    Thread.currentThread().join();
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/managed/ShellContainerProvider.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/managed/ShellContainerProvider.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/managed/ShellContainerProvider.java
new file mode 100644
index 0000000..107f2c6
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/managed/ShellContainerProvider.java
@@ -0,0 +1,85 @@
+package org.apache.helix.metamanager.managed;
+
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+
+import org.apache.helix.metamanager.ClusterContainerProvider;
+import org.apache.log4j.Logger;
+
+public class ShellContainerProvider implements ClusterContainerProvider {
+
+	static final Logger log = Logger.getLogger(ShellContainerProvider.class);
+	
+	static final String REQUIRED_TYPE = "container";
+	static final String RUN_COMMAND = "/bin/sh";
+	
+	// global view of processes required
+	static final Object staticLock = new Object();
+	static final Map<String, ShellProcess> processes = new HashMap<String, ShellProcess>();
+
+	final String zkAddress;
+	final String clusterName;
+	final String command;
+	final String providerName;
+	
+	public ShellContainerProvider(String zkAddress, String clusterName, String owner, String command) {
+		this.zkAddress = zkAddress;
+		this.clusterName = clusterName;
+		this.command = command;
+		this.providerName = owner;
+	}
+
+	@Override
+	public void create(String id, String type) throws Exception {
+		synchronized (staticLock) {
+			if(processes.containsKey(id))
+				throw new IllegalArgumentException(String.format("Process '%s' already exists", id));
+			
+			if(!type.equals(REQUIRED_TYPE))
+				throw new IllegalArgumentException(String.format("Type '%s' not supported", type));
+			
+			log.info(String.format("Running container '%s' (zkAddress='%s', clusterName='%s', command='%s')", id, zkAddress, clusterName, command));
+			
+			ProcessBuilder builder = new ProcessBuilder(RUN_COMMAND, command, zkAddress, clusterName, id);
+			Process process = builder.start();
+			
+			processes.put(id, new ShellProcess(id, providerName, process));
+		}
+	}
+	
+	@Override
+	public void destroy(String id) throws Exception {
+		synchronized (staticLock) {
+			if(!processes.containsKey(id))
+				throw new IllegalArgumentException(String.format("Process '%s' does not exists", id));
+			
+			log.info(String.format("Destroying container '%s'", id));
+			
+			ShellProcess shell = processes.remove(id);
+			shell.process.destroy();
+		}
+	}
+	
+	@Override
+	public void destroyAll() {
+		synchronized (staticLock) {
+			log.info("Destroying all processes");
+			for(ShellProcess process : new HashSet<ShellProcess>(processes.values())) {
+				try { destroy(process.id); } catch (Exception ignore) {}
+			}
+		}
+	}
+	
+	static class ShellProcess {
+		final String id;
+		final String owner;
+		final Process process;
+
+		public ShellProcess(String id, String owner, Process process) {
+			this.id = id;
+			this.owner = owner;
+			this.process = process;
+		}		
+	}
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/managed/ShellProcessProvider.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/managed/ShellProcessProvider.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/managed/ShellProcessProvider.java
new file mode 100644
index 0000000..0def0f5
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/managed/ShellProcessProvider.java
@@ -0,0 +1,148 @@
+package org.apache.helix.metamanager.managed;
+
+import java.lang.reflect.Field;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+
+import org.apache.helix.metamanager.ClusterContainerProvider;
+import org.apache.log4j.Logger;
+
+public class ShellProcessProvider implements ClusterContainerProvider {
+
+	static final Logger log = Logger.getLogger(ShellProcessProvider.class);
+	
+	static final String REQUIRED_TYPE = "container";
+	static final String RUN_COMMAND = "/bin/sh";
+	static final String KILL_COMMAND = "kill -s SIGINT %d";
+	
+	Map<String, ProcessBuilder> builders = new HashMap<String, ProcessBuilder>();
+	Map<String, Process> processes = new HashMap<String, Process>();
+	Map<String, String> id2connection = new HashMap<String, String>();
+
+	int connectionCounter = 0;
+	
+	final String zkAddress;
+	final String clusterName;
+	final int basePort;
+	final String command;
+	
+	public ShellProcessProvider(String zkAddress, String clusterName, int basePort, String command) {
+		this.zkAddress = zkAddress;
+		this.clusterName = clusterName;
+		this.basePort = basePort;
+		this.command = command;
+	}
+
+	@Override
+	public synchronized String create(String id, String type) throws Exception {
+		if(builders.containsKey(id))
+			throw new IllegalArgumentException(String.format("Process '%s' already exists", id));
+		
+		if(!type.equals(REQUIRED_TYPE))
+			throw new IllegalArgumentException(String.format("Type '%s' not supported", type));
+		
+		String connection = "localhost_" + (basePort + connectionCounter);
+		connectionCounter++;
+		
+		log.info(String.format("Running container '%s' (zkAddress='%s', clusterName='%s', connection='%s', command='%s')", id, zkAddress, clusterName, connection, command));
+		
+		ProcessBuilder builder = new ProcessBuilder(RUN_COMMAND, command, zkAddress, clusterName, connection);
+		
+		builders.put(id, builder);
+		id2connection.put(id, connection);
+		
+		return connection;
+	}
+	
+	public synchronized void start(String id) throws Exception {
+		if(!builders.containsKey(id))
+			throw new IllegalArgumentException(String.format("Process '%s' does not exists", id));
+		
+		if(processes.containsKey(id))
+			throw new IllegalArgumentException(String.format("Process '%s' already running", id));
+		
+		log.info(String.format("Starting container '%s'", id));
+		
+		Process p = builders.get(id).start();
+		
+		processes.put(id, p);
+	}
+	
+	public synchronized void stop(String id) throws Exception {
+		if(!builders.containsKey(id))
+			throw new IllegalArgumentException(String.format("Process '%s' does not exists", id));
+		
+		if(!processes.containsKey(id))
+			throw new IllegalArgumentException(String.format("Process '%s' not running", id));
+		
+		log.info(String.format("Stopping container '%s'", id));
+		
+		Process p = processes.get(id);
+		
+		int pid = getUnixPID(p);
+		Runtime.getRuntime().exec(String.format(KILL_COMMAND, pid));
+		
+		int retVal = p.waitFor();
+		if(retVal != 130) {
+			log.warn(String.format("Process %d returned %d (should be 130, SIGINT)", pid, retVal));
+		}
+		
+		processes.remove(id);
+		
+	}
+
+	@Override
+	public synchronized String destroy(String id) throws Exception {
+		if(!builders.containsKey(id))
+			throw new IllegalArgumentException(String.format("Process '%s' does not exists", id));
+		
+		log.info(String.format("Destroying container '%s'", id));
+		
+		if(processes.containsKey(id)) {
+			log.warn(String.format("Forcibly terminating running container '%s'", id));
+			processes.get(id).destroy();
+			processes.remove(id);
+		}
+		
+		String connection = id2connection.get(id);
+
+		builders.remove(id);
+		id2connection.remove(id);
+		
+		return connection;
+	}
+	
+	public synchronized void destroyAll() {
+		log.info("Destroying all processes");
+		for(String id : new HashSet<String>(processes.keySet())) {
+			try {
+				destroy(id);
+			} catch (Exception ignore) {
+				// ignore
+			}
+		}
+	}
+	
+	// TODO get PID independently of platform
+    static int getUnixPID(Process process) throws IllegalArgumentException, IllegalAccessException, NoSuchFieldException {
+        if (process.getClass().getName().equals("java.lang.UNIXProcess")) {
+            Class<?> proc = process.getClass();
+            Field field = proc.getDeclaredField("pid");
+            Object value = getFieldValue(field, process);
+            return ((Integer) value).intValue();
+        } else {
+            throw new IllegalArgumentException("Not a UNIXProcess");
+        }
+    }
+    
+    static Object getFieldValue(Field field, Object object) throws IllegalArgumentException, IllegalAccessException {
+    	Object value;
+    	boolean accessible = field.isAccessible();
+    	field.setAccessible(true);
+    	value = field.get(object);
+    	field.setAccessible(accessible);
+    	return value;
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/managed/YarnContainerProvider.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/managed/YarnContainerProvider.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/managed/YarnContainerProvider.java
new file mode 100644
index 0000000..629788e
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/managed/YarnContainerProvider.java
@@ -0,0 +1,37 @@
package org.apache.helix.metamanager.managed;

import org.apache.helix.metamanager.ClusterContainerProvider;

/**
 * Placeholder YARN-backed container provider. Every operation is an
 * auto-generated stub: create() and destroy() return null, and the remaining
 * lifecycle methods do nothing.
 *
 * NOTE(review): create/destroy are declared to return String here while the
 * ClusterContainerProvider interface elsewhere in this patch declares them
 * void — confirm which signature is the current contract.
 */
public class YarnContainerProvider implements ClusterContainerProvider {

	// stub: no container is actually created
	@Override
	public String create(String id, String type) throws Exception {
		// TODO Auto-generated method stub
		return null;
	}

	// stub: no container is actually destroyed
	@Override
	public String destroy(String id) throws Exception {
		// TODO Auto-generated method stub
		return null;
	}

	// stub: no-op
	@Override
	public void start(String id) throws Exception {
		// TODO Auto-generated method stub

	}

	// stub: no-op
	@Override
	public void stop(String id) throws Exception {
		// TODO Auto-generated method stub

	}

	// stub: no-op
	@Override
	public void destroyAll() {
		// TODO Auto-generated method stub

	}

}


[09/15] Adding Helix-task-framework and Yarn integration modules

Posted by ki...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/assembly/assembly.xml
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/assembly/assembly.xml b/recipes/meta-cluster-manager/src/main/assembly/assembly.xml
new file mode 100644
index 0000000..03b2ca5
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/assembly/assembly.xml
@@ -0,0 +1,32 @@
+<assembly
+  xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.2" 
+  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+  xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.2
+  http://maven.apache.org/xsd/assembly-1.1.2.xsd">
+
+  <id>assembly</id>
+  <formats>
+    <format>tar.gz</format>
+  </formats>
+  <baseDirectory>metamanager</baseDirectory>
+  <fileSets>
+    <fileSet>
+      <directory>target/metamanager-pkg/repo</directory>
+      <outputDirectory>repo</outputDirectory>
+      <excludes>
+        <exclude>**/maven-metadata-appassembler.xml</exclude>
+      </excludes>
+      <fileMode>0644</fileMode>
+    </fileSet> 
+    <fileSet>
+      <directory>target/metamanager-pkg/bin</directory>
+      <outputDirectory>bin</outputDirectory>
+      <fileMode>0755</fileMode>
+    </fileSet> 
+    <fileSet>
+      <directory>target/metamanager-pkg/conf</directory>
+      <outputDirectory>conf</outputDirectory>
+      <fileMode>0644</fileMode>
+    </fileSet> 
+  </fileSets>
+</assembly>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/config/log4j.properties
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/config/log4j.properties b/recipes/meta-cluster-manager/src/main/config/log4j.properties
new file mode 100644
index 0000000..af33e21
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/config/log4j.properties
@@ -0,0 +1,30 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# Set root logger level to ERROR and attach its only appender, C.
+log4j.rootLogger=ERROR, C
+
+# C is set to be a ConsoleAppender.
+log4j.appender.C=org.apache.log4j.ConsoleAppender
+log4j.appender.C.layout=org.apache.log4j.PatternLayout
+log4j.appender.C.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n
+
+log4j.logger.org.I0Itec=ERROR
+log4j.logger.org.apache=ERROR
+
+log4j.logger.org.apache.helix.metamanager=INFO

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ClusterAdmin.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ClusterAdmin.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ClusterAdmin.java
new file mode 100644
index 0000000..9a83f02
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ClusterAdmin.java
@@ -0,0 +1,30 @@
package org.apache.helix.metamanager;

/**
 * Abstraction for instance config (container) injection into and removal from
 * the managed cluster.
 * 
 */
public interface ClusterAdmin {

    /**
     * Add instance configuration to managed cluster.
     * 
     * @param instanceId
     *            unique identifier of the instance (container) to register
     * @param instanceTag
     *            tag attached to the new instance configuration
     */
    public void addInstance(String instanceId, String instanceTag);

    /**
     * Remove instance configuration from managed cluster.<br/>
     * <b>INVARIANT:</b> idempotent
     * 
     * @param instanceId
     *            unique identifier of the instance to remove
     */
    public void removeInstance(String instanceId);

    /**
     * Trigger rebalance of any affected resource in the managed cluster.
     */
    public void rebalance();
}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ClusterContainerProvider.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ClusterContainerProvider.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ClusterContainerProvider.java
new file mode 100644
index 0000000..6aca07a
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ClusterContainerProvider.java
@@ -0,0 +1,32 @@
package org.apache.helix.metamanager;

/**
 * Abstraction for a container deployment backend. Creates and destroys
 * container instances identified by user-defined ids.
 */
public interface ClusterContainerProvider {
	/**
	 * Create container of given type.
	 * 
	 * @param id
	 *            unique user-defined container id
	 * @param type
	 *            container type
	 * @throws Exception
	 *             if the container could not be created
	 */
	public void create(String id, String type) throws Exception;

	/**
	 * Destroy container.
	 * 
	 * @param id
	 *            unique user-defined container id
	 * @throws Exception
	 *             if the container could not be destroyed
	 */
	public void destroy(String id) throws Exception;

	/**
	 * Stops all running processes and destroys containers. Best-effort for
	 * cleanup.
	 * 
	 */
	public void destroyAll();
}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ClusterContainerStatusProvider.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ClusterContainerStatusProvider.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ClusterContainerStatusProvider.java
new file mode 100644
index 0000000..e68c0ee
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ClusterContainerStatusProvider.java
@@ -0,0 +1,7 @@
package org.apache.helix.metamanager;

/**
 * Read-only view on the state of containers managed by a container provider.
 *
 * NOTE(review): mirrors ContainerStatusProvider in this package — consider
 * consolidating the two interfaces.
 */
public interface ClusterContainerStatusProvider {
	// whether a container with this id is known at all
	public boolean exists(String id);
	// whether the container is currently active
	public boolean isActive(String id);
	// whether the container is in a failed state
	public boolean isFailed(String id);
}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ClusterInstanceInjector.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ClusterInstanceInjector.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ClusterInstanceInjector.java
new file mode 100644
index 0000000..d29e1c3
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ClusterInstanceInjector.java
@@ -0,0 +1,6 @@
package org.apache.helix.metamanager;

/**
 * Abstraction for adding and removing instances of the managed cluster,
 * identified by a connection string.
 */
public interface ClusterInstanceInjector {
	// register the instance identified by this connection string
	public void addInstance(String connection);
	// unregister the instance identified by this connection string
	public void removeInstance(String connection);
}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ClusterStatusProvider.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ClusterStatusProvider.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ClusterStatusProvider.java
new file mode 100644
index 0000000..1812dc3
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ClusterStatusProvider.java
@@ -0,0 +1,5 @@
package org.apache.helix.metamanager;

/**
 * Supplies the desired (target) number of containers per container type.
 */
public interface ClusterStatusProvider {
	/**
	 * @param containerType container type key
	 * @return target number of containers for the given type
	 * @throws Exception if the target count cannot be determined
	 */
	public int getTargetContainerCount(String containerType) throws Exception;
}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ConfigTool.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ConfigTool.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ConfigTool.java
new file mode 100644
index 0000000..596743b
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ConfigTool.java
@@ -0,0 +1,47 @@
+package org.apache.helix.metamanager;
+
+import org.apache.log4j.Logger;
+
+public class ConfigTool {
+	
+	static final Logger log = Logger.getLogger(ConfigTool.class);
+	
+    public static final String SHELL_CONTAINER_PATH        = "target/metamanager-pkg/bin/shell-container-process.sh";
+    public static final String SHELL_CONTAINER_PROPERTIES  = "container.properties";
+    public static final String SHELL_CONTAINER_MARKER      = "active";
+
+    public static final String YARN_MASTER_ARCHIVE_PATH    = "target/metamanager-assembly.tar.gz";
+    public static final String YARN_MASTER_PATH            = "master/metamanager/bin/yarn-master-process.sh";
+    public static final String YARN_MASTER_STAGING         = "master.tar.gz";
+    public static final String YARN_MASTER_DESTINATION     = "master";
+    public static final String YARN_MASTER_PROPERTIES      = "master.properties";
+    public static final String YARN_CONTAINER_ARCHIVE_PATH = "target/metamanager-assembly.tar.gz";
+    public static final String YARN_CONTAINER_STAGING      = "container.tar.gz";
+    public static final String YARN_CONTAINER_PATH         = "container/metamanager/bin/yarn-container-process.sh";
+    public static final String YARN_CONTAINER_DESTINATION  = "container";
+    public static final String YARN_CONTAINER_PROPERTIES   = "container.properties";
+
+    public static final long   CONTAINER_TIMEOUT           = 60000;
+
+	static TargetProvider targetProvider;
+	static StatusProvider statusProvider;
+	
+	private ConfigTool() {
+		// left blank
+	}
+	
+	public static TargetProvider getTargetProvider() {
+		return targetProvider;
+	}
+	public static void setTargetProvider(TargetProvider targetProvider) {
+		ConfigTool.targetProvider = targetProvider;
+	}
+	
+	public static StatusProvider getStatusProvider() {
+		return statusProvider;
+	}
+	public static void setStatusProvider(StatusProvider statusProvider) {
+		ConfigTool.statusProvider = statusProvider;
+	}
+	
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ContainerProvider.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ContainerProvider.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ContainerProvider.java
new file mode 100644
index 0000000..2483bba
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ContainerProvider.java
@@ -0,0 +1,40 @@
package org.apache.helix.metamanager;

import org.apache.helix.metamanager.provider.ProviderStateModel;

/**
 * Abstraction for container deployment framework. Creates and destroys
 * container instances. Is invoked by ProviderStateModel and must be blocking.
 * 
 * @see ProviderStateModel
 */
public interface ContainerProvider {
    /**
     * Create container of given type.<br/>
     * <b>INVARIANT:</b> synchronous invocation
     * 
     * @param id
     *            unique user-defined container id
     * @param containerType
     *            container type
     * @throws Exception
     *             if the container could not be created
     */
    public void create(String id, String containerType) throws Exception;

    /**
     * Destroy container.<br/>
     * <b>INVARIANT:</b> synchronous invocation
     * 
     * @param id
     *            unique user-defined container id
     * @throws Exception
     *             if the container could not be destroyed
     */
    public void destroy(String id) throws Exception;

    /**
     * Stops all running processes and destroys containers. Best-effort for
     * cleanup.
     * 
     */
    public void destroyAll();
}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ContainerProviderService.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ContainerProviderService.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ContainerProviderService.java
new file mode 100644
index 0000000..a7da053
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ContainerProviderService.java
@@ -0,0 +1,9 @@
package org.apache.helix.metamanager;

/**
 * ContainerProvider as configurable service.
 * 
 * Marker interface combining the container lifecycle operations of
 * ContainerProvider with the generic Service contract.
 */
public interface ContainerProviderService extends ContainerProvider, Service {

}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ContainerStatusProvider.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ContainerStatusProvider.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ContainerStatusProvider.java
new file mode 100644
index 0000000..d2853d9
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ContainerStatusProvider.java
@@ -0,0 +1,7 @@
package org.apache.helix.metamanager;

/**
 * Read-only view on the state of containers.
 *
 * NOTE(review): identical to ClusterContainerStatusProvider in this package —
 * consider consolidating the two interfaces.
 */
public interface ContainerStatusProvider {
	// whether a container with this id is known at all
	public boolean exists(String id);
	// whether the container is currently active
	public boolean isActive(String id);
	// whether the container is in a failed state
	public boolean isFailed(String id);
}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/FileStatusProvider.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/FileStatusProvider.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/FileStatusProvider.java
new file mode 100644
index 0000000..06e2251
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/FileStatusProvider.java
@@ -0,0 +1,27 @@
package org.apache.helix.metamanager;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.Properties;

/**
 * ClusterStatusProvider reading target container counts from a Java
 * properties file (containerType=count). The file is re-read on every query,
 * so external edits are picked up without a restart.
 */
public class FileStatusProvider implements ClusterStatusProvider {

	final File file;

	public FileStatusProvider(File file) {
		this.file = file;
	}

	/**
	 * Returns the target container count configured for the given type.
	 *
	 * @param containerType property key naming the container type
	 * @return configured target container count
	 * @throws FileNotFoundException if the backing file does not exist
	 * @throws IOException on read errors
	 * @throws IllegalArgumentException if the key is missing from the file
	 */
	@Override
	public int getTargetContainerCount(String containerType) throws FileNotFoundException, IOException, IllegalArgumentException {
		Properties properties = new Properties();
		// close the reader even when load() fails (previously leaked)
		FileReader reader = new FileReader(file);
		try {
			properties.load(reader);
		} finally {
			reader.close();
		}
		// BUGFIX: Properties.contains() (inherited from Hashtable) searches
		// VALUES, not keys — containsKey() is the correct membership test, so
		// the old code threw for every valid key whose count was not also a key.
		if (!properties.containsKey(containerType))
			throw new IllegalArgumentException(String.format("container type '%s' not found in '%s'", containerType, file.getCanonicalPath()));
		return Integer.parseInt(properties.getProperty(containerType));
	}

}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/HelixClusterAdmin.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/HelixClusterAdmin.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/HelixClusterAdmin.java
new file mode 100644
index 0000000..3dd2f48
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/HelixClusterAdmin.java
@@ -0,0 +1,43 @@
+package org.apache.helix.metamanager;
+
+import org.apache.helix.HelixAdmin;
+import org.apache.helix.model.InstanceConfig;
+import org.apache.log4j.Logger;
+
+/**
+ * Implementation of ClusterAdmin based on Helix.
+ * 
+ */
+public class HelixClusterAdmin implements ClusterAdmin {
+
+    static final Logger log = Logger.getLogger(HelixClusterAdmin.class);
+
+    final String        cluster;
+    final HelixAdmin    admin;
+
+    public HelixClusterAdmin(String clusterName, HelixAdmin admin) {
+        this.cluster = clusterName;
+        this.admin = admin;
+    }
+
+    @Override
+    public synchronized void addInstance(String instanceId, String instanceTag) {
+        log.debug(String.format("injecting instance %s (tag=%s) in cluster %s", instanceId, instanceTag, cluster));
+        admin.addInstance(cluster, new InstanceConfig(instanceId));
+        admin.addInstanceTag(cluster, instanceId, instanceTag);
+    }
+
+    @Override
+    public synchronized void removeInstance(String connection) {
+        log.debug(String.format("removing instance %s from cluster %s", connection, cluster));
+        admin.dropInstance(cluster, new InstanceConfig(connection));
+    }
+
+    @Override
+    public void rebalance() {
+        for (String resourceName : admin.getResourcesInCluster(cluster)) {
+            int replica = Integer.parseInt(admin.getResourceIdealState(cluster, resourceName).getReplicas());
+            admin.rebalance(cluster, resourceName, replica);
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/Manager.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/Manager.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/Manager.java
new file mode 100644
index 0000000..ab91ae7
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/Manager.java
@@ -0,0 +1,129 @@
+package org.apache.helix.metamanager;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import org.apache.helix.NotificationContext;
+import org.apache.helix.model.Message;
+import org.apache.helix.participant.statemachine.StateModel;
+import org.apache.helix.participant.statemachine.StateModelInfo;
+import org.apache.helix.participant.statemachine.Transition;
+import org.apache.log4j.Logger;
+
/**
 * OnlineOffline state model whose partitions represent containers: going
 * ONLINE creates a container and registers it in the managed cluster, going
 * OFFLINE tears it down again. Rebalance failures are logged and ignored.
 *
 * NOTE(review): admin.addInstance(...) is invoked with a single argument
 * below, while the ClusterAdmin interface elsewhere in this patch declares
 * addInstance(String, String) — confirm which version is current.
 */
@StateModelInfo(initialState = "OFFLINE", states = { "OFFLINE", "ONLINE" })
public class Manager extends StateModel {
	
	static final Logger log = Logger.getLogger(Manager.class);
	
	// container deployment backend (local, shell or yarn)
	ClusterContainerProvider provider;
	// admin handle for the managed cluster's instance configs
	ClusterAdmin admin;

	public Manager(ClusterContainerProvider provider, ClusterAdmin admin) {
		this.provider = provider;
		this.admin = admin;
	}

	/**
	 * OFFLINE -> ONLINE: removes any stale container/instance with the same
	 * id (best-effort), registers the instance, creates the container and
	 * triggers a rebalance of the managed cluster.
	 */
	@Transition(from = "OFFLINE", to = "ONLINE")
	public void acquire(Message m, NotificationContext context) throws Exception {
		String containerType = m.getResourceName();
		String containerId = m.getPartitionName();
		String instanceId = context.getManager().getInstanceName();
		
		log.trace(String.format("%s:%s transitioning from OFFLINE to ONLINE",
				containerId, instanceId));
		
		bestEffortRemove(containerId);
		
		// add instance to cluster
		admin.addInstance(containerId);
		
		// create container
		provider.create(containerId, containerType);

		try {
			admin.rebalance();
		} catch (Exception e) {
			// ignore: rebalance is retried on subsequent transitions
			log.warn(String.format("rebalancing cluster failed (error='%s')", e.getMessage()));
		}

		log.info(String.format("%s acquired container '%s' (type='%s')",
				instanceId, containerId, containerType));
	}

	/**
	 * ONLINE -> OFFLINE: destroys the container, removes the instance
	 * (best-effort) and triggers a rebalance.
	 */
	@Transition(from = "ONLINE", to = "OFFLINE")
	public void release(Message m, NotificationContext context) {
		String containerId = m.getPartitionName();
		String instanceId = context.getManager().getInstanceName();

		log.trace(String.format("%s:%s transitioning from ONLINE to OFFLINE",
				containerId, instanceId));
		
		bestEffortRemove(containerId);
		
		try {
			admin.rebalance();
		} catch (Exception e) {
			// ignore: rebalance is retried on subsequent transitions
			log.warn(String.format("rebalancing cluster failed (error='%s')", e.getMessage()));
		}

		log.info(String.format("%s destroyed container '%s'",
				instanceId, containerId));

	}

	// ERROR -> OFFLINE: no cleanup here beyond logging; recovery work happens
	// on the next OFFLINE -> ONLINE transition
	@Transition(from = "ERROR", to = "OFFLINE")
	public void recover(Message m, NotificationContext context) {
		String containerId = m.getPartitionName();
		String instanceId = context.getManager().getInstanceName();

		log.trace(String.format("%s:%s transitioning from ERROR to OFFLINE",
				containerId, instanceId));
	}
	
	// OFFLINE -> DROPPED: logging only; the container was already torn down
	// by the ONLINE -> OFFLINE transition
	@Transition(from = "OFFLINE", to = "DROPPED")
	public void drop(Message m, NotificationContext context) {
		String containerId = m.getPartitionName();
		String instanceId = context.getManager().getInstanceName();

		log.trace(String.format("%s:%s transitioning from OFFLINE to DROPPED",
				containerId, instanceId));
	}
	
	/**
	 * Destroys the container and removes its instance config, treating
	 * failures of either step as "did not exist". Used to clear stale state
	 * before (re)acquiring and to tear down on release.
	 */
	private void bestEffortRemove(String containerId) {
		log.debug(String.format("Best effort removal of container '%s'", containerId));
		
		try {
			provider.destroy(containerId);
			log.debug(String.format("Container '%s' destroyed", containerId));
		} catch (Exception e) {
			log.debug(String.format("Container '%s' does not exist", containerId));
		}
		
		try {
			admin.removeInstance(containerId);
			log.debug(String.format("Instance '%s' removed", containerId));
		} catch (Exception e) {
			log.debug(String.format("Instance '%s' does not exist", containerId));
		}
		
	}
	
}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ManagerDemo.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ManagerDemo.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ManagerDemo.java
new file mode 100644
index 0000000..35891f0
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ManagerDemo.java
@@ -0,0 +1,463 @@
+package org.apache.helix.metamanager;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import java.io.File;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.TreeSet;
+
+import org.I0Itec.zkclient.IDefaultNameSpace;
+import org.I0Itec.zkclient.ZkClient;
+import org.I0Itec.zkclient.ZkServer;
+import org.apache.commons.io.FileUtils;
+import org.apache.helix.HelixAdmin;
+import org.apache.helix.HelixManager;
+import org.apache.helix.controller.HelixControllerMain;
+import org.apache.helix.manager.zk.ZKHelixAdmin;
+import org.apache.helix.metamanager.managed.HelixClusterAdmin;
+import org.apache.helix.metamanager.managed.LocalStatusProvider;
+import org.apache.helix.metamanager.provider.local.LocalContainerProvider;
+import org.apache.helix.metamanager.provider.local.LocalContainerStatusProvider;
+import org.apache.helix.metamanager.provider.shell.ShellContainerProvider;
+import org.apache.helix.metamanager.provider.shell.ShellContainerStatusProvider;
+import org.apache.helix.metamanager.provider.yarn.ApplicationConfig;
+import org.apache.helix.metamanager.provider.yarn.YarnApplication;
+import org.apache.helix.metamanager.provider.yarn.YarnContainerProvider;
+import org.apache.helix.metamanager.provider.yarn.YarnContainerStatusProvider;
+import org.apache.helix.model.ExternalView;
+import org.apache.helix.model.IdealState;
+import org.apache.helix.model.IdealState.IdealStateModeProperty;
+import org.apache.helix.model.InstanceConfig;
+import org.apache.helix.model.StateModelDefinition;
+import org.apache.helix.tools.ClusterStateVerifier;
+import org.apache.helix.tools.ClusterStateVerifier.BestPossAndExtViewZkVerifier;
+import org.apache.helix.tools.StateModelConfigGenerator;
+import org.apache.log4j.Logger;
+
+public class ManagerDemo
+{
+	static final long TIMESTEP_INTERVAL = 1000;
+	
+	static final String MANAGED_PROCESS_PATH = "target/meta-cluster-manager-pkg/bin/container-process.sh";
+	static final String YARN_PROCESS_PATH    = "/home/apucher/incubator-helix/recipes/meta-cluster-manager/target/meta-cluster-manager-pkg/bin/yarn-container-process.sh";
+
+	static final String PROVIDER_LOCAL = "LOCAL";
+	static final String PROVIDER_SHELL = "SHELL";
+	static final String PROVIDER_YARN  = "YARN";
+	
+	static final Logger log = Logger.getLogger(ManagerDemo.class);
+	
+	static final int zkPort = 2199; 
+	static final String zkAddress = "localhost:" + zkPort;
+	static final String metaClusterName = "meta-cluster";
+	static final String managedClusterName = "managed-cluster";
+	static final String metaResourceName = "container";
+	static final String managedResourceName = "database";
+	
+	static final int numContainerProviders = 3;
+	static final int numContainerMax = 7;
+	static final int numContainerMin = 3;
+	static final int numContainerStep = 2;
+	static final int numContainerReplica = 1;
+	
+	static final int numManagedPartitions = 10;
+	static final int numManagedReplica = 2;
+	
+	static List<ClusterContainerProvider> providers = new ArrayList<ClusterContainerProvider>();
+	static int providerCount = 0;
+	    
+	static Collection<YarnContainerProvider> yarnProviders = new ArrayList<YarnContainerProvider>();
+	static Collection<YarnContainerStatusProvider> yarnStatusProviders = new ArrayList<YarnContainerStatusProvider>();
+	static Collection<YarnApplication> yarnApplications = new ArrayList<YarnApplication>();
+	
+  /**
+   * LockManagerDemo clusterName, numInstances, lockGroupName, numLocks
+   * 
+   * @param args
+   * @throws Exception
+   */
+  public static void main(String[] args) throws Exception
+  {
+	  
+	String containerProviderType = PROVIDER_LOCAL;
+	if(args.length >= 1) {
+		containerProviderType = args[0];
+	}
+	  
+    LocalStatusProvider clusterStatusProvider = null;
+    ManagerProcess[] managerProcesses = new ManagerProcess[numContainerProviders];
+
+    HelixManager metaControllerManager = null;
+    HelixManager managedControllerManager = null;
+    
+    Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() {
+		@Override
+		public void run() {
+		      for(ClusterContainerProvider provider : providers) {
+			      log.info("Destroying all containers of provider");
+			      provider.destroyAll();
+		      }
+		      for(YarnContainerProvider provider : yarnProviders) {
+			      log.info("Stopping yarn container provider");
+			      provider.stopService();   
+		      }
+		      for(YarnContainerStatusProvider provider : yarnStatusProviders) {
+			      log.info("Stopping yarn container status provider");
+			      provider.stopService();   
+		      }
+			  for(YarnApplication application: yarnApplications) {
+				  log.info("Stopping yarn application");
+				  try { application.stop(); } catch(Exception ignore) {}
+		      }
+		}
+	}));
+
+    try
+    {
+      log.info("Starting ZooKeeper");
+      startLocalZookeeper();
+      HelixAdmin admin = new ZKHelixAdmin(zkAddress);
+
+      log.info("Create clusters");
+      admin.addCluster(metaClusterName, true);
+      admin.addCluster(managedClusterName, true);
+      
+      log.info("Create providers");
+      clusterStatusProvider = new LocalStatusProvider(numContainerMin);
+      ClusterContainerStatusProvider containerStatusProvider = createContainerStatusProvider(containerProviderType); 
+      
+      log.info("Setup config tool");
+      ConfigTool.setClusterStatusProvider(clusterStatusProvider);
+      ConfigTool.setContainerStatusProvider(containerStatusProvider);
+      
+      // Managed Cluster
+      log.info("Setup managed cluster");
+      admin.addStateModelDef(managedClusterName, "MasterSlave",
+          new StateModelDefinition(StateModelConfigGenerator.generateConfigForMasterSlave()));
+      admin.addResource(managedClusterName, managedResourceName, numManagedPartitions,
+          "MasterSlave", IdealStateModeProperty.AUTO_REBALANCE.toString());
+      admin.rebalance(managedClusterName, managedResourceName, numManagedReplica);
+      
+      // Meta Cluster
+      log.info("Setup meta cluster");
+      admin.addStateModelDef(metaClusterName, "OnlineOffline",
+          new StateModelDefinition(StateModelConfigGenerator.generateConfigForOnlineOffline()));
+      admin.addResource(metaClusterName, metaResourceName, clusterStatusProvider.getTargetContainerCount(""),
+          "OnlineOffline", IdealStateModeProperty.AUTO_REBALANCE.toString());
+      
+      IdealState idealState = admin.getResourceIdealState(metaClusterName, metaResourceName);
+      idealState.setRebalancerClassName(ManagerRebalancer.class.getName());
+      //idealState.getRecord().setSimpleField(IdealStateProperty.REBALANCE_TIMER_PERIOD.toString(), "2000"); // Timer trigger creates race condition
+      admin.setResourceIdealState(metaClusterName, metaResourceName, idealState);	  
+      admin.rebalance(metaClusterName, metaResourceName, 1);
+      
+      log.info("Starting meta processes (container providers)");
+      for (int i = 0; i < numContainerProviders; i++)
+      {
+        String instanceName = "provider_" + i;
+        admin.addInstance(metaClusterName, new InstanceConfig(instanceName));
+        
+        ClusterAdmin clusterAdmin = new HelixClusterAdmin(managedClusterName, managedResourceName, numManagedReplica, admin);
+
+        managerProcesses[i] = new ManagerProcess(metaClusterName, zkAddress,
+                instanceName, createContainerProvider(containerProviderType), clusterAdmin);
+        managerProcesses[i].start();
+      }
+      
+      log.info("Starting managed cluster controller");
+      managedControllerManager = HelixControllerMain.startHelixController(zkAddress,
+          managedClusterName, "managedController", HelixControllerMain.STANDALONE);
+      log.info("Starting meta cluster controller");
+      metaControllerManager = HelixControllerMain.startHelixController(zkAddress,
+          metaClusterName, "metaController", HelixControllerMain.STANDALONE);
+          
+      waitUntilRebalancedCount(numContainerMin, admin);
+      printStep("Initial cluster state", admin);
+      
+      while(clusterStatusProvider.getTargetContainerCount("") < numContainerMax) {
+    	  int newCount = clusterStatusProvider.getTargetContainerCount("") + numContainerStep;
+    	  
+          log.info(String.format("Increasing container count to %d", newCount));
+	      clusterStatusProvider.setTargetContainerCount(newCount);
+	      
+	      triggerPipeline(admin);	      
+	      waitUntilRebalancedCount(newCount, admin);
+	      printStep(String.format("Increased container count to %d", newCount), admin);
+      }
+      
+      log.info("Destroying container 0 and container 1");
+	    int currentCount = clusterStatusProvider.getTargetContainerCount("");
+      providers.get(0).destroy("container_0");
+      providers.get(0).destroy("container_1");
+      triggerPipeline(admin);
+      waitUntilRebalancedCount(currentCount, admin);
+      printStep("Destroyed container 0 and container 1", admin);
+      
+      log.info("Destroying container provider 0");
+	  currentCount = clusterStatusProvider.getTargetContainerCount("");
+      managerProcesses[0].stop();
+      waitUntilRebalancedCount(currentCount, admin);
+      printStep("Destroyed container provider 0", admin);
+      
+      while(clusterStatusProvider.getTargetContainerCount("") > numContainerMin) {
+    	  int newCount = clusterStatusProvider.getTargetContainerCount("") - numContainerStep;
+    	  
+          log.info(String.format("Decreasing container count to %d", newCount));
+	      clusterStatusProvider.setTargetContainerCount(newCount);
+	      
+	      triggerPipeline(admin);
+	      waitUntilRebalancedCount(newCount, admin);
+	      printStep(String.format("Decreased container count to %d", clusterStatusProvider.getTargetContainerCount("")), admin);
+      }
+      
+      log.info("Stopping processes");
+      
+    } catch (Exception e)
+    {
+      e.printStackTrace();
+    } finally
+    {
+      if (managedControllerManager != null) {
+	      log.info("Disconnecting managed cluster controller");
+    	  managedControllerManager.disconnect();
+      }
+      if (metaControllerManager != null) {
+	      log.info("Disconnecting meta cluster controller");
+        metaControllerManager.disconnect();
+      }
+	  log.info("Destroying meta processes");
+      for (ManagerProcess process : managerProcesses) {
+    	  process.stop();
+      }
+    }
+    
+    // TODO clean up threads correctly
+    System.exit(0);
+  }
+
+private static void triggerPipeline(HelixAdmin admin) {
+	IdealState poke = admin.getResourceIdealState(metaClusterName, metaResourceName);
+	  admin.setResourceIdealState(metaClusterName, metaResourceName, poke);
+}
+  
+  private static void printStep(String text, HelixAdmin admin) throws Exception {
+	  log.info("********************************************************************************");
+      log.info(text);
+      log.info("********************************************************************************");
+      printClusterStatus(admin);
+      
+      System.out.println("Press ENTER to continue");
+      System.in.read();
+  }
+  
+  static void printClusterStatus(HelixAdmin admin) throws Exception {
+      log.info("Managed cluster status");
+      printStatusMasterSlave(admin);
+      log.info("Meta cluster status");
+      printMetaClusterStatus(admin);
+  }
+  
+  static void waitUntilRebalancedCount(int containerCount, HelixAdmin admin) throws InterruptedException {
+	  Thread.sleep(TIMESTEP_INTERVAL);
+	  while(containerCount != getMetaContainerCount(admin) ||
+			containerCount != getManagedContainerCount(admin)) {
+		  Thread.sleep(TIMESTEP_INTERVAL);
+  	  }
+	  ClusterStateVerifier.verifyByPolling(new BestPossAndExtViewZkVerifier(zkAddress, managedClusterName));
+  }
+
+  static int getMetaContainerCount(HelixAdmin admin) {
+	    Set<String> assignedInstances = new HashSet<String>();
+		  
+	    ExternalView externalView = admin.getResourceExternalView(metaClusterName, metaResourceName);
+		  
+	    for (String partitionName : externalView.getPartitionSet())
+	    {
+	      Map<String, String> stateMap = externalView.getStateMap(partitionName);
+	      if(stateMap == null)
+	  	    continue;
+	    
+	      for(String instanceName : stateMap.keySet()){
+	        if ("ONLINE".equals(stateMap.get(instanceName))) {
+	          assignedInstances.add(partitionName);
+	          break;
+	        }
+	      }
+	    }
+	  
+	    return assignedInstances.size();
+	  }
+
+  static int getManagedContainerCount(HelixAdmin admin) {
+    Set<String> assignedInstances = new HashSet<String>();
+	  
+    ExternalView externalView = admin.getResourceExternalView(managedClusterName, managedResourceName);
+	  
+    for (String partitionName : externalView.getPartitionSet())
+    {
+      Map<String, String> stateMap = externalView.getStateMap(partitionName);
+      if(stateMap == null)
+  	    continue;
+    
+      for(String instanceName : stateMap.keySet()){
+        if ("MASTER".equals(stateMap.get(instanceName)) ||
+      	    "SLAVE".equals(stateMap.get(instanceName))) {
+          assignedInstances.add(instanceName);
+        }
+      }
+    }
+  
+    return assignedInstances.size();
+  }
+
+  static void printMetaClusterStatus(HelixAdmin admin)
+  {
+    ExternalView externalView = admin
+        .getResourceExternalView(metaClusterName, metaResourceName);
+    TreeSet<String> treeSet = new TreeSet<String>(
+        externalView.getPartitionSet());
+    log.info("container" + "\t" + "acquired by");
+    log.info("======================================");
+    for (String partitionName : treeSet)
+    {
+      Map<String, String> stateMap = externalView.getStateMap(partitionName);
+      String acquiredBy = null;
+      if (stateMap != null)
+      {
+        for(String instanceName:stateMap.keySet()){
+          if ("ONLINE".equals(stateMap.get(instanceName))){
+            acquiredBy = instanceName;
+            break;
+          }
+        }
+      }
+      log.info(partitionName + "\t"
+          + ((acquiredBy != null) ? acquiredBy : "NONE"));
+    }
+  }
+
+  static void printStatusMasterSlave(HelixAdmin admin)
+	  {
+	    ExternalView externalView = admin
+	        .getResourceExternalView(managedClusterName, managedResourceName);
+	    TreeSet<String> treeSet = new TreeSet<String>(
+	        externalView.getPartitionSet());
+	    log.info("partition" + "\t" + "master" + "\t\t" + "slave");
+	    log.info("============================================================");
+	    for (String partitionName : treeSet)
+	    {
+	      Map<String, String> stateMap = externalView.getStateMap(partitionName);
+	      String master = "NONE";
+	      String slave = "NONE";
+	      if (stateMap != null)
+	      {
+	        for(String instanceName:stateMap.keySet()){
+	          if ("MASTER".equals(stateMap.get(instanceName))){
+	        	  master = instanceName;
+	          }
+	          if ("SLAVE".equals(stateMap.get(instanceName))){
+	        	  slave = instanceName;
+	          }
+	        }
+	      }
+	      log.info(String.format("%s\t%s\t%s", partitionName, master, slave));
+	    }
+	  }
+
+  public static void startLocalZookeeper() throws Exception
+  {
+    ZkServer server = null;
+    String baseDir = "/tmp/IntegrationTest/";
+    final String dataDir = baseDir + "zk/dataDir";
+    final String logDir = baseDir + "/tmp/logDir";
+    FileUtils.deleteDirectory(new File(dataDir));
+    FileUtils.deleteDirectory(new File(logDir));
+
+    IDefaultNameSpace defaultNameSpace = new IDefaultNameSpace()
+    {
+      @Override
+      public void createDefaultNameSpace(ZkClient zkClient)
+      {
+
+      }
+    };
+    server = new ZkServer(dataDir, logDir, defaultNameSpace, zkPort);
+    server.start();
+
+  }
+  
+  private static ClusterContainerProvider createContainerProvider(String type) throws Exception {
+      String providerName = "provider_" + providerCount;
+      providerCount++;
+      
+	  if(PROVIDER_LOCAL.equalsIgnoreCase(type)) {
+		  log.info("Using VM-local container provider");
+		  LocalContainerProvider provider =  new LocalContainerProvider(zkAddress, managedClusterName, providerName);
+		  providers.add(provider);
+		  return provider;
+	  } else if (PROVIDER_SHELL.equalsIgnoreCase(type)) {
+		  log.info("Using shell-based container provider");
+		  ShellContainerProvider provider = new ShellContainerProvider(zkAddress, managedClusterName, providerName, MANAGED_PROCESS_PATH);
+		  providers.add(provider);
+		  return provider;
+	  } else if (PROVIDER_YARN.equalsIgnoreCase(type)) {
+	      ApplicationConfig appConfig = new ApplicationConfig(zkAddress, managedClusterName, zkAddress, providerName);
+		  
+		  log.info("Using yarn-based container provider");
+		  YarnApplication yarnApplication = new YarnApplication(appConfig);
+		  yarnApplication.start();
+		  yarnApplications.add(yarnApplication);
+		  
+		  YarnContainerProvider yarnProvider = new YarnContainerProvider(appConfig, YARN_PROCESS_PATH);
+		  yarnProvider.startService();
+		  yarnProviders.add(yarnProvider);
+		  
+		  providers.add(yarnProvider);
+		  return yarnProvider;
+	  } else {
+		  throw new IllegalArgumentException(String.format("Unknown container provider type '%s'", type));
+	  }
+  }
+  
+  private static ClusterContainerStatusProvider createContainerStatusProvider(String type) throws Exception {
+	  if(PROVIDER_LOCAL.equalsIgnoreCase(type)) {
+		  log.info("Using VM-local container status provider");
+		  LocalContainerStatusProvider provider = new LocalContainerStatusProvider();
+		  return provider;
+	  } else if (PROVIDER_SHELL.equalsIgnoreCase(type)) {
+		  log.info("Using shell-based container status provider");
+		  ShellContainerStatusProvider provider = new ShellContainerStatusProvider();
+		  return provider;
+	  } else if (PROVIDER_YARN.equalsIgnoreCase(type)) {
+		  log.info("Using yarn-based container status provider");
+		  YarnContainerStatusProvider provider = new YarnContainerStatusProvider(zkAddress);
+		  provider.startService();
+		  yarnStatusProviders.add(provider);
+		  return provider;
+	  } else {
+		  throw new IllegalArgumentException(String.format("Unknown container status provider type '%s'", type));
+	  }
+  }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ManagerFactory.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ManagerFactory.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ManagerFactory.java
new file mode 100644
index 0000000..44a924e
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ManagerFactory.java
@@ -0,0 +1,39 @@
+package org.apache.helix.metamanager;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import org.apache.helix.participant.statemachine.StateModelFactory;
+
+public class ManagerFactory extends StateModelFactory<Manager> {
+
+	final ClusterContainerProvider provider;
+	final ClusterAdmin admin;
+	
+	public ManagerFactory(ClusterContainerProvider provider, ClusterAdmin admin) {
+		super();
+		this.provider = provider;
+		this.admin = admin;
+	}
+
+	@Override
+	public Manager createNewStateModel(String partitionName) {
+		return new Manager(provider, admin);
+	}
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ManagerProcess.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ManagerProcess.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ManagerProcess.java
new file mode 100644
index 0000000..7812e6f
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ManagerProcess.java
@@ -0,0 +1,67 @@
+package org.apache.helix.metamanager;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import org.apache.helix.HelixManager;
+import org.apache.helix.HelixManagerFactory;
+import org.apache.helix.InstanceType;
+import org.apache.log4j.Logger;
+
+public class ManagerProcess
+{
+	static final Logger log = Logger.getLogger(ManagerProcess.class);
+	
+  final String clusterName;
+  final String zkAddress;
+  final String instanceName;
+  final ClusterContainerProvider provider;
+  final ClusterAdmin admin;
+  
+  HelixManager participantManager;
+  
+  ManagerProcess(String clusterName, String zkAddress, String instanceName, ClusterContainerProvider provider, ClusterAdmin admin)
+  {
+    this.clusterName = clusterName;
+    this.zkAddress = zkAddress;
+    this.instanceName = instanceName;
+    this.provider = provider;
+    this.admin = admin;
+  }
+
+  public void start() throws Exception
+  {
+    log.info("STARTING "+ instanceName);
+    participantManager = HelixManagerFactory.getZKHelixManager(clusterName,
+        instanceName, InstanceType.PARTICIPANT, zkAddress);
+    participantManager.getStateMachineEngine().registerStateModelFactory(
+        "OnlineOffline", new ManagerFactory(provider, admin));
+    participantManager.connect();
+    log.info("STARTED "+ instanceName);
+
+  }
+
+  public void stop()
+  {
+    if (participantManager != null)
+    {
+      participantManager.disconnect();
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ManagerRebalancer.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ManagerRebalancer.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ManagerRebalancer.java
new file mode 100644
index 0000000..2b2824c
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ManagerRebalancer.java
@@ -0,0 +1,167 @@
+package org.apache.helix.metamanager;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.helix.HelixManager;
+import org.apache.helix.controller.rebalancer.Rebalancer;
+import org.apache.helix.controller.stages.ClusterDataCache;
+import org.apache.helix.controller.stages.CurrentStateOutput;
+import org.apache.helix.model.IdealState;
+import org.apache.helix.model.Partition;
+import org.apache.log4j.Logger;
+
+/**
+ * Rebalancer for cluster state. Uses cluster status provider.<br/>
+ * <br/>
+ * IdealState mapping:<br/>
+ * resource     = tag-name<br/>
+ *   partition  = logical container<br/>
+ *     instance = resource provider<br/>
+ *       status = physical container presence
+ *
+ */
public class ManagerRebalancer implements Rebalancer {

	static final Logger log = Logger.getLogger(ManagerRebalancer.class);

	// Minimum delay between rebalance runs; only referenced by the disabled
	// rate-limiting block in computeNewIdealState below.
	static final long UPDATE_INTERVAL_MIN = 1500;

	static final Object lock = new Object();
	static long nextUpdate = 0; 

	// Supplies the target number of containers per resource (tag).
	ClusterStatusProvider clusterStatusProvider;
	// Reports whether physical containers exist and whether they have failed.
	ClusterContainerStatusProvider containerStatusProvider;
	HelixManager manager;

	@Override
	public void init(HelixManager manager) {
		// Providers are fetched from ConfigTool statics because Helix creates
		// rebalancers reflectively and cannot pass constructor arguments.
		this.clusterStatusProvider = ConfigTool.getClusterStatusProvider();
		this.containerStatusProvider = ConfigTool.getContainerStatusProvider();
		this.manager = manager;
	}

	/**
	 * Recomputes the ideal state for the container resource: partitions up to
	 * the target count stay active, while surplus and failed containers are
	 * made passive by disabling their partitions on every provider instance.
	 * Writes the new ideal state back through the cluster management tool as a
	 * side effect, in addition to returning it.
	 */
	@Override
	public IdealState computeNewIdealState(String resourceName,
			IdealState currentIdealState,
			CurrentStateOutput currentStateOutput, ClusterDataCache clusterData) {

		// NOTE(review): rate limiting via UPDATE_INTERVAL_MIN/nextUpdate is
		// intentionally disabled; kept for reference.
//		synchronized(lock) {
//			if(nextUpdate > System.currentTimeMillis()) {
//				return currentIdealState;
//			}
//			nextUpdate = System.currentTimeMillis() + UPDATE_INTERVAL_MIN;
		
			// target container count
			int targetCount = clusterStatusProvider.getTargetContainerCount(resourceName);
			
			// currently active containers: partitions with an ONLINE instance in
			// either the current or the pending state map (pending counts so that
			// in-flight transitions are not treated as missing containers)
			List<String> currentPartitions = new ArrayList<String>();
			for(String partitionName : currentIdealState.getPartitionSet()) {
				Map<String, String> currentStateMap = currentStateOutput.getCurrentStateMap(resourceName, new Partition(partitionName));
				Map<String, String> pendingStateMap = currentStateOutput.getPendingStateMap(resourceName, new Partition(partitionName));
				
				if(hasOnlineInstance(currentStateMap) ||
				   hasOnlineInstance(pendingStateMap)) {
					currentPartitions.add(partitionName);
				}
			}
			int currentCount = currentPartitions.size();
			
			// currently failed containers: designated ONLINE but the physical
			// container is missing or in a failure state
			List<String> failedPartitions = new ArrayList<String>();
			for(String partitionName : currentIdealState.getPartitionSet()) {
				Map<String, String> currentStateMap = currentStateOutput.getCurrentStateMap(resourceName, new Partition(partitionName));
				
				if(!hasOnlineInstance(currentStateMap))
					continue;
				
				// container listed online, but does not exist
				if(!containerStatusProvider.exists(partitionName)) {
					log.warn(String.format("Container '%s' designated ONLINE, but does not exist", partitionName));
					failedPartitions.add(partitionName);
				}
				
				// container listed online and exists, but in failure state
				// (mutually exclusive with the branch above)
				if(containerStatusProvider.exists(partitionName) &&
				   containerStatusProvider.isFailed(partitionName)) {
					log.warn(String.format("Container '%s' designated ONLINE, but in failure state", partitionName));
					failedPartitions.add(partitionName);
				}
			}
			int failureCount = failedPartitions.size();
			
			// Only rewrite the ideal state when the container count is off-target
			// or failures were detected; otherwise leave it untouched.
			if(currentCount != targetCount ||
			   failureCount != 0) {
				log.info(String.format("Rebalancing containers (current=%d, target=%d, failures=%d)", currentCount, targetCount, failureCount));
				
				currentIdealState.setNumPartitions(targetCount);
				
				// future active containers: partition names follow the
				// "<resource>_<index>" convention, indices 0..targetCount-1,
				// minus any failed partitions
				log.debug("active containers");
				List<String> activePartitions = new ArrayList<String>();
				for(int i=0; i<targetCount; i++) {
					String partitionName = resourceName + "_" + i;
					activePartitions.add(partitionName);
				}
				activePartitions.removeAll(failedPartitions);
				
				// future passive containers: indices beyond the target count,
				// plus all failed partitions
				log.debug("passive containers");
				List<String> passivePartitions = new ArrayList<String>();
				for(int i=targetCount; i<currentCount; i++) {
					String partitionName = resourceName + "_" + i;
					passivePartitions.add(partitionName);
				}
				passivePartitions.addAll(failedPartitions);
				
				log.debug("output");
				if(log.isDebugEnabled()) {
					log.debug(String.format("%s: failed partitions %s", resourceName, failedPartitions));
					log.debug(String.format("%s: active partitions %s", resourceName, activePartitions));
					log.debug(String.format("%s: passive partitions %s", resourceName, passivePartitions));
				}
				
				// Rebuild list/map fields so only active partitions remain; the
				// empty per-partition entries let the controller assign instances.
				log.debug("building ideal state");
				Map<String, List<String>> listFields = new HashMap<String, List<String>>();
				Map<String, Map<String, String>> mapFields = new HashMap<String, Map<String, String>>();
				for(String partitionName : activePartitions) {
					listFields.put(partitionName, new ArrayList<String>());
					mapFields.put(partitionName, new HashMap<String, String>());
				}
				currentIdealState.getRecord().setListFields(listFields);
				currentIdealState.getRecord().setMapFields(mapFields);
				
				log.debug("setting ideal state");
				String clusterName = manager.getClusterName();
				manager.getClusterManagmentTool().setResourceIdealState(clusterName, resourceName, currentIdealState);
				
				// Enable active and disable passive partitions on every provider
				// instance so passive containers are torn down.
				log.debug("enable partitions");
				for(String instanceName : clusterData.getInstanceConfigMap().keySet()) {
					log.debug(String.format("enable partitions for '%s'", instanceName));
					manager.getClusterManagmentTool().enablePartition(true, clusterName, instanceName, resourceName, activePartitions);
					log.debug(String.format("disable partitions for '%s'", instanceName));
					manager.getClusterManagmentTool().enablePartition(false, clusterName, instanceName, resourceName, passivePartitions);
				}
				
				log.debug("done");
			}
			
			return currentIdealState;
//		}
	}

	/** Returns true if any instance in the given state map is in state ONLINE. */
	private boolean hasOnlineInstance(Map<String, String> stateMap) {
		if(!stateMap.isEmpty()) {
			for(Map.Entry<String, String> entry : stateMap.entrySet()) {
				if(entry.getValue().equals("ONLINE")) {
					return true;
				}
			}
		}
		return false;
	}
	
}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/MetaManagerDemo.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/MetaManagerDemo.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/MetaManagerDemo.java
new file mode 100644
index 0000000..d0be313
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/MetaManagerDemo.java
@@ -0,0 +1,457 @@
+package org.apache.helix.metamanager;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import java.io.File;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.TreeSet;
+
+import org.I0Itec.zkclient.IDefaultNameSpace;
+import org.I0Itec.zkclient.ZkClient;
+import org.I0Itec.zkclient.ZkServer;
+import org.apache.commons.io.FileUtils;
+import org.apache.helix.HelixAdmin;
+import org.apache.helix.HelixManager;
+import org.apache.helix.controller.HelixControllerMain;
+import org.apache.helix.manager.zk.ZKHelixAdmin;
+import org.apache.helix.metamanager.container.ContainerUtils;
+import org.apache.helix.metamanager.impl.StaticTargetProvider;
+import org.apache.helix.metamanager.impl.local.LocalContainerProvider;
+import org.apache.helix.metamanager.impl.local.LocalStatusProvider;
+import org.apache.helix.metamanager.impl.shell.ShellContainerProvider;
+import org.apache.helix.metamanager.impl.shell.ShellStatusProvider;
+import org.apache.helix.metamanager.impl.yarn.YarnApplicationProperties;
+import org.apache.helix.metamanager.impl.yarn.YarnContainerProvider;
+import org.apache.helix.metamanager.impl.yarn.YarnStatusProvider;
+import org.apache.helix.metamanager.provider.ProviderProcess;
+import org.apache.helix.metamanager.provider.ProviderRebalancer;
+import org.apache.helix.model.ExternalView;
+import org.apache.helix.model.IdealState;
+import org.apache.helix.model.IdealState.RebalanceMode;
+import org.apache.helix.model.InstanceConfig;
+import org.apache.helix.model.StateModelDefinition;
+import org.apache.helix.tools.ClusterStateVerifier;
+import org.apache.helix.tools.ClusterStateVerifier.BestPossAndExtViewZkVerifier;
+import org.apache.helix.tools.StateModelConfigGenerator;
+import org.apache.log4j.Logger;
+
+public class MetaManagerDemo
+{
+	static final long TIMESTEP_INTERVAL = 1000;
+	
+	static final String PROVIDER_LOCAL = "LOCAL";
+	static final String PROVIDER_SHELL = "SHELL";
+	static final String PROVIDER_YARN  = "YARN";
+	
+	static final Logger log = Logger.getLogger(MetaManagerDemo.class);
+	
+	static final int zkPort = 2199;
+	static final String zkAddress = "localhost:" + zkPort;
+	static final String metaClusterName = "meta-cluster";
+	static final String managedClusterName = "managed-cluster";
+	static final String metaResourceName = "container";
+	static final String managedResourceName = "database";
+	
+	static final int numContainerProviders = 3;
+	static final int numContainerMax = 7;
+	static final int numContainerMin = 3;
+	static final int numContainerStep = 2;
+	static final int numContainerReplica = 1;
+	
+	static final int numManagedPartitions = 10;
+	static final int numManagedReplica = 2;
+	
+	static List<ContainerProvider> providers = new ArrayList<ContainerProvider>();
+	static int providerCount = 0;
+	
+	static Collection<Service> services = new ArrayList<Service>();
+	
+  /**
+   * LockManagerDemo clusterName, numInstances, lockGroupName, numLocks
+   * 
+   * @param args
+   * @throws Exception
+   */
+  public static void main(String[] args) throws Exception
+  {
+	  
+	String containerProviderType = PROVIDER_LOCAL;
+	if(args.length >= 1) {
+		containerProviderType = args[0];
+	}
+	  
+    StaticTargetProvider targetProvider = null;
+    ProviderProcess[] managerProcesses = new ProviderProcess[numContainerProviders];
+
+    HelixManager metaControllerManager = null;
+    HelixManager managedControllerManager = null;
+    
+        Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() {
+            @Override
+            public void run() {
+                log.info("Destroying containers");
+                for (ContainerProvider provider : providers) {
+                    provider.destroyAll();
+                }
+                log.info("Stopping services");
+                for (Service service : services) {
+                    try { service.stop(); } catch (Exception ignore) {}
+                }
+            }
+        }));
+
+    try
+    {
+      log.info("Starting ZooKeeper");
+      startLocalZookeeper();
+      HelixAdmin admin = new ZKHelixAdmin(zkAddress);
+
+      log.info("Create clusters");
+      admin.addCluster(metaClusterName, true);
+      admin.addCluster(managedClusterName, true);
+      
+      log.info("Create providers");
+      targetProvider = startService(new StaticTargetProvider(Collections.singletonMap(metaResourceName, numContainerMin)));
+      StatusProvider statusProvider = startService(createContainerStatusProvider(containerProviderType));
+      
+      log.info("Setup config tool");
+      ConfigTool.setClusterStatusProvider(targetProvider);
+      ConfigTool.setContainerStatusProvider(statusProvider);
+      
+      // Managed Cluster
+      log.info("Setup managed cluster");
+      admin.addStateModelDef(managedClusterName, "MasterSlave",
+          new StateModelDefinition(StateModelConfigGenerator.generateConfigForMasterSlave()));
+      admin.addResource(managedClusterName, managedResourceName, numManagedPartitions,
+          "MasterSlave", RebalanceMode.FULL_AUTO.toString());
+      
+      IdealState managedIdealState = admin.getResourceIdealState(managedClusterName, managedResourceName);
+      managedIdealState.setInstanceGroupTag(metaResourceName);
+      managedIdealState.setReplicas(String.valueOf(numManagedReplica));
+      admin.setResourceIdealState(managedClusterName, managedResourceName, managedIdealState);	  
+      
+      // Meta Cluster
+      log.info("Setup meta cluster");
+      admin.addStateModelDef(metaClusterName, "OnlineOffline",
+          new StateModelDefinition(StateModelConfigGenerator.generateConfigForOnlineOffline()));
+      admin.addResource(metaClusterName, metaResourceName, targetProvider.getTargetContainerCount(metaResourceName),
+          "OnlineOffline", RebalanceMode.USER_DEFINED.toString());
+      
+      IdealState metaIdealState = admin.getResourceIdealState(metaClusterName, metaResourceName);
+      metaIdealState.setRebalancerClassName(ProviderRebalancer.class.getName());
+      metaIdealState.setReplicas("1");
+      admin.setResourceIdealState(metaClusterName, metaResourceName, metaIdealState);	  
+      
+      log.info("Starting meta processes (container providers)");
+      for (int i = 0; i < numContainerProviders; i++)
+      {
+        String instanceName = "provider_" + i;
+        admin.addInstance(metaClusterName, new InstanceConfig(instanceName));
+        
+        ClusterAdmin clusterAdmin = new HelixClusterAdmin(managedClusterName, admin);
+
+        managerProcesses[i] = new ProviderProcess(metaClusterName, zkAddress,
+                instanceName, startService(createContainerProvider(containerProviderType)), clusterAdmin);
+        managerProcesses[i].start();
+      }
+      
+      log.info("Starting managed cluster controller");
+      managedControllerManager = HelixControllerMain.startHelixController(zkAddress,
+          managedClusterName, "managedController", HelixControllerMain.STANDALONE);
+      log.info("Starting meta cluster controller");
+      metaControllerManager = HelixControllerMain.startHelixController(zkAddress,
+          metaClusterName, "metaController", HelixControllerMain.STANDALONE);
+          
+      waitUntilRebalancedCount(numContainerMin, admin);
+      printStep("Initial cluster state", admin);
+      
+      while(targetProvider.getTargetContainerCount(metaResourceName) < numContainerMax) {
+    	  int newCount = targetProvider.getTargetContainerCount(metaResourceName) + numContainerStep;
+    	  
+          log.info(String.format("Increasing container count to %d", newCount));
+	      targetProvider.setTargetContainerCount(metaResourceName, newCount);
+	      
+	      triggerPipeline(admin);	      
+	      waitUntilRebalancedCount(newCount, admin);
+	      printStep(String.format("Increased container count to %d", newCount), admin);
+      }
+      
+      log.info("Destroying container 0 and container 1");
+	  int currentCount = targetProvider.getTargetContainerCount(metaResourceName);
+      providers.get(0).destroy("container_0");
+      providers.get(0).destroy("container_1");
+      triggerPipeline(admin);
+      waitUntilRebalancedCount(currentCount, admin);
+      printStep("Destroyed container 0 and container 1", admin);
+      
+      log.info("Destroying container provider 0");
+	  currentCount = targetProvider.getTargetContainerCount(metaResourceName);
+      managerProcesses[0].stop();
+      waitUntilRebalancedCount(currentCount, admin);
+      printStep("Destroyed container provider 0", admin);
+      
+      while(targetProvider.getTargetContainerCount(metaResourceName) > numContainerMin) {
+    	  int newCount = targetProvider.getTargetContainerCount(metaResourceName) - numContainerStep;
+    	  
+          log.info(String.format("Decreasing container count to %d", newCount));
+	      targetProvider.setTargetContainerCount(metaResourceName, newCount);
+	      
+	      triggerPipeline(admin);
+	      waitUntilRebalancedCount(newCount, admin);
+	      printStep(String.format("Decreased container count to %d", targetProvider.getTargetContainerCount(metaResourceName)), admin);
+      }
+      
+      log.info("Stopping processes");
+      
+    } catch (Exception e)
+    {
+      e.printStackTrace();
+    } finally
+    {
+      if (managedControllerManager != null) {
+	      log.info("Disconnecting managed cluster controller");
+    	  managedControllerManager.disconnect();
+      }
+      if (metaControllerManager != null) {
+	      log.info("Disconnecting meta cluster controller");
+        metaControllerManager.disconnect();
+      }
+	  log.info("Destroying meta processes");
+      for (ProviderProcess process : managerProcesses) {
+    	  process.stop();
+      }
+    }
+    
+    // TODO clean up threads correctly
+    System.exit(0);
+  }
+
+private static void triggerPipeline(HelixAdmin admin) {
+	IdealState poke = admin.getResourceIdealState(metaClusterName, metaResourceName);
+	  admin.setResourceIdealState(metaClusterName, metaResourceName, poke);
+}
+  
+  private static void printStep(String text, HelixAdmin admin) throws Exception {
+	  log.info("********************************************************************************");
+      log.info(text);
+      log.info("********************************************************************************");
+      printClusterStatus(admin);
+      
+      System.out.println("Press ENTER to continue");
+      System.in.read();
+  }
+  
  /**
   * Logs the external-view status of both clusters: the managed cluster's
   * master/slave assignments followed by the meta cluster's container
   * ownership.
   *
   * @param admin connected Helix admin for both clusters
   * @throws Exception on Helix access errors
   */
  static void printClusterStatus(HelixAdmin admin) throws Exception {
      log.info("Managed cluster status");
      printStatusMasterSlave(admin);
      log.info("Meta cluster status");
      printMetaClusterStatus(admin);
  }
+  
+  static void waitUntilRebalancedCount(int containerCount, HelixAdmin admin) throws InterruptedException {
+	  Thread.sleep(TIMESTEP_INTERVAL);
+	  while(containerCount != getMetaContainerCount(admin) ||
+			containerCount != getManagedContainerCount(admin)) {
+		  Thread.sleep(TIMESTEP_INTERVAL);
+  	  }
+	  ClusterStateVerifier.verifyByPolling(new BestPossAndExtViewZkVerifier(zkAddress, managedClusterName));
+  }
+
+  static int getMetaContainerCount(HelixAdmin admin) {
+	    Set<String> assignedInstances = new HashSet<String>();
+		  
+	    ExternalView externalView = admin.getResourceExternalView(metaClusterName, metaResourceName);
+		  
+	    for (String partitionName : externalView.getPartitionSet())
+	    {
+	      Map<String, String> stateMap = externalView.getStateMap(partitionName);
+	      if(stateMap == null)
+	  	    continue;
+	    
+	      for(String instanceName : stateMap.keySet()){
+	        if ("ONLINE".equals(stateMap.get(instanceName))) {
+	          assignedInstances.add(partitionName);
+	          break;
+	        }
+	      }
+	    }
+	  
+	    return assignedInstances.size();
+	  }
+
+  static int getManagedContainerCount(HelixAdmin admin) {
+    Set<String> assignedInstances = new HashSet<String>();
+	  
+    ExternalView externalView = admin.getResourceExternalView(managedClusterName, managedResourceName);
+	  
+    for (String partitionName : externalView.getPartitionSet())
+    {
+      Map<String, String> stateMap = externalView.getStateMap(partitionName);
+      if(stateMap == null)
+  	    continue;
+    
+      for(String instanceName : stateMap.keySet()){
+        if ("MASTER".equals(stateMap.get(instanceName)) ||
+      	    "SLAVE".equals(stateMap.get(instanceName))) {
+          assignedInstances.add(instanceName);
+        }
+      }
+    }
+  
+    return assignedInstances.size();
+  }
+
+  static void printMetaClusterStatus(HelixAdmin admin)
+  {
+    ExternalView externalView = admin
+        .getResourceExternalView(metaClusterName, metaResourceName);
+    TreeSet<String> treeSet = new TreeSet<String>(
+        externalView.getPartitionSet());
+    log.info("container" + "\t" + "acquired by");
+    log.info("======================================");
+    for (String partitionName : treeSet)
+    {
+      Map<String, String> stateMap = externalView.getStateMap(partitionName);
+      String acquiredBy = null;
+      if (stateMap != null)
+      {
+        for(String instanceName:stateMap.keySet()){
+          if ("ONLINE".equals(stateMap.get(instanceName))){
+            acquiredBy = instanceName;
+            break;
+          }
+        }
+      }
+      log.info(partitionName + "\t"
+          + ((acquiredBy != null) ? acquiredBy : "NONE"));
+    }
+  }
+
+  static void printStatusMasterSlave(HelixAdmin admin)
+	  {
+	    ExternalView externalView = admin
+	        .getResourceExternalView(managedClusterName, managedResourceName);
+	    TreeSet<String> treeSet = new TreeSet<String>(
+	        externalView.getPartitionSet());
+	    log.info("partition" + "\t" + "master" + "\t\t" + "slave");
+	    log.info("============================================================");
+	    for (String partitionName : treeSet)
+	    {
+	      Map<String, String> stateMap = externalView.getStateMap(partitionName);
+	      String master = "NONE";
+	      String slave = "NONE";
+	      if (stateMap != null)
+	      {
+	        for(String instanceName:stateMap.keySet()){
+	          if ("MASTER".equals(stateMap.get(instanceName))){
+	        	  master = instanceName;
+	          }
+	          if ("SLAVE".equals(stateMap.get(instanceName))){
+	        	  slave = instanceName;
+	          }
+	        }
+	      }
+	      log.info(String.format("%s\t%s\t%s", partitionName, master, slave));
+	    }
+	  }
+
+  public static void startLocalZookeeper() throws Exception
+  {
+    ZkServer server = null;
+	String baseDir = "/tmp/metamanager/";
+	final String dataDir = baseDir + "zk/dataDir";
+	final String logDir = baseDir + "zk/logDir";
+    FileUtils.deleteDirectory(new File(dataDir));
+    FileUtils.deleteDirectory(new File(logDir));
+
+    IDefaultNameSpace defaultNameSpace = new IDefaultNameSpace()
+    {
+      @Override
+      public void createDefaultNameSpace(ZkClient zkClient)
+      {
+
+      }
+    };
+    server = new ZkServer(dataDir, logDir, defaultNameSpace, zkPort);
+    server.start();
+
+  }
+  
+  private static ContainerProviderService createContainerProvider(String type) throws Exception {
+      String providerName = "provider_" + providerCount;
+      providerCount++;
+      
+	  if(PROVIDER_LOCAL.equalsIgnoreCase(type)) {
+		  log.info("Using VM-local container provider");
+		  LocalContainerProvider provider =  new LocalContainerProvider(zkAddress, managedClusterName, providerName);
+		  provider.registerType("container", ContainerUtils.getPropertiesFromResource("container.properties"));
+		  providers.add(provider);
+		  return provider;
+	  } else if (PROVIDER_SHELL.equalsIgnoreCase(type)) {
+		  log.info("Using shell-based container provider");
+		  ShellContainerProvider provider = new ShellContainerProvider(zkAddress, managedClusterName, providerName);
+          provider.registerType("container", ContainerUtils.getPropertiesFromResource("container.properties"));
+		  providers.add(provider);
+		  return provider;
+	  } else if (PROVIDER_YARN.equalsIgnoreCase(type)) {
+	      YarnApplicationProperties properties = new YarnApplicationProperties();
+	      properties.put(YarnApplicationProperties.HELIX_CLUSTER, managedClusterName);
+	      properties.put(YarnApplicationProperties.HELIX_ZOOKEEPER, zkAddress);
+	      properties.put(YarnApplicationProperties.PROVIDER_METADATA, zkAddress);
+	      properties.put(YarnApplicationProperties.PROVIDER_NAME, providerName);
+		  
+		  log.info("Using yarn-based container provider");
+		  YarnContainerProvider yarnProvider = new YarnContainerProvider(properties);
+          yarnProvider.registerType("container", ContainerUtils.getPropertiesFromResource("container.properties"));
+		  
+		  providers.add(yarnProvider);
+		  return yarnProvider;
+	  } else {
+		  throw new IllegalArgumentException(String.format("Unknown container provider type '%s'", type));
+	  }
+  }
+  
+  private static StatusProviderService createContainerStatusProvider(String type) throws Exception {
+	  if(PROVIDER_LOCAL.equalsIgnoreCase(type)) {
+		  log.info("Using VM-local container status provider");
+		  return new LocalStatusProvider();
+	  } else if (PROVIDER_SHELL.equalsIgnoreCase(type)) {
+		  log.info("Using shell-based container status provider");
+		  return new ShellStatusProvider();
+	  } else if (PROVIDER_YARN.equalsIgnoreCase(type)) {
+		  log.info("Using yarn-based container status provider");
+		  return new YarnStatusProvider(zkAddress);
+	  } else {
+		  throw new IllegalArgumentException(String.format("Unknown container status provider type '%s'", type));
+	  }
+  }
+  
  /**
   * Starts the given service and registers it in the static services list for
   * later cleanup. Registration happens only after a successful start, so a
   * service that fails to start is not tracked.
   *
   * @param service service to start
   * @return the started service (for call chaining)
   * @throws Exception if Service.start() fails
   */
  private static <T extends Service> T startService(T service) throws Exception {
      service.start();
      services.add(service);
      return service;
  }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/Service.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/Service.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/Service.java
new file mode 100644
index 0000000..c13a62e
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/Service.java
@@ -0,0 +1,38 @@
package org.apache.helix.metamanager;

import java.util.Properties;

/**
 * Abstraction for configurable and runnable service. Light-weight dependency
 * injection and life-cycle management. Expected life-cycle:
 * configure() once, then start() once, then stop() any number of times.
 * 
 */
public interface Service {

    /**
     * Configure service internals<br/>
     * <b>INVARIANT:</b> executed only once
     * 
     * @param properties
     *            arbitrary key-value properties, parsed internally
     * @throws Exception
     */
    void configure(Properties properties) throws Exception;

    /**
     * Start service.<br/>
     * <b>PRECONDITION:</b> configure() was invoked<br/>
     * <b>INVARIANT:</b> executed only once
     * 
     * @throws Exception
     */
    void start() throws Exception;

    /**
     * Stop service.<br/>
     * <b>INVARIANT:</b> idempotent (safe to invoke repeatedly or before start)
     * 
     * @throws Exception
     */
    void stop() throws Exception;
}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/StaticStatusProvider.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/StaticStatusProvider.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/StaticStatusProvider.java
new file mode 100644
index 0000000..249b9b8
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/StaticStatusProvider.java
@@ -0,0 +1,28 @@
package org.apache.helix.metamanager;

import java.util.HashMap;
import java.util.Map;

/**
 * Cluster status provider with manually configured, in-memory target counts.
 * Not thread-safe. NOTE(review): looks intended for tests and static
 * deployments — confirm callers always set a count before querying it.
 */
public class StaticStatusProvider implements ClusterStatusProvider {

    final Map<String, Integer> targetCounts = new HashMap<String, Integer>();

    public StaticStatusProvider() {
        // left blank
    }

    /**
     * @param targetCounts initial container-type to target-count mapping (copied)
     */
    public StaticStatusProvider(Map<String, Integer> targetCounts) {
        this.targetCounts.putAll(targetCounts);
    }

    @Override
    public int getTargetContainerCount(String containerType) {
        Integer count = targetCounts.get(containerType);
        if (count == null) {
            // fail with a descriptive error instead of an opaque NPE from auto-unboxing
            throw new IllegalArgumentException(String.format("No target count configured for container type '%s'", containerType));
        }
        return count;
    }

    /**
     * Sets (or overwrites) the target count for the given container type.
     */
    public void setTargetContainerCount(String containerType, int targetCount) {
        targetCounts.put(containerType, targetCount);
    }

}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/StatusProvider.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/StatusProvider.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/StatusProvider.java
new file mode 100644
index 0000000..841f08d
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/StatusProvider.java
@@ -0,0 +1,35 @@
package org.apache.helix.metamanager;

import org.apache.helix.metamanager.provider.ProviderRebalancer;

/**
 * Abstraction for status reader of container deployment framework. Provides
 * information on physical existence of container and activity or failure state.
 * Is polled by ProviderRebalancer and should be light-weight and non-blocking.<br/>
 * <b>NOTE:</b> This information is solely based on the low-level framework and
 * may be different from the participant state in Helix. (The Helix participant
 * may not even exist)
 * 
 * @see ProviderRebalancer
 */
public interface StatusProvider {

    /**
     * Determine whether container physically exists.
     * 
     * @param id
     *            unique container id
     * @return true, if container is present
     */
    public boolean exists(String id);

    /**
     * Determine whether container is healthy as determined by the deployment
     * framework.
     * 
     * @param id
     *            unique container id
     * @return true, if container is healthy
     */
    public boolean isHealthy(String id);
}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/StatusProviderService.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/StatusProviderService.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/StatusProviderService.java
new file mode 100644
index 0000000..3c2739d
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/StatusProviderService.java
@@ -0,0 +1,9 @@
package org.apache.helix.metamanager;

/**
 * StatusProvider as configurable service.
 * 
 */
public interface StatusProviderService extends StatusProvider, Service {
    // marker interface: combines status read-out with Service life-cycle management
}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/TargetProvider.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/TargetProvider.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/TargetProvider.java
new file mode 100644
index 0000000..22524c4
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/TargetProvider.java
@@ -0,0 +1,25 @@
package org.apache.helix.metamanager;

import org.apache.helix.metamanager.provider.ProviderRebalancer;

/**
 * Abstraction for target computation and statistics collection. Provides target
 * count of containers for ProviderRebalancer. Is polled by ProviderRebalancer
 * and should be light-weight and non-blocking.<br/>
 * <b>NOTE:</b> The target count is oblivious of failed containers and can be
 * obtained in an arbitrary way. See implementations for examples.
 * 
 * @see ProviderRebalancer
 */
public interface TargetProvider {

    /**
     * Return target count of containers of a specific type.
     * 
     * @param containerType
     *            meta resource name
     * @return container count >= 1
     * @throws Exception
     */
    public int getTargetContainerCount(String containerType) throws Exception;
}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/TargetProviderService.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/TargetProviderService.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/TargetProviderService.java
new file mode 100644
index 0000000..4d6275e
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/TargetProviderService.java
@@ -0,0 +1,9 @@
package org.apache.helix.metamanager;

/**
 * TargetProvider as configurable service.
 * 
 */
public interface TargetProviderService extends TargetProvider, Service {
    // marker interface: combines target computation with Service life-cycle management
}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ZookeeperSetter.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ZookeeperSetter.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ZookeeperSetter.java
new file mode 100644
index 0000000..39e20fe
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ZookeeperSetter.java
@@ -0,0 +1,30 @@
+package org.apache.helix.metamanager;
+
+import org.I0Itec.zkclient.ZkClient;
+import org.apache.log4j.Logger;
+
+/**
+ * Utility for setting String values in the embedded zookeeper service.
+ * (Program entry point)
+ * 
+ */
+public class ZookeeperSetter {
+
+    static Logger log = Logger.getLogger(ZookeeperSetter.class);
+
+    /**
+     * @param args
+     */
+    public static void main(String[] args) {
+        String address = args[0];
+        String path = args[1];
+        String value = args[2];
+
+        log.info(String.format("Setting %s:%s to '%s'", address, path, value));
+
+        ZkClient client = new ZkClient(address);
+        client.createPersistent(path, true);
+        client.writeData(path, value);
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrap/BootUtil.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrap/BootUtil.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrap/BootUtil.java
new file mode 100644
index 0000000..004de06
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrap/BootUtil.java
@@ -0,0 +1,58 @@
package org.apache.helix.metamanager.bootstrap;

import java.util.Map;
import java.util.Properties;

import org.apache.helix.metamanager.container.ContainerProcessProperties;
import org.apache.log4j.Logger;

/**
 * Static helpers for bootstrap configuration: namespace extraction from flat
 * property files and reflective instantiation of configured classes.
 */
public class BootUtil {

    public static final String CLASS_PROPERTY = "class";
    static final Logger log = Logger.getLogger(BootUtil.class);

    /**
     * Extracts the sub-properties of the given namespace, i.e. all entries
     * whose key starts with "&lt;namespace&gt;." with that prefix stripped.
     *
     * @param namespace prefix without the trailing dot
     * @param source properties to filter
     * @return new Properties holding only the namespaced entries
     */
    public static Properties getNamespace(String namespace, Properties source) {
        Properties dest = new Properties();
        String prefix = namespace + ".";

        for (Map.Entry<Object, Object> rawEntry : source.entrySet()) {
            String key = (String) rawEntry.getKey();
            String value = (String) rawEntry.getValue();

            if (key.startsWith(prefix)) {
                dest.put(key.substring(prefix.length()), value);
            }
        }

        return dest;
    }

    /**
     * Creates an instance of the class named by the "class" property. Prefers
     * a constructor taking ContainerProcessProperties; falls back to the
     * default constructor only when the properties constructor does not exist.
     *
     * @param properties configuration, must contain CLASS_PROPERTY
     * @return new instance of the configured class
     * @throws Exception when the class has no suitable constructor or when
     *             construction itself fails
     */
    @SuppressWarnings("unchecked")
    public static <T> T createInstance(Properties properties) throws Exception {
        String className = properties.getProperty(CLASS_PROPERTY);

        Class<?> containerClass = Class.forName(className);

        try {
            log.debug(String.format("checking for properties constructor in class '%s'", className));
            // catch only the missing-constructor case; a constructor that
            // exists but fails should propagate, not be masked by the fallback
            return (T) containerClass.getConstructor(ContainerProcessProperties.class).newInstance(properties);
        } catch (NoSuchMethodException e) {
            log.debug("no properties constructor found");
        }

        try {
            log.debug(String.format("checking for default constructor in class '%s'", className));
            return (T) containerClass.getConstructor().newInstance();
        } catch (NoSuchMethodException e) {
            log.debug("no default constructor found");
        }

        throw new Exception(String.format("no suitable constructor for class '%s'", className));
    }

    private BootUtil() {
        // static utility class — not instantiable
    }

}


[03/15] Adding Helix-task-framework and Yarn integration modules

Posted by ki...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/YarnMasterService.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/YarnMasterService.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/YarnMasterService.java
new file mode 100644
index 0000000..b63be1f
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/YarnMasterService.java
@@ -0,0 +1,361 @@
+package org.apache.helix.metamanager.provider.yarn;
+
+import java.io.File;
+import java.io.IOException;
+import java.net.InetSocketAddress;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.Executors;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.net.NetUtils;
+import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.hadoop.yarn.api.AMRMProtocol;
+import org.apache.hadoop.yarn.api.ContainerManager;
+import org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
+import org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest;
+import org.apache.hadoop.yarn.api.records.AMResponse;
+import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
+import org.apache.hadoop.yarn.api.records.Container;
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
+import org.apache.hadoop.yarn.api.records.ContainerStatus;
+import org.apache.hadoop.yarn.api.records.NodeId;
+import org.apache.hadoop.yarn.api.records.Priority;
+import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.api.records.ResourceRequest;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.exceptions.YarnRemoteException;
+import org.apache.hadoop.yarn.ipc.YarnRPC;
+import org.apache.hadoop.yarn.util.Records;
+import org.apache.helix.metamanager.provider.yarn.ContainerMetadata.ContainerState;
+import org.apache.helix.metamanager.provider.yarn.MetadataService.MetadataServiceException;
+import org.apache.log4j.Logger;
+
+import com.google.common.collect.Maps;
+
+public class YarnMasterService {
+
+	static final Logger log = Logger.getLogger(YarnMasterService.class);
+
+	static final String REQUIRED_TYPE = "container";
+	
+	static final long ZOOKEEPER_TIMEOUT = 5000;
+	
+	static final long MASTERSERVICE_INTERVAL = 1000;
+	
+	static final String CONTAINERS = "CONTAINERS";
+	
+	static final String CONTAINER_COMMAND = "/bin/sh %s %s %s %s %s %s 1>%s/stdout 2>%s/stderr";
+
+	/*
+	 * CONTAINERS
+	 *   A (A, READY)
+	 *   B (B, RUNNING)
+	 */
+	
+	final ApplicationConfig appConfig;
+	final AMRMProtocol yarnClient;
+	final ApplicationAttemptId appAtemptId;
+	
+	final Configuration yarnConfig;
+	
+	final File dummy = new File("/tmp/dummy");
+	
+	final Map<ContainerId, Container> unassignedContainers = new HashMap<ContainerId, Container>();
+	final Map<ContainerId, Container> activeContainers = new HashMap<ContainerId, Container>();
+	final Map<ContainerId, ContainerStatus> completedContainers = new HashMap<ContainerId, ContainerStatus>();
+	final Map<ContainerId, String> yarn2meta = new HashMap<ContainerId, String>();
+	
+	final MetadataService metaService;
+	
+	ScheduledExecutorService executor;
+
	/**
	 * @param yarnClient RPC proxy to the resource manager (AMRM protocol)
	 * @param conf YARN configuration, used later to contact node managers
	 * @param appAttemptId id of this application master attempt
	 * @param appConfig application settings (cluster address/name, provider name)
	 * @param metaService store of per-container metadata records
	 */
	public YarnMasterService(AMRMProtocol yarnClient, Configuration conf, ApplicationAttemptId appAttemptId, ApplicationConfig appConfig, MetadataService metaService) {
		this.appConfig = appConfig;
		this.yarnClient = yarnClient;
		this.appAtemptId = appAttemptId;
		this.yarnConfig = conf;
		this.metaService = metaService;
	}
+
	/**
	 * Starts the periodic YARN update cycle: a single scheduled thread runs
	 * YarnService every MASTERSERVICE_INTERVAL milliseconds.
	 */
	public void startService() {
		log.debug("starting yarn master service");
		
		executor = Executors.newSingleThreadScheduledExecutor();
		executor.scheduleAtFixedRate(new YarnService(), 0, MASTERSERVICE_INTERVAL, TimeUnit.MILLISECONDS);
	}
+	
+	public void stopService() {
+		log.debug("stopping yarn master service");
+		
+		if(executor != null) {
+			executor.shutdown();
+			while(!executor.isTerminated()) {
+				try {
+					Thread.sleep(100);
+				} catch (InterruptedException e) {
+					// ignore
+				}
+			}
+			executor = null;
+		}
+	}
+	
+	Collection<ContainerMetadata> readOwnedMetadata() throws MetadataServiceException {
+		log.debug("reading container data");
+		
+		Collection<ContainerMetadata> containers = new ArrayList<ContainerMetadata>();
+		for(ContainerMetadata meta : metaService.readAll()) {
+			if(meta.owner.equals(appConfig.providerName)) {
+				containers.add(meta);
+				log.debug(String.format("found container node '%s' (state=%s, yarnId=%s, command=%s, owner=%s)", 
+						meta.id, meta.state, meta.yarnId, meta.command, meta.owner));
+			}
+		}
+		return containers;
+	}
+	
+	class YarnService implements Runnable {
+		int responseId = 0;
+		
+		@Override
+		public void run() {
+			try {
+				log.debug("running yarn service update cycle");
+				
+				Collection<ContainerMetadata> metadata = readOwnedMetadata();
+				
+				// active meta containers
+				int numMetaActive = countActiveMeta(metadata);
+				
+				// newly acquired meta containers
+				int numMetaAcquire = countAcquireMeta(metadata);
+				
+				// destroyed meta containers
+				List<ContainerId> destroyedReleasedIds = createDestroyedReleaseList(metadata);
+				int numMetaCompleted = destroyedReleasedIds.size();
+				
+				int numMeta = numMetaAcquire + numMetaActive + numMetaCompleted;
+				
+				// yarn containers
+				int numYarnUnassigned = unassignedContainers.size();
+				int numYarnActive = activeContainers.size();
+				int numYarnCompleted = completedContainers.size();
+				int numYarn = numYarnUnassigned + numYarnActive + numYarnCompleted;
+				
+				int numYarnRequired = numMetaAcquire - numYarnUnassigned;
+				
+				// additionally required containers
+				int numRequestAdditional = Math.max(0, numYarnRequired);
+				
+				// overstock containers
+				List<ContainerId> unneededReleasedIds = createOverstockReleaseList(numYarnRequired);
+				
+				int numReleased = destroyedReleasedIds.size() + unneededReleasedIds.size();
+				
+				log.debug(String.format("meta containers (total=%d, acquire=%d, active=%d, completed=%d)", numMeta, numMetaAcquire, numMetaActive, numMetaCompleted));
+				log.debug(String.format("yarn containers (total=%d, unassigned=%d, active=%d, completed=%d)", numYarn, numYarnUnassigned, numYarnActive, numYarnCompleted));
+				log.debug(String.format("requesting %d new containers, releasing %d", numRequestAdditional, numReleased));
+				
+				Priority priority = Records.newRecord(Priority.class);
+				priority.setPriority(0);
+				
+				Resource resource = Records.newRecord(Resource.class);
+				resource.setMemory(256); // TODO make dynamic
+				
+				ResourceRequest resourceRequest = Records.newRecord(ResourceRequest.class);
+				resourceRequest.setHostName("*");
+				resourceRequest.setNumContainers(numRequestAdditional);
+				resourceRequest.setPriority(priority);
+				resourceRequest.setCapability(resource);
+				
+				AllocateRequest request = Records.newRecord(AllocateRequest.class);
+				request.setResponseId(responseId);
+				request.setApplicationAttemptId(appAtemptId);
+				request.addAsk(resourceRequest);
+				request.addAllReleases(destroyedReleasedIds);
+				request.addAllReleases(unneededReleasedIds);
+				
+				responseId++;
+				
+				AllocateResponse allocateResponse = null;
+				try {
+					allocateResponse = yarnClient.allocate(request);
+				} catch (YarnRemoteException e) {
+					// ignore
+					log.error("Error allocating containers", e);
+					return;
+				}
+				
+				AMResponse response = allocateResponse.getAMResponse();
+				
+				// newly added containers
+				for(Container container : response.getAllocatedContainers()) {
+					unassignedContainers.put(container.getId(), container);
+				}
+				
+				log.info(String.format("%d new containers available, %d required", unassignedContainers.size(), numMetaAcquire));
+				
+				Iterator<Container> itYarn = unassignedContainers.values().iterator();
+				Iterator<ContainerMetadata> itMeta = metadata.iterator();
+				while(itYarn.hasNext() && itMeta.hasNext()) {
+					ContainerMetadata meta = itMeta.next();
+					
+					if(meta.yarnId >= 0)
+						continue;
+					
+					Container containerYarn = itYarn.next();
+					
+					log.debug(String.format("assigning yarn container '%s' to container node '%s'", containerYarn.getId(), meta.id));
+					
+					String command = String.format(CONTAINER_COMMAND, meta.command,
+							appConfig.clusterAddress, appConfig.clusterName, appConfig.metadataAddress, appConfig.providerName,
+							meta.id, "/tmp/" + meta.id, "/tmp/" + meta.id);  
+							//ApplicationConstants.LOG_DIR_EXPANSION_VAR, ApplicationConstants.LOG_DIR_EXPANSION_VAR);
+					
+					ContainerLaunchContext context = Records.newRecord(ContainerLaunchContext.class);
+					context.setContainerId(containerYarn.getId());
+					context.setResource(containerYarn.getResource());
+					context.setEnvironment(Maps.<String, String>newHashMap());
+					context.setCommands(Collections.singletonList(command));
+					context.setLocalResources(Utils.getDummyResources());
+					try {
+						context.setUser(UserGroupInformation.getCurrentUser().getShortUserName());
+					} catch (IOException e) {
+						log.error(String.format("failed setting up container '%s' user information", meta.id));
+						return;
+					}
+					
+					log.debug(String.format("container '%s' executing command '%s'", meta.id, command));
+
+					StartContainerRequest startReq = Records.newRecord(StartContainerRequest.class);
+					startReq.setContainerLaunchContext(context);
+					
+					try {
+						getContainerManager(containerYarn).startContainer(startReq);
+
+					} catch (YarnRemoteException e) {
+						log.error(String.format("Error starting container '%s'", meta.id), e);
+						return;
+					}
+						
+					log.debug(String.format("container '%s' started, updating container node", meta.id));
+
+					metaService.update(new ContainerMetadata(meta, ContainerState.CONNECTING, containerYarn.getId().getId()));
+					yarn2meta.put(containerYarn.getId(), meta.id);
+					
+					log.debug(String.format("removing '%s' from unassigned yarn containers and adding to active list", containerYarn.getId()));
+
+					itYarn.remove();
+					activeContainers.put(containerYarn.getId(), containerYarn);
+					
+				}
+				
+				for(ContainerStatus status : response.getCompletedContainersStatuses()) {
+					ContainerId id = status.getContainerId();
+					
+					log.info(String.format("Container '%s' completed", id));
+					
+					if(unassignedContainers.containsKey(id)) {
+						log.info(String.format("Unassigned container '%s' terminated, removing", id));
+						unassignedContainers.remove(id);
+						// TODO handle
+					}
+					
+					if(activeContainers.containsKey(id)) {
+						log.info(String.format("Active container '%s' terminated, removing", id));
+						activeContainers.remove(id);
+						
+						String metaId = yarn2meta.get(id);
+						ContainerMetadata meta = metaService.read(metaId);
+						
+						log.debug(String.format("container '%s' finalized, updating container node", meta.id));
+						
+						metaService.update(new ContainerMetadata(meta, ContainerState.FINALIZE));
+					}
+					
+					completedContainers.put(id, status);
+				}
+
+				log.debug("yarn service update cycle complete");
+				
+			} catch (Exception e) {
+				log.error("Error while executing yarn update cycle", e);
+			}
+		}
+
+		private List<ContainerId> createOverstockReleaseList(int numYarnRequired) {
+			List<ContainerId> unneededReleasedIds = new ArrayList<ContainerId>();
+			Iterator<Container> itUnassigned = unassignedContainers.values().iterator();
+			if(numYarnRequired < 0) {
+				for(int i=0; i<-numYarnRequired && itUnassigned.hasNext(); i++) {
+					Container container = itUnassigned.next();
+					unneededReleasedIds.add(container.getId());
+					log.debug(String.format("Container '%s' no longer required, removing", container.getId()));
+					itUnassigned.remove();
+				}
+			}
+			return unneededReleasedIds;
+		}
+
+		private List<ContainerId> createDestroyedReleaseList(
+				Collection<ContainerMetadata> metadata) {
+			List<ContainerId> releasedIds = new ArrayList<ContainerId>();
+			for(ContainerMetadata meta : metadata) {
+				if(meta.state == ContainerState.HALTED) {
+					ContainerId containerId = Records.newRecord(ContainerId.class);
+					containerId.setApplicationAttemptId(appAtemptId);
+					containerId.setId(meta.yarnId);
+					releasedIds.add(containerId);
+					log.debug(String.format("releasing container '%s'", containerId));
+				}
+			}
+			return releasedIds;
+		}
+
+		private int countAcquireMeta(Collection<ContainerMetadata> metadata) {
+			int numMetaAcquire = 0;
+			for(ContainerMetadata meta : metadata) {
+				if(meta.state == ContainerState.ACQUIRE) {
+					numMetaAcquire++;
+				}
+			}
+			return numMetaAcquire;
+		}
+
+		private int countActiveMeta(Collection<ContainerMetadata> metadata) {
+			int numMetaActive = 0;
+			for(ContainerMetadata meta : metadata) {
+				if(meta.state != ContainerState.ACQUIRE &&
+				   meta.state != ContainerState.HALTED &&
+				   meta.state != ContainerState.FINALIZE) {
+					numMetaActive++;
+				}
+			}
+			return numMetaActive;
+		}
+	}
+	
    /**
     * Creates an RPC proxy to the node-local ContainerManager hosting the
     * given container. A new proxy is built on every call; nothing is cached.
     *
     * NOTE(review): yarnConf is derived from yarnConfig and used to create the
     * RPC factory, but the proxy itself is created with the raw yarnConfig --
     * confirm whether yarnConf was intended for the getProxy call as well.
     */
    private ContainerManager getContainerManager(Container container) {
        YarnConfiguration yarnConf = new YarnConfiguration(yarnConfig);
        YarnRPC rpc = YarnRPC.create(yarnConf);
        NodeId nodeId = container.getNodeId();
        // node manager endpoint in host:port form
        String containerIpPort = String.format("%s:%d", nodeId.getHost(),
                nodeId.getPort());
        log.info("Connecting to ContainerManager at: " + containerIpPort);
        InetSocketAddress addr = NetUtils.createSocketAddr(containerIpPort);
        ContainerManager cm = (ContainerManager) rpc.getProxy(
                ContainerManager.class, addr, yarnConfig);
        return cm;
    }
+		  
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/YarnProcess.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/YarnProcess.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/YarnProcess.java
new file mode 100644
index 0000000..b1a22d5
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/YarnProcess.java
@@ -0,0 +1,171 @@
+package org.apache.helix.metamanager.provider.yarn;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import java.util.concurrent.Executors;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.helix.HelixManager;
+import org.apache.helix.HelixManagerFactory;
+import org.apache.helix.InstanceType;
+import org.apache.helix.metamanager.managed.ManagedFactory;
+import org.apache.helix.metamanager.provider.yarn.ContainerMetadata.ContainerState;
+import org.apache.log4j.Logger;
+
+public class YarnProcess {
+	static final Logger log = Logger.getLogger(YarnProcess.class);
+
+	static final long CONTAINERSERVICE_INTERVAL = 1000;
+
+	final ApplicationConfig appConfig;
+	final String containerId;
+	
+	HelixManager participantManager;
+
+	MetadataService metaService;
+	ScheduledExecutorService executor;
+
+
+	public YarnProcess(ApplicationConfig appConfig, String containerId) {
+		this.appConfig = appConfig;
+		this.containerId = containerId;
+	}
+
+	public void startService() {
+		log.info(String.format("start metadata service for '%s'", containerId));
+		metaService = new MetadataService(appConfig.metadataAddress);
+		metaService.start();
+		
+		executor = Executors.newSingleThreadScheduledExecutor();
+		executor.scheduleAtFixedRate(new ContainerService(), 0, CONTAINERSERVICE_INTERVAL, TimeUnit.MILLISECONDS);
+	}
+
+	public void stopService() {
+		log.info(String.format("stop metadata service for '%s'", containerId));
+		if (metaService != null) {
+			metaService.stop();
+			metaService = null;
+		}
+		
+		if(executor != null) {
+			executor.shutdown();
+		}
+	}
+	
+	public boolean isRunning() {
+		if(executor == null)
+			return false;
+		return !executor.isTerminated();
+	}
+	
+	public void startParticipant() throws Exception {
+		log.info("STARTING " + containerId);
+		participantManager = HelixManagerFactory.getZKHelixManager(appConfig.clusterName,
+				containerId, InstanceType.PARTICIPANT, appConfig.clusterAddress);
+		participantManager.getStateMachineEngine().registerStateModelFactory(
+				"MasterSlave", new ManagedFactory());
+		participantManager.connect();
+		log.info("STARTED " + containerId);
+	}
+
+	public void stopParticipant() {
+		if (participantManager != null) {
+			participantManager.disconnect();
+			participantManager = null;
+		}
+	}
+	
+	public void updateContainerStatus() {
+		log.info("updating container status");
+		try {
+			ContainerMetadata meta = metaService.read(containerId);
+			
+			if(meta.state == ContainerState.CONNECTING) {
+				log.info("container connecting, going to active");
+				try {
+					startParticipant();
+					metaService.update(new ContainerMetadata(meta, ContainerState.ACTIVE));
+				} catch (Exception e) {
+					log.error("Failed to start participant, going to failed", e);
+					stopParticipant();
+					metaService.update(new ContainerMetadata(meta, ContainerState.FAILED));
+				}
+			}
+			
+			if(meta.state == ContainerState.ACTIVE) {
+				// do something
+				// and go to failed on error
+			}
+			
+			if(meta.state == ContainerState.TEARDOWN) {
+				log.info("container teardown, going to halted");
+				stopParticipant();
+				metaService.update(new ContainerMetadata(meta, ContainerState.HALTED));
+				stopService();
+			}
+			
+		} catch(Exception e) {
+			log.warn(String.format("Container '%s' does not exist, stopping service", containerId));
+			stopService();
+		}
+	}
+	
+	class ContainerService implements Runnable {
+		@Override
+		public void run() {
+			updateContainerStatus();
+		}
+	}
+
+  public static void main(String[] args) throws Exception
+  {
+	log.trace("BEGIN YarnProcess.main()");
+	  
+    final String clusterAddress = args[0];
+    final String clusterName = args[1];
+    final String metadataAddress = args[2];
+    final String providerName = args[3];
+    final String containerId = args[4];
+
+    final ApplicationConfig appConfig = new ApplicationConfig(clusterAddress, clusterName, metadataAddress, providerName);
+    
+    final YarnProcess yarnProcess = new YarnProcess(appConfig, containerId);
+
+    yarnProcess.startService();
+    
+    Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() {
+		@Override
+		public void run() {
+			yarnProcess.stopService();
+		}
+	}));
+    
+	while(yarnProcess.isRunning()) {
+		try {
+			Thread.sleep(100);
+		} catch (InterruptedException e) {
+			// ignore
+		}
+	}
+	
+	log.trace("END YarnProcess.main()");
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/ZookeeperMetadataService.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/ZookeeperMetadataService.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/ZookeeperMetadataService.java
new file mode 100644
index 0000000..00bf17f
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/ZookeeperMetadataService.java
@@ -0,0 +1,102 @@
+package org.apache.helix.metamanager.provider.yarn;
+
+import java.util.ArrayList;
+import java.util.Collection;
+
+import org.I0Itec.zkclient.ZkClient;
+import org.I0Itec.zkclient.exception.ZkException;
+import org.apache.log4j.Logger;
+
+public class ZookeeperMetadataService implements MetadataService {
+	
+	static final Logger log = Logger.getLogger(ZookeeperMetadataService.class);
+	
+	static final String CONTAINER_NAMESPACE = "containers";
+	
+	static final String BASE_PATH = "/" + CONTAINER_NAMESPACE;
+	
+	static final long POLL_INTERVAL = 100;
+
+	final String metadataAddress;
+	
+	ZkClient client;
+	
+	public ZookeeperMetadataService(String metadataAddress) {
+		this.metadataAddress = metadataAddress;
+	}
+
+	public void startService() {
+		log.debug(String.format("starting metadata service for '%s'", metadataAddress));
+		
+		client = new ZkClient(metadataAddress);
+		
+		client.createPersistent(BASE_PATH, true);
+	}
+	
+	public void stopService() {
+		log.debug(String.format("stopping metadata service for '%s'", metadataAddress));
+		if(client != null) {
+			client.close();
+			client = null;
+		}
+	}
+	
+	@Override
+	public boolean exists(String id) {
+		return client.exists(makePath(id));
+	}
+	
+	@Override
+	public void create(ContainerMetadata meta) throws MetadataServiceException {
+		try {
+			client.createPersistent(makePath(meta.id), Utils.toJson(meta));
+		} catch (ZkException e) {
+			throw new MetadataServiceException(e);
+		}
+	}
+	
+	@Override
+	public ContainerMetadata read(String id) throws MetadataServiceException {
+		try {
+			return Utils.fromJson(client.<String>readData(makePath(id)));
+		} catch (ZkException e) {
+			throw new MetadataServiceException(e);
+		}
+	}
+	
+	@Override
+	public Collection<ContainerMetadata> readAll() throws MetadataServiceException {
+		try {
+			Collection<ContainerMetadata> metadata = new ArrayList<ContainerMetadata>();
+			for(String id : client.getChildren(BASE_PATH)) {
+				metadata.add(Utils.fromJson(client.<String>readData(makePath(id))));
+			}
+			return metadata;
+		} catch (ZkException e) {
+			throw new MetadataServiceException(e);
+		}
+	}
+	
+	@Override
+	public void update(ContainerMetadata meta) throws MetadataServiceException {
+		try {
+			client.writeData(makePath(meta.id), Utils.toJson(meta));
+		} catch (ZkException e) {
+			throw new MetadataServiceException(e);
+		}
+	}
+	
+	@Override
+	public void delete(String id) throws MetadataServiceException {
+		try {
+			client.delete(makePath(id));
+		} catch (ZkException e) {
+			throw new MetadataServiceException(e);
+		}
+	}
+	
+	String makePath(String containerId) {
+		return BASE_PATH + "/" + containerId;
+	}
+	
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/ApplicationConfig.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/ApplicationConfig.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/ApplicationConfig.java
new file mode 100644
index 0000000..5950d42
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/ApplicationConfig.java
@@ -0,0 +1,32 @@
+package org.apache.helix.metamanager.yarn;
+
/**
 * Immutable bundle of the coordinates a managed application needs: the Helix
 * cluster (address and name) and the metadata provider (address and name).
 */
public class ApplicationConfig {
    final String clusterAddress;
    final String clusterName;
    final String providerAddress;
    final String providerName;

    public ApplicationConfig(String clusterAddress, String clusterName,
            String providerAddress, String providerName) {
        this.clusterAddress = clusterAddress;
        this.clusterName = clusterName;
        this.providerAddress = providerAddress;
        this.providerName = providerName;
    }

    /** ZooKeeper address of the Helix cluster. */
    public String getClusterAddress() { return clusterAddress; }

    /** Name of the Helix cluster. */
    public String getClusterName() { return clusterName; }

    /** Address of the metadata provider backing store. */
    public String getProviderAddress() { return providerAddress; }

    /** Logical name of the container provider. */
    public String getProviderName() { return providerName; }
}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/ContainerMetadata.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/ContainerMetadata.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/ContainerMetadata.java
new file mode 100644
index 0000000..1245080
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/ContainerMetadata.java
@@ -0,0 +1,50 @@
+package org.apache.helix.metamanager.yarn;
+
+
/**
 * JSON-serialized record describing a managed container: its logical id, its
 * lifecycle state, the numeric yarn container id (-1 while unassigned), the
 * launch command, and the owning provider.
 */
class ContainerMetadata {

    /** Lifecycle states of a managed container. */
    static enum ContainerState {
        ACQUIRE,
        CONNECTING,
        ACTIVE,
        TEARDOWN,
        FAILED,
        HALTED,
        FINALIZE
    }

    String id;
    ContainerState state;
    int yarnId;
    String command;
    String owner;

    /** No-arg constructor for JSON deserialization. */
    public ContainerMetadata() {
        // left blank
    }

    /** Creates a fresh record in ACQUIRE state with no yarn container assigned. */
    public ContainerMetadata(String id, String command, String owner) {
        this(id, ContainerState.ACQUIRE, -1, command, owner);
    }

    /** Copy of an existing record with a new state. */
    public ContainerMetadata(ContainerMetadata node, ContainerState state) {
        this(node.id, state, node.yarnId, node.command, node.owner);
    }

    /** Copy of an existing record with a new state and yarn container id. */
    public ContainerMetadata(ContainerMetadata node, ContainerState state, int yarnId) {
        this(node.id, state, yarnId, node.command, node.owner);
    }

    // single field-assignment path shared by all constructors
    private ContainerMetadata(String id, ContainerState state, int yarnId, String command, String owner) {
        this.id = id;
        this.state = state;
        this.yarnId = yarnId;
        this.command = command;
        this.owner = owner;
    }
}
+

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/ContainerNode.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/ContainerNode.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/ContainerNode.java
new file mode 100644
index 0000000..59b9325
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/ContainerNode.java
@@ -0,0 +1,61 @@
+package org.apache.helix.metamanager.yarn;
+
+import java.io.Serializable;
+
+import org.apache.hadoop.yarn.api.records.ContainerId;
+
+
+class ContainerNode implements Serializable {
+	/**
+	 * 
+	 */
+	private static final long serialVersionUID = 2578978959080378923L;
+
+	static enum ContainerState {
+		ACQUIRE,
+		CONNECT,
+		READY,
+		STARTING,
+		RUNNING,
+		STOPPING,
+		TEARDOWN,
+		FINALIZE
+	}
+	
+	final String id;
+	final ContainerState state;
+	final ContainerId yarnId;
+	
+	final String zkAddress;
+	final String clusterName;
+	final String command;
+
+	public ContainerNode(String id, String zkAddress, String clusterName, String command) {
+		this.id = id;
+		this.state = ContainerState.ACQUIRE;
+		this.yarnId = null;
+		this.zkAddress = zkAddress;
+		this.clusterName = clusterName;
+		this.command = command;
+	}
+	
+	public ContainerNode(ContainerNode node, ContainerState state) {
+		this.id = node.id;
+		this.state = state;
+		this.yarnId = node.yarnId;
+		this.zkAddress = node.zkAddress;
+		this.clusterName = node.clusterName;
+		this.command = node.command;
+	}
+	
+	public ContainerNode(ContainerNode node, ContainerState state, ContainerId yarnId) {
+		this.id = node.id;
+		this.state = state;
+		this.yarnId = yarnId;
+		this.zkAddress = node.zkAddress;
+		this.clusterName = node.clusterName;
+		this.command = node.command;
+	}
+	
+}
+

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/MessageNode.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/MessageNode.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/MessageNode.java
new file mode 100644
index 0000000..ba5be81
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/MessageNode.java
@@ -0,0 +1,20 @@
+package org.apache.helix.metamanager.yarn;
+
+
/**
 * Immutable message addressed to a container, pairing the container id with a
 * lifecycle operation to perform.
 */
class MessageNode {

    /** Lifecycle operations a message can request. */
    static enum MessageType {
        CREATE, START, STOP, DESTROY
    }

    final String id;
    final MessageType type;

    public MessageNode(String id, MessageType type) {
        this.id = id;
        this.type = type;
    }
}
+

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/MetadataService.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/MetadataService.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/MetadataService.java
new file mode 100644
index 0000000..be88826
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/MetadataService.java
@@ -0,0 +1,146 @@
+package org.apache.helix.metamanager.yarn;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.concurrent.TimeoutException;
+
+import org.I0Itec.zkclient.ZkClient;
+import org.I0Itec.zkclient.exception.ZkException;
+import org.apache.helix.metamanager.yarn.ContainerMetadata.ContainerState;
+import org.apache.log4j.Logger;
+
/**
 * ZooKeeper-backed store for {@link ContainerMetadata} records. Each record
 * lives as a JSON blob in its own znode under the "/containers" namespace on
 * the provider's ZooKeeper ensemble.
 *
 * Not thread-safe by itself; callers coordinate access externally.
 */
public class MetadataService {
	
	static final Logger log = Logger.getLogger(MetadataService.class);
	
	static final String CONTAINER_NAMESPACE = "containers";
	
//	static final String LOCK_PATH = "/" + CONTAINER_NAMESPACE + "/lock";
	static final long POLL_INTERVAL = 100;

	final ApplicationConfig appConfig;
	
	// connection is non-null only between start() and stop()
	ZkClient client;
	String basePath;
	
	public MetadataService(ApplicationConfig appConfig) {
		this.appConfig = appConfig;
	}

	/** Opens the ZooKeeper connection and ensures the base namespace exists. */
	public void start() {
		basePath = "/" + CONTAINER_NAMESPACE;
		log.debug(String.format("starting metadata service for '%s/%s'", appConfig.providerAddress, appConfig.providerName));
		
		client = new ZkClient(appConfig.providerAddress);
		
		// createParents=true: creates the namespace if missing, tolerates it existing
		client.createPersistent(basePath, true);
	}
	
	/** Closes the ZooKeeper connection; safe to call repeatedly. */
	public void stop() {
		log.debug(String.format("stopping metadata service for '%s/%s'", appConfig.providerAddress, appConfig.providerName));
		if(client != null) {
			client.close();
			client = null;
		}
	}
	
//	public void lock(long timeout) throws Exception {
//		long limit = System.currentTimeMillis() + timeout;
//		while (limit > System.currentTimeMillis()) {
//			try {
//				client.createEphemeral(LOCK_PATH);
//				return;
//			} catch (Exception ignore) {}
//			Thread.sleep(POLL_INTERVAL);
//		}
//		throw new IllegalStateException("Could not acquire lock");
//	}
//	
//	public void unlock() {
//		client.delete(LOCK_PATH);
//	}
	
	/**
	 * Persists a new metadata record.
	 *
	 * @throws IllegalMetadataStateException if the node already exists or the
	 *         write fails
	 */
	public void create(ContainerMetadata meta) throws IllegalMetadataStateException {
		try {
			client.createPersistent(makePath(meta.id), Utils.toJson(meta));
		} catch (ZkException e) {
			throw new IllegalMetadataStateException(e);
		}
	}
	
	/**
	 * Reads and deserializes the record for the given container id.
	 *
	 * @throws IllegalMetadataStateException if the node is missing or unreadable
	 */
	public ContainerMetadata read(String id) throws IllegalMetadataStateException {
		try {
			return Utils.fromJson(client.<String>readData(makePath(id)));
		} catch (ZkException e) {
			throw new IllegalMetadataStateException(e);
		}
	}
	
	/**
	 * Reads all records in the namespace.
	 * NOTE(review): not atomic -- a node deleted between getChildren and
	 * readData surfaces as an IllegalMetadataStateException.
	 */
	public Collection<ContainerMetadata> readAll() throws IllegalMetadataStateException {
		try {
			Collection<ContainerMetadata> metadata = new ArrayList<ContainerMetadata>();
			for(String id : client.getChildren(basePath)) {
				metadata.add(Utils.fromJson(client.<String>readData(makePath(id))));
			}
			return metadata;
		} catch (ZkException e) {
			throw new IllegalMetadataStateException(e);
		}
	}
	
	/**
	 * Overwrites the record for meta.id with the given contents.
	 *
	 * @throws IllegalMetadataStateException if the node is missing or the write fails
	 */
	public void update(ContainerMetadata meta) throws IllegalMetadataStateException {
		try {
			client.writeData(makePath(meta.id), Utils.toJson(meta));
		} catch (ZkException e) {
			throw new IllegalMetadataStateException(e);
		}
	}
	
	/**
	 * Deletes the record for the given container id.
	 *
	 * @throws IllegalMetadataStateException if the delete fails
	 */
	public void delete(String id) throws IllegalMetadataStateException {
		try {
			client.delete(makePath(id));
		} catch (ZkException e) {
			throw new IllegalMetadataStateException(e);
		}
	}
	
	/**
	 * Polls every {@link #POLL_INTERVAL} ms until the record reaches the given
	 * state or the timeout (ms) elapses.
	 *
	 * @throws TimeoutException if the state is not reached within the timeout
	 * @throws IllegalMetadataStateException if the record cannot be read
	 * @throws InterruptedException if the polling sleep is interrupted
	 */
	public void waitForState(String id, ContainerState state, long timeout) throws IllegalMetadataStateException, InterruptedException, TimeoutException {
		long limit = System.currentTimeMillis() + timeout;
		ContainerMetadata meta = read(id);
		while(meta.state != state) {
			if(System.currentTimeMillis() >= limit) {
				throw new TimeoutException(String.format("Container '%s' failed to reach state '%s' (currently is '%s')", id, state, meta.state));
			}
			Thread.sleep(POLL_INTERVAL);
			meta = read(id);
		}
	}
	
	/** Maps a container id to its znode path under the base namespace. */
	String makePath(String containerId) {
		return basePath + "/" + containerId;
	}
	
	/** Wraps ZooKeeper failures surfaced through the metadata API. */
	public static class IllegalMetadataStateException extends Exception {

		/**
		 * 
		 */
		private static final long serialVersionUID = -2846997013918977056L;

		public IllegalMetadataStateException() {
			super();
		}

		public IllegalMetadataStateException(String message, Throwable cause) {
			super(message, cause);
		}

		public IllegalMetadataStateException(String message) {
			super(message);
		}

		public IllegalMetadataStateException(Throwable cause) {
			super(cause);
		}	
	}
}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/Utils.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/Utils.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/Utils.java
new file mode 100644
index 0000000..49f70d3
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/Utils.java
@@ -0,0 +1,93 @@
+package org.apache.helix.metamanager.yarn;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.yarn.api.records.LocalResource;
+import org.apache.hadoop.yarn.api.records.LocalResourceType;
+import org.apache.hadoop.yarn.api.records.LocalResourceVisibility;
+import org.apache.hadoop.yarn.util.ConverterUtils;
+import org.apache.hadoop.yarn.util.Records;
+import org.apache.helix.metamanager.yarn.ContainerMetadata.ContainerState;
+import org.apache.log4j.Logger;
+
+import com.google.gson.Gson;
+import com.google.gson.GsonBuilder;
+import com.google.gson.TypeAdapter;
+import com.google.gson.stream.JsonReader;
+import com.google.gson.stream.JsonToken;
+import com.google.gson.stream.JsonWriter;
+
+public class Utils {
+	
+	static final Logger log = Logger.getLogger(Utils.class);
+	
+	static Gson gson;
+	static {
+		GsonBuilder builder = new GsonBuilder();
+		builder.registerTypeAdapter(ContainerState.class, new ContainerStateAdapter());
+		builder.setPrettyPrinting();
+		gson = builder.create();
+	}
+	static Map<String, LocalResource>  dummyResources = createDummyResources();
+	
+	static String toJson(ContainerMetadata meta) {
+		return gson.toJson(meta);
+	}
+	
+	static ContainerMetadata fromJson(String json) {
+		return gson.fromJson(json, ContainerMetadata.class);
+	}
+	
+	static Map<String, LocalResource> getDummyResources() {
+		return dummyResources;
+	}
+
+	private static Map<String, LocalResource> createDummyResources() {
+		File dummy = new File("/tmp/dummy");
+		
+		if(!dummy.exists()) {
+	    	try {
+	    		dummy.createNewFile();
+	    	} catch(Exception e) {
+	    		log.error("could not create dummy file", e);
+	    		System.exit(1);
+	    	}
+		}
+	    
+	    Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();
+	    Path path = new Path(dummy.toURI());
+	    LocalResource localResource = Records.newRecord(LocalResource.class);
+	    localResource.setType(LocalResourceType.FILE);
+	    localResource.setVisibility(LocalResourceVisibility.APPLICATION);          
+	    localResource.setResource(ConverterUtils.getYarnUrlFromPath(path)); 
+	    localResource.setTimestamp(dummy.lastModified());
+	    localResource.setSize(dummy.length());
+	    localResources.put("dummy", localResource);
+		return localResources;
+	}
+	
+	static class ContainerStateAdapter extends TypeAdapter<ContainerState> {
+		@Override
+		public ContainerState read(JsonReader reader) throws IOException {
+			if (reader.peek() == JsonToken.NULL) {
+				reader.nextNull();
+				return null;
+			}
+			return ContainerState.valueOf(reader.nextString());
+		}
+
+		@Override
+		public void write(JsonWriter writer, ContainerState value) throws IOException {
+			if (value == null) {
+				writer.nullValue();
+				return;
+			}
+			writer.value(value.name());
+		}
+	}
+	
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/YarnApplication.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/YarnApplication.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/YarnApplication.java
new file mode 100644
index 0000000..7d2099a
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/YarnApplication.java
@@ -0,0 +1,126 @@
+package org.apache.helix.metamanager.yarn;
+
+import java.net.InetSocketAddress;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.net.NetUtils;
+import org.apache.hadoop.yarn.api.ClientRMProtocol;
+import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationResponse;
+import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationRequest;
+import org.apache.hadoop.yarn.api.records.ApplicationId;
+import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
+import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
+import org.apache.hadoop.yarn.api.records.LocalResource;
+import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.exceptions.YarnRemoteException;
+import org.apache.hadoop.yarn.ipc.YarnRPC;
+import org.apache.hadoop.yarn.util.Records;
+import org.apache.log4j.Logger;
+
+import com.google.common.base.Preconditions;
+
/**
 * Client-side handle for submitting and killing the meta-manager YARN
 * application master. Wraps a ClientRMProtocol proxy to the ResourceManager.
 *
 * Typical lifecycle: construct with an ApplicationConfig, call start() to
 * submit the AM, and stop() to force-kill the running application.
 */
public class YarnApplication {

	static final Logger log = Logger.getLogger(YarnApplication.class);
	
	// names of environment variables forwarded to the application master
	static final String ENV_CLUSTER_ADDRESS = "CLUSTER_ADDRESS";
	static final String ENV_CLUSTER_NAME = "CLUSTER_NAME";
	static final String ENV_PROVIDER_ADDRESS = "PROVIDER_ADDRESS";
	static final String ENV_PROVIDER_NAME = "PROVIDER_NAME";

	// configuration key for the master launch command template
	static final String MASTER_COMMAND = "metamanager.master.command";
	// NOTE(review): the default embeds a hard-coded developer home path and
	// will only work on the original author's machine; deployments must
	// override it via the "metamanager.master.command" configuration key.
	static final String DEFAULT_MASTER_COMMAND = "/bin/sh /home/apucher/incubator-helix/recipes/meta-cluster-manager/target/meta-cluster-manager-pkg/bin/yarn-master-process.sh 1>%s/stdout 2>%s/stderr";

	Configuration conf;       // hadoop/yarn configuration, set via configure()
	YarnRPC rpc;              // RPC factory derived from conf
	ClientRMProtocol rmClient; // ResourceManager proxy, set via connect()
	ApplicationId appId;      // id of the submitted application, set in start()
	
	final ApplicationConfig appConfig;

	/**
	 * @param appConfig cluster/provider addresses and names forwarded to the AM
	 */
	public YarnApplication(ApplicationConfig appConfig) {
		this.appConfig = appConfig;
		configure(new YarnConfiguration());
	}

	/**
	 * Submits the application master to the ResourceManager: connects,
	 * acquires a new application id, builds the AM container launch context
	 * (command, environment, 256 MB resource limit, dummy local resource)
	 * and submits the application.
	 *
	 * @throws Exception on any ResourceManager communication failure
	 */
	public void start() throws Exception {
		connect();
		
		// stdout/stderr are redirected to /tmp/<providerName> rather than the
		// YARN log dir expansion variable (see commented-out alternative below)
		String command = String.format(conf.get(MASTER_COMMAND, DEFAULT_MASTER_COMMAND), "/tmp/" + appConfig.providerName, "/tmp/" + appConfig.providerName); 
				//ApplicationConstants.LOG_DIR_EXPANSION_VAR, ApplicationConstants.LOG_DIR_EXPANSION_VAR);

		log.info(String.format("Starting application '%s/%s' (masterCommand='%s')", appConfig.providerAddress, appConfig.providerName, command));

		// app id
		GetNewApplicationRequest appRequest = Records.newRecord(GetNewApplicationRequest.class);
		GetNewApplicationResponse appResponse = rmClient.getNewApplication(appRequest);

		this.appId = appResponse.getApplicationId();

		log.info(String.format("Acquired app id '%s' for '%s/%s'", appId.toString(), appConfig.providerAddress, appConfig.providerName));
		
		// command
		ContainerLaunchContext launchContext = Records.newRecord(ContainerLaunchContext.class);
		launchContext.setCommands(Collections.singletonList(command));

		// resource limit
		Resource resource = Records.newRecord(Resource.class);
		resource.setMemory(256); // TODO make dynamic
		launchContext.setResource(resource);
		
	    // environment
	    Map<String, String> env = new HashMap<String, String>();
	    env.put(ENV_CLUSTER_ADDRESS, appConfig.clusterAddress);
	    env.put(ENV_CLUSTER_NAME, appConfig.clusterName);
	    env.put(ENV_PROVIDER_ADDRESS, appConfig.providerAddress);
	    env.put(ENV_PROVIDER_NAME, appConfig.providerName);
	    launchContext.setEnvironment(env);
	    
	    // local resources
	    // YARN workaround: create dummy resource 
	    Map<String, LocalResource> localResources = Utils.getDummyResources();
	    launchContext.setLocalResources(localResources);
	    
	    // app submission
	    ApplicationSubmissionContext subContext = Records.newRecord(ApplicationSubmissionContext.class);
		subContext.setApplicationId(appId);
		subContext.setApplicationName(appConfig.providerName);
		subContext.setAMContainerSpec(launchContext);

		SubmitApplicationRequest subRequest = Records.newRecord(SubmitApplicationRequest.class);
		subRequest.setApplicationSubmissionContext(subContext);
		
		log.info(String.format("Starting app id '%s'", appId.toString()));

		rmClient.submitApplication(subRequest);
		
	}

	/**
	 * Force-kills the previously submitted application.
	 * Requires start() to have completed (appId must be set).
	 *
	 * @throws YarnRemoteException on ResourceManager communication failure
	 */
	public void stop() throws YarnRemoteException {
		log.info(String.format("Stopping app id '%s'", appId.toString()));
		KillApplicationRequest killRequest = Records.newRecord(KillApplicationRequest.class);
		killRequest.setApplicationId(appId);

		rmClient.forceKillApplication(killRequest);
	}

	/** Stores the configuration and creates the RPC factory from it. */
	void configure(Configuration conf) {
		this.conf = Preconditions.checkNotNull(conf);
		this.rpc = YarnRPC.create(conf);
	}

	/** Opens the ClientRMProtocol proxy to the ResourceManager address from the config. */
	void connect() {
		YarnConfiguration yarnConf = new YarnConfiguration(conf);
		InetSocketAddress rmAddress = NetUtils.createSocketAddr(yarnConf.get(
				YarnConfiguration.RM_ADDRESS,
				YarnConfiguration.DEFAULT_RM_ADDRESS));
		log.info("Connecting to ResourceManager at: " + rmAddress);
		this.rmClient = ((ClientRMProtocol) rpc.getProxy(ClientRMProtocol.class, rmAddress, conf));
	}
}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/YarnClient.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/YarnClient.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/YarnClient.java
new file mode 100644
index 0000000..3447661
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/YarnClient.java
@@ -0,0 +1,5 @@
+package org.apache.helix.metamanager.yarn;
+
/**
 * Placeholder for a YARN client implementation; intentionally empty.
 */
public class YarnClient {

}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/YarnContainer.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/YarnContainer.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/YarnContainer.java
new file mode 100644
index 0000000..d36eee9
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/YarnContainer.java
@@ -0,0 +1,14 @@
+package org.apache.helix.metamanager.yarn;
+
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.util.Tool;
+
/**
 * Placeholder entry point for a YARN-managed container process.
 * Not yet implemented; run() currently does nothing and reports success.
 */
public class YarnContainer extends Configured implements Tool {

	@Override
	public int run(String[] args) throws Exception {
		// TODO Auto-generated method stub
		return 0;
	}

}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/YarnContainerProvider.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/YarnContainerProvider.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/YarnContainerProvider.java
new file mode 100644
index 0000000..34a6b61
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/YarnContainerProvider.java
@@ -0,0 +1,90 @@
+package org.apache.helix.metamanager.yarn;
+
+import org.apache.helix.metamanager.ClusterContainerProvider;
+import org.apache.helix.metamanager.yarn.ContainerMetadata.ContainerState;
+import org.apache.log4j.Logger;
+
+public class YarnContainerProvider implements ClusterContainerProvider {
+	
+	static final Logger log = Logger.getLogger(YarnContainerProvider.class);
+
+	static final String REQUIRED_TYPE = "container";
+	
+	static final long LOCK_TIMEOUT = 1000;
+	static final long CONTAINER_TIMEOUT = 10000;
+	
+	/*
+	 * CONTAINERS
+	 *   A (A, READY)
+	 *   B (B, RUNNING)
+	 */
+	
+	final ApplicationConfig appConfig;
+	final String command;
+	
+	final Object notifier = new Object();
+	
+	MetadataService metaService;
+	
+	public YarnContainerProvider(ApplicationConfig appConfig, String command) {
+		this.appConfig = appConfig;
+		this.command = command;
+	}
+
+	@Override
+	public void create(final String id, final String type) throws Exception {
+		if(!REQUIRED_TYPE.equals(type)) {
+			throw new IllegalArgumentException(String.format("Type '%s' not supported", type));
+		}
+		
+		metaService.create(new ContainerMetadata(id, command, appConfig.providerName));
+		metaService.waitForState(id, ContainerState.ACTIVE, CONTAINER_TIMEOUT);
+	}
+
+	@Override
+	public void destroy(final String id) throws Exception {
+		ContainerMetadata meta = metaService.read(id);
+
+		if(meta.state == ContainerState.ACTIVE) {
+			log.info(String.format("Destroying active container, going to teardown"));
+			metaService.update(new ContainerMetadata(meta, ContainerState.TEARDOWN));
+			
+		} else if(meta.state == ContainerState.FAILED) {
+			log.info(String.format("Destroying failed container, going to halted"));
+			metaService.update(new ContainerMetadata(meta, ContainerState.HALTED));
+			
+		} else if(meta.state == ContainerState.FINALIZE) {
+			log.info(String.format("Destroying finalized container, skipping"));
+			
+		} else {
+			throw new IllegalStateException(String.format("Container '%s' must be active, failed or finalized", id));
+		}
+		
+		metaService.waitForState(id, ContainerState.FINALIZE, CONTAINER_TIMEOUT);
+		metaService.delete(id);
+	}
+
+	@Override
+	public void destroyAll() {
+		try {
+			for(ContainerMetadata meta : metaService.readAll()) {
+				try { destroy(meta.id); } catch (Exception ignore) {}
+			}
+		} catch (Exception ignore) {
+			// ignore
+		}
+	}
+
+	public void startService() {
+		metaService = new MetadataService(appConfig);
+		metaService.start();
+	}
+	
+	public void stopService() {
+		if(metaService != null) {
+			metaService.stop();
+			metaService = null;
+		}
+	}
+	
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/YarnContainerService.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/YarnContainerService.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/YarnContainerService.java
new file mode 100644
index 0000000..855dddd
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/YarnContainerService.java
@@ -0,0 +1,370 @@
+package org.apache.helix.metamanager.yarn;
+
+import java.io.File;
+import java.io.IOException;
+import java.net.InetSocketAddress;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.Executors;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.net.NetUtils;
+import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.hadoop.yarn.api.AMRMProtocol;
+import org.apache.hadoop.yarn.api.ContainerManager;
+import org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
+import org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest;
+import org.apache.hadoop.yarn.api.records.AMResponse;
+import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
+import org.apache.hadoop.yarn.api.records.Container;
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
+import org.apache.hadoop.yarn.api.records.ContainerStatus;
+import org.apache.hadoop.yarn.api.records.NodeId;
+import org.apache.hadoop.yarn.api.records.Priority;
+import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.api.records.ResourceRequest;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.exceptions.YarnRemoteException;
+import org.apache.hadoop.yarn.ipc.YarnRPC;
+import org.apache.hadoop.yarn.util.Records;
+import org.apache.helix.metamanager.yarn.ContainerMetadata.ContainerState;
+import org.apache.helix.metamanager.yarn.MetadataService.IllegalMetadataStateException;
+import org.apache.log4j.Logger;
+
+import com.google.common.collect.Maps;
+
/**
 * Application-master-side reconciliation service. Periodically compares the
 * desired container set (ContainerMetadata nodes owned by this provider in
 * the metadata service) with the actual YARN containers, requesting,
 * assigning, launching and releasing containers as needed.
 *
 * NOTE(review): state maps are only mutated by the single scheduled thread
 * created in startService(); the class is not otherwise thread-safe.
 */
public class YarnContainerService {

	static final Logger log = Logger.getLogger(YarnContainerService.class);

	static final String REQUIRED_TYPE = "container";
	
	// zookeeper timeout in ms (presumably for the metadata service — TODO confirm)
	static final long ZOOKEEPER_TIMEOUT = 5000;
	
	// period of the reconciliation cycle in ms
	static final long YARNSERVICE_INTERVAL = 1000;
	
	static final String CONTAINERS = "CONTAINERS";
	
	// launch command template: script, cluster address, cluster name,
	// provider address, provider name, container id, stdout dir, stderr dir
	static final String CONTAINER_COMMAND = "/bin/sh %s %s %s %s %s %s 1>%s/stdout 2>%s/stderr";

	/*
	 * CONTAINERS
	 *   A (A, READY)
	 *   B (B, RUNNING)
	 */
	
	final ApplicationConfig appConfig;
	final AMRMProtocol yarnClient;
	// NOTE(review): field name has a typo ("Atempt"); kept for compatibility
	final ApplicationAttemptId appAtemptId;
	
	final Configuration yarnConfig;
	
	// NOTE(review): appears unused in this class — dummy resources come from
	// Utils.getDummyResources(); candidate for removal
	final File dummy = new File("/tmp/dummy");
	
	// yarn containers allocated but not yet bound to a metadata node
	final Map<ContainerId, Container> unassignedContainers = new HashMap<ContainerId, Container>();
	// yarn containers bound to a metadata node and started
	final Map<ContainerId, Container> activeContainers = new HashMap<ContainerId, Container>();
	// terminal status of completed yarn containers
	final Map<ContainerId, ContainerStatus> completedContainers = new HashMap<ContainerId, ContainerStatus>();
	// yarn container id -> metadata node id
	final Map<ContainerId, String> yarn2meta = new HashMap<ContainerId, String>();
	
	// number of containers requested in the previous cycle (logging only)
	int numRequestedLast = 0;

	MetadataService metaService;
	
	ScheduledExecutorService executor;

	public YarnContainerService(AMRMProtocol yarnClient, Configuration conf, ApplicationAttemptId appAttemptId, ApplicationConfig appConfig) {
		this.appConfig = appConfig;
		this.yarnClient = yarnClient;
		this.appAtemptId = appAttemptId;
		this.yarnConfig = conf;
	}

	/** Starts the metadata service and schedules the periodic update cycle. */
	public void startService() {
		log.debug("starting container service");
		
		metaService = new MetadataService(appConfig);
		metaService.start();
		
		executor = Executors.newSingleThreadScheduledExecutor();
		executor.scheduleAtFixedRate(new YarnService(), 0, YARNSERVICE_INTERVAL, TimeUnit.MILLISECONDS);
	}
	
	/** Stops the update cycle (blocking until termination) and the metadata service. */
	public void stopService() {
		log.debug("stopping container service");
		
		if(executor != null) {
			executor.shutdown();
			// NOTE(review): busy-wait; executor.awaitTermination() would be
			// the conventional alternative
			while(!executor.isTerminated()) {
				try {
					Thread.sleep(100);
				} catch (InterruptedException e) {
					// ignore
				}
			}
			executor = null;
		}
		
		if(metaService != null) {
			metaService.stop();
			metaService = null;
		}
	}
	
	/**
	 * Reads all container metadata nodes and returns those owned by this
	 * provider (matching appConfig.providerName).
	 */
	Collection<ContainerMetadata> readOwnedMetadata() throws IllegalMetadataStateException {
		log.debug("reading container data");
		
		Collection<ContainerMetadata> containers = new ArrayList<ContainerMetadata>();
		for(ContainerMetadata meta : metaService.readAll()) {
			if(meta.owner.equals(appConfig.providerName)) {
				containers.add(meta);
				log.debug(String.format("found container node '%s' (state=%s, yarnId=%s, command=%s, owner=%s)", 
						meta.id, meta.state, meta.yarnId, meta.command, meta.owner));
			}
		}
		return containers;
	}
	
	/**
	 * One reconciliation cycle: counts desired containers per state, sends an
	 * allocate request to the RM (asks + releases), binds newly allocated
	 * yarn containers to ACQUIRE metadata nodes, launches them, and records
	 * completed containers. All errors are logged, never thrown, so the
	 * scheduled task keeps running.
	 */
	class YarnService implements Runnable {
		// monotonically increasing id echoed in each AllocateRequest
		int responseId = 0;
		
		@Override
		public void run() {
			try {
				log.debug("running yarn service update cycle");
				
				Collection<ContainerMetadata> metadata = readOwnedMetadata();
				
				// active meta containers
				int numMetaActive = countActiveMeta(metadata);
				
				// newly acquired meta containers
				int numMetaAcquire = countAcquireMeta(metadata);
				
				// destroyed meta containers
				List<ContainerId> destroyedReleasedIds = createDestroyedReleaseList(metadata);
				int numMetaCompleted = destroyedReleasedIds.size();
				
				int numMeta = numMetaAcquire + numMetaActive + numMetaCompleted;
				
				// yarn containers
				int numYarnUnassigned = unassignedContainers.size();
				int numYarnActive = activeContainers.size();
				int numYarnCompleted = completedContainers.size();
				int numYarn = numYarnUnassigned + numYarnActive + numYarnCompleted;
				
				// negative value means overstock (more unassigned than needed)
				int numYarnRequired = numMetaAcquire - numYarnUnassigned;
				
				// additionally required containers
				int numRequestAdditional = Math.max(0, numYarnRequired);
				
				// overstock containers
				List<ContainerId> unneededReleasedIds = createOverstockReleaseList(numYarnRequired);
				
				log.debug(String.format("meta containers (total=%d, acquire=%d, active=%d, completed=%d)", numMeta, numMetaAcquire, numMetaActive, numMetaCompleted));
				log.debug(String.format("yarn containers (total=%d, unassigned=%d, active=%d, completed=%d)", numYarn, numYarnUnassigned, numYarnActive, numYarnCompleted));
				log.debug(String.format("requesting %d new containers (%d requested last), releasing %d", numRequestAdditional, numRequestedLast, destroyedReleasedIds.size()));
				
				Priority priority = Records.newRecord(Priority.class);
				priority.setPriority(0);
				
				Resource resource = Records.newRecord(Resource.class);
				resource.setMemory(256); // TODO make dynamic
				
				// "*" = any host
				ResourceRequest resourceRequest = Records.newRecord(ResourceRequest.class);
				resourceRequest.setHostName("*");
				resourceRequest.setNumContainers(numRequestAdditional);
				resourceRequest.setPriority(priority);
				resourceRequest.setCapability(resource);
				
				AllocateRequest request = Records.newRecord(AllocateRequest.class);
				request.setResponseId(responseId);
				request.setApplicationAttemptId(appAtemptId);
				request.addAsk(resourceRequest);
				request.addAllReleases(destroyedReleasedIds);
				request.addAllReleases(unneededReleasedIds);
				
				responseId++;
				
				AllocateResponse allocateResponse = null;
				try {
					allocateResponse = yarnClient.allocate(request);
				} catch (YarnRemoteException e) {
					// ignore
					log.error("Error allocating containers", e);
					return;
				}
				
				numRequestedLast = numRequestAdditional;
				
				AMResponse response = allocateResponse.getAMResponse();
				
				// newly added containers
				for(Container container : response.getAllocatedContainers()) {
					unassignedContainers.put(container.getId(), container);
				}
				
				log.info(String.format("%d new containers available, %d required", unassignedContainers.size(), numMetaAcquire));
				
				// pair unassigned yarn containers with metadata nodes that
				// have no yarn id yet (yarnId < 0) and launch them
				Iterator<Container> itYarn = unassignedContainers.values().iterator();
				Iterator<ContainerMetadata> itMeta = metadata.iterator();
				while(itYarn.hasNext() && itMeta.hasNext()) {
					ContainerMetadata meta = itMeta.next();
					
					if(meta.yarnId >= 0)
						continue;
					
					Container containerYarn = itYarn.next();
					
					log.debug(String.format("assigning yarn container '%s' to container node '%s'", containerYarn.getId(), meta.id));
					
					// stdout/stderr go to /tmp/<meta.id> rather than the YARN
					// log dir (see commented-out alternative below)
					String command = String.format(CONTAINER_COMMAND, meta.command,
							appConfig.clusterAddress, appConfig.clusterName, appConfig.providerAddress, appConfig.providerName,
							meta.id, "/tmp/" + meta.id, "/tmp/" + meta.id);  
							//ApplicationConstants.LOG_DIR_EXPANSION_VAR, ApplicationConstants.LOG_DIR_EXPANSION_VAR);
					
					ContainerLaunchContext context = Records.newRecord(ContainerLaunchContext.class);
					context.setContainerId(containerYarn.getId());
					context.setResource(containerYarn.getResource());
					context.setEnvironment(Maps.<String, String>newHashMap());
					context.setCommands(Collections.singletonList(command));
					context.setLocalResources(Utils.getDummyResources());
					try {
						context.setUser(UserGroupInformation.getCurrentUser().getShortUserName());
					} catch (IOException e) {
						log.error(String.format("failed setting up container '%s' user information", meta.id));
						return;
					}
					
					log.debug(String.format("container '%s' executing command '%s'", meta.id, command));

					StartContainerRequest startReq = Records.newRecord(StartContainerRequest.class);
					startReq.setContainerLaunchContext(context);
					
					try {
						getContainerManager(containerYarn).startContainer(startReq);

					} catch (YarnRemoteException e) {
						log.error(String.format("Error starting container '%s'", meta.id), e);
						return;
					}
						
					log.debug(String.format("container '%s' started, updating container node", meta.id));

					metaService.update(new ContainerMetadata(meta, ContainerState.CONNECTING, containerYarn.getId().getId()));
					yarn2meta.put(containerYarn.getId(), meta.id);
					
					log.debug(String.format("removing '%s' from unassigned yarn containers and adding to active list", containerYarn.getId()));

					itYarn.remove();
					activeContainers.put(containerYarn.getId(), containerYarn);
					
				}
				
				// process containers reported completed by the RM
				for(ContainerStatus status : response.getCompletedContainersStatuses()) {
					ContainerId id = status.getContainerId();
					
					log.info(String.format("Container '%s' completed", id));
					
					if(unassignedContainers.containsKey(id)) {
						log.info(String.format("Unassigned container '%s' terminated, removing", id));
						unassignedContainers.remove(id);
						// TODO handle
					}
					
					if(activeContainers.containsKey(id)) {
						log.info(String.format("Active container '%s' terminated, removing", id));
						activeContainers.remove(id);
						
						String metaId = yarn2meta.get(id);
						ContainerMetadata meta = metaService.read(metaId);
						
						log.debug(String.format("container '%s' finalized, updating container node", meta.id));
						
						metaService.update(new ContainerMetadata(meta, ContainerState.FINALIZE));
					}
					
					completedContainers.put(id, status);
				}

				log.debug("yarn service update cycle complete");
				
			} catch (Exception e) {
				// swallow so the scheduled task keeps running next cycle
				log.error("Error while executing yarn update cycle", e);
			}
		}

		/**
		 * Releases up to -numYarnRequired unassigned containers when more
		 * were allocated than metadata nodes need (numYarnRequired < 0).
		 */
		private List<ContainerId> createOverstockReleaseList(int numYarnRequired) {
			List<ContainerId> unneededReleasedIds = new ArrayList<ContainerId>();
			Iterator<Container> itUnassigned = unassignedContainers.values().iterator();
			if(numYarnRequired < 0) {
				for(int i=0; i<-numYarnRequired && itUnassigned.hasNext(); i++) {
					Container container = itUnassigned.next();
					unneededReleasedIds.add(container.getId());
					log.debug(String.format("Container '%s' no longer required, removing", container.getId()));
					itUnassigned.remove();
				}
			}
			return unneededReleasedIds;
		}

		/** Collects yarn ids of metadata nodes in HALTED state for release. */
		private List<ContainerId> createDestroyedReleaseList(
				Collection<ContainerMetadata> metadata) {
			List<ContainerId> releasedIds = new ArrayList<ContainerId>();
			for(ContainerMetadata meta : metadata) {
				if(meta.state == ContainerState.HALTED) {
					ContainerId containerId = Records.newRecord(ContainerId.class);
					containerId.setApplicationAttemptId(appAtemptId);
					containerId.setId(meta.yarnId);
					releasedIds.add(containerId);
					log.debug(String.format("releasing container '%s'", containerId));
				}
			}
			return releasedIds;
		}

		/** Counts metadata nodes in ACQUIRE state (waiting for a yarn container). */
		private int countAcquireMeta(Collection<ContainerMetadata> metadata) {
			int numMetaAcquire = 0;
			for(ContainerMetadata meta : metadata) {
				if(meta.state == ContainerState.ACQUIRE) {
					numMetaAcquire++;
				}
			}
			return numMetaAcquire;
		}

		/** Counts metadata nodes that are neither ACQUIRE, HALTED nor FINALIZE. */
		private int countActiveMeta(Collection<ContainerMetadata> metadata) {
			int numMetaActive = 0;
			for(ContainerMetadata meta : metadata) {
				if(meta.state != ContainerState.ACQUIRE &&
				   meta.state != ContainerState.HALTED &&
				   meta.state != ContainerState.FINALIZE) {
					numMetaActive++;
				}
			}
			return numMetaActive;
		}
	}
	
	/**
	 * Opens a ContainerManager RPC proxy to the NodeManager hosting the
	 * given container.
	 */
	private ContainerManager getContainerManager(Container container) {
		YarnConfiguration yarnConf = new YarnConfiguration(yarnConfig);
		YarnRPC rpc = YarnRPC.create(yarnConf);
		NodeId nodeId = container.getNodeId();
		String containerIpPort = String.format("%s:%d", nodeId.getHost(),
				nodeId.getPort());
		log.info("Connecting to ContainerManager at: " + containerIpPort);
		InetSocketAddress addr = NetUtils.createSocketAddr(containerIpPort);
		ContainerManager cm = (ContainerManager) rpc.getProxy(
				ContainerManager.class, addr, yarnConfig);
		return cm;
	}
		  
}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/YarnHelper.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/YarnHelper.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/YarnHelper.java
new file mode 100644
index 0000000..4314bdc
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/YarnHelper.java
@@ -0,0 +1,5 @@
+package org.apache.helix.metamanager.yarn;
+
/**
 * Placeholder for YARN helper utilities; intentionally empty.
 */
public class YarnHelper {

}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/YarnMaster.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/YarnMaster.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/YarnMaster.java
new file mode 100644
index 0000000..a2aef0e
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/YarnMaster.java
@@ -0,0 +1,134 @@
+package org.apache.helix.metamanager.yarn;
+
+import java.net.InetSocketAddress;
+import java.util.Map;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.net.NetUtils;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+import org.apache.hadoop.yarn.api.AMRMProtocol;
+import org.apache.hadoop.yarn.api.ApplicationConstants;
+import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationMasterRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterRequest;
+import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.ipc.YarnRPC;
+import org.apache.hadoop.yarn.util.ConverterUtils;
+import org.apache.hadoop.yarn.util.Records;
+import org.apache.log4j.Logger;
+
/**
 * YARN application master entry point. Registers with the ResourceManager,
 * starts the {@link YarnContainerService} reconciliation loop, and blocks
 * forever; a JVM shutdown hook stops the service and sends the finish
 * request to the RM.
 */
public class YarnMaster extends Configured implements Tool {

	static final Logger log = Logger.getLogger(YarnMaster.class);
	
	AMRMProtocol resourceManager;   // RM scheduler proxy, set in run()
	ApplicationAttemptId appAttemptId;
	
	YarnContainerService service;
	
	/**
	 * Registers the AM, reads the application config from the environment,
	 * starts the container service and blocks until the JVM is terminated.
	 * Cleanup (service stop + FinishApplicationMaster) runs in a shutdown hook.
	 */
	@Override
	public int run(String[] args) throws Exception {
		log.trace("BEGIN YarnMaster.run()");
			
		Configuration conf = getConf();
		
		this.appAttemptId = getApplicationAttemptId();
		log.info(String.format("Got application attempt id '%s'", appAttemptId.toString()));
		
		log.debug("Getting resource manager");
		this.resourceManager = getResourceManager(conf);

	    // register the AM with the RM
		log.debug("Registering application master");
	    RegisterApplicationMasterRequest appMasterRequest = 
	        Records.newRecord(RegisterApplicationMasterRequest.class);
	    appMasterRequest.setApplicationAttemptId(appAttemptId);     
	    appMasterRequest.setHost("");
	    appMasterRequest.setRpcPort(0);
	    appMasterRequest.setTrackingUrl("");

	    resourceManager.registerApplicationMaster(appMasterRequest);

	    // application config is passed via environment variables set by
	    // YarnApplication when submitting the AM
	    String clusterAddress = getEnv(YarnApplication.ENV_CLUSTER_ADDRESS);
	    String clusterName = getEnv(YarnApplication.ENV_CLUSTER_NAME);
	    String providerAddress = getEnv(YarnApplication.ENV_PROVIDER_ADDRESS);
	    String providerName = getEnv(YarnApplication.ENV_PROVIDER_NAME);
	    ApplicationConfig appConfig = new ApplicationConfig(clusterAddress, clusterName, providerAddress, providerName);
	    
	    service = new YarnContainerService(resourceManager, conf, appAttemptId, appConfig);
	    service.startService();
	    
	    Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() {
	    	@Override
	    	public void run() {

	    		service.stopService();
	    		
	    		// finish application
	    	    log.debug("Sending finish request");
	    	    FinishApplicationMasterRequest finishReq = 
	    	    	Records.newRecord(FinishApplicationMasterRequest.class);
	    	    
	    	    finishReq.setAppAttemptId(getApplicationAttemptId());
	    	    finishReq.setFinishApplicationStatus(FinalApplicationStatus.SUCCEEDED);
	    	    
	    	    // best-effort during shutdown; the RM may already be unreachable
	    	    try { resourceManager.finishApplicationMaster(finishReq); } catch(Exception ignore) {}
	    	}
	    }));
	    
	    // block forever; termination is handled by the shutdown hook
	    try { Thread.currentThread().join(); } catch(Exception ignore) {}
	    
		log.trace("END YarnMaster.run()");
		
		return 0;
	}

	/** Opens the AMRMProtocol proxy to the RM scheduler address from the config. */
	private AMRMProtocol getResourceManager(Configuration conf) {
		// Connect to the Scheduler of the ResourceManager.
	    YarnConfiguration yarnConf = new YarnConfiguration(conf);
	    YarnRPC rpc = YarnRPC.create(yarnConf);
	    InetSocketAddress rmAddress = 
	        NetUtils.createSocketAddr(yarnConf.get(
	            YarnConfiguration.RM_SCHEDULER_ADDRESS,
	            YarnConfiguration.DEFAULT_RM_SCHEDULER_ADDRESS));           
	    log.info("Connecting to ResourceManager at " + rmAddress);
	    AMRMProtocol resourceManager = 
	        (AMRMProtocol) rpc.getProxy(AMRMProtocol.class, rmAddress, conf);
		return resourceManager;
	}

	/** Derives the application attempt id from the AM container id env variable. */
	private ApplicationAttemptId getApplicationAttemptId() {
	    ContainerId containerId = ConverterUtils.toContainerId(getEnv(ApplicationConstants.AM_CONTAINER_ID_ENV));
	    ApplicationAttemptId appAttemptID = containerId.getApplicationAttemptId();
		return appAttemptID;
	}
	
	/**
	 * Reads a required environment variable.
	 * NOTE(review): the local is misleadingly named 'clusterName' — it holds
	 * the value of whatever key is requested.
	 *
	 * @throws IllegalArgumentException if the variable is not set
	 */
	private String getEnv(String key) {
		Map<String, String> envs = System.getenv();
	    String clusterName = envs.get(key);
	    if (clusterName == null) {
	      // container id should always be set in the env by the framework 
	      throw new IllegalArgumentException(
	          String.format("%s not set in the environment", key));
	    }
	    return clusterName;
	}

	/** CLI entry point; exits with the ToolRunner return code, or 1 on error. */
	public static void main(String[] args) throws Exception {
		log.trace("BEGIN YarnMaster.main()");

		try {
			int rc = ToolRunner.run(new Configuration(), new YarnMaster(), args);
			System.exit(rc);
		} catch (Exception e) {
			System.err.println(e);
			System.exit(1);
		}

		log.trace("END YarnMaster.main()");
	}
}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/YarnProcess.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/YarnProcess.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/YarnProcess.java
new file mode 100644
index 0000000..7108d39
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/YarnProcess.java
@@ -0,0 +1,171 @@
+package org.apache.helix.metamanager.yarn;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import java.util.concurrent.Executors;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.helix.HelixManager;
+import org.apache.helix.HelixManagerFactory;
+import org.apache.helix.InstanceType;
+import org.apache.helix.metamanager.managed.ManagedFactory;
+import org.apache.helix.metamanager.yarn.ContainerMetadata.ContainerState;
+import org.apache.log4j.Logger;
+
+public class YarnProcess {
+	static final Logger log = Logger.getLogger(YarnProcess.class);
+
+	static final long CONTAINERSERVICE_INTERVAL = 1000;
+
+	final ApplicationConfig appConfig;
+	final String containerId;
+	
+	HelixManager participantManager;
+
+	MetadataService metaService;
+	ScheduledExecutorService executor;
+
+
+	public YarnProcess(ApplicationConfig appConfig, String containerId) {
+		this.appConfig = appConfig;
+		this.containerId = containerId;
+	}
+
+	public void startService() {
+		log.info(String.format("start metadata service for '%s'", containerId));
+		metaService = new MetadataService(appConfig);
+		metaService.start();
+		
+		executor = Executors.newSingleThreadScheduledExecutor();
+		executor.scheduleAtFixedRate(new ContainerService(), 0, CONTAINERSERVICE_INTERVAL, TimeUnit.MILLISECONDS);
+	}
+
+	public void stopService() {
+		log.info(String.format("stop metadata service for '%s'", containerId));
+		if (metaService != null) {
+			metaService.stop();
+			metaService = null;
+		}
+		
+		if(executor != null) {
+			executor.shutdown();
+		}
+	}
+	
+	public boolean isRunning() {
+		if(executor == null)
+			return false;
+		return !executor.isTerminated();
+	}
+	
+	public void startParticipant() throws Exception {
+		log.info("STARTING " + containerId);
+		participantManager = HelixManagerFactory.getZKHelixManager(appConfig.clusterName,
+				containerId, InstanceType.PARTICIPANT, appConfig.clusterAddress);
+		participantManager.getStateMachineEngine().registerStateModelFactory(
+				"MasterSlave", new ManagedFactory());
+		participantManager.connect();
+		log.info("STARTED " + containerId);
+	}
+
+	public void stopParticipant() {
+		if (participantManager != null) {
+			participantManager.disconnect();
+			participantManager = null;
+		}
+	}
+	
+	public void updateContainerStatus() {
+		log.info("updating container status");
+		try {
+			ContainerMetadata meta = metaService.read(containerId);
+			
+			if(meta.state == ContainerState.CONNECTING) {
+				log.info("container connecting, going to active");
+				try {
+					startParticipant();
+					metaService.update(new ContainerMetadata(meta, ContainerState.ACTIVE));
+				} catch (Exception e) {
+					log.error("Failed to start participant, going to failed", e);
+					stopParticipant();
+					metaService.update(new ContainerMetadata(meta, ContainerState.FAILED));
+				}
+			}
+			
+			if(meta.state == ContainerState.ACTIVE) {
+				// do something
+				// and go to failed on error
+			}
+			
+			if(meta.state == ContainerState.TEARDOWN) {
+				log.info("container teardown, going to halted");
+				stopParticipant();
+				metaService.update(new ContainerMetadata(meta, ContainerState.HALTED));
+				stopService();
+			}
+			
+		} catch(Exception e) {
+			log.warn(String.format("Container '%s' does not exist, stopping service", containerId));
+			stopService();
+		}
+	}
+	
+	class ContainerService implements Runnable {
+		@Override
+		public void run() {
+			updateContainerStatus();
+		}
+	}
+
+  public static void main(String[] args) throws Exception
+  {
+	log.trace("BEGIN YarnProcess.main()");
+	  
+    final String clusterAddress = args[0];
+    final String clusterName = args[1];
+    final String providerAddress = args[2];
+    final String providerName = args[3];
+    final String containerId = args[4];
+
+    final ApplicationConfig appConfig = new ApplicationConfig(clusterAddress, clusterName, providerAddress, providerName);
+    
+    final YarnProcess yarnProcess = new YarnProcess(appConfig, containerId);
+
+    yarnProcess.startService();
+    
+    Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() {
+		@Override
+		public void run() {
+			yarnProcess.stopService();
+		}
+	}));
+    
+	while(yarnProcess.isRunning()) {
+		try {
+			Thread.sleep(100);
+		} catch (InterruptedException e) {
+			// ignore
+		}
+	}
+	
+	log.trace("END YarnProcess.main()");
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/resources/2by2local.properties
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/resources/2by2local.properties b/recipes/meta-cluster-manager/src/main/resources/2by2local.properties
new file mode 100644
index 0000000..ac7968a
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/resources/2by2local.properties
@@ -0,0 +1,52 @@
+#
+# Zookeeper (optional)
+#
+zookeeper.datadir=/tmp/meta/zk/data
+zookeeper.logdir=/tmp/meta/zk/log
+zookeeper.port=2199
+
+#
+# Meta Cluster
+#
+meta.cluster=meta
+meta.managed=managed
+meta.address=localhost:2199
+
+meta.target.type=static
+meta.target.database=3
+meta.target.webserver=5
+
+meta.status.type=local
+
+meta.provider.type=local
+meta.provider.name=provider_0
+meta.provider.address=localhost:2199
+meta.provider.cluster=managed
+
+meta.provider.containers=database,webserver
+
+meta.provider.container.database.class=org.apache.helix.metamanager.impl.container.DummyMasterSlaveProcess
+meta.provider.container.database.prop1=foo
+meta.provider.container.database.prop2=bar
+
+meta.provider.container.webserver.class=org.apache.helix.metamanager.impl.container.DummyMasterSlaveProcess
+meta.provider.container.webserver.prop1=foo
+meta.provider.container.webserver.prop2=bar
+
+#
+# Managed Cluster
+#
+managed.cluster=managed
+managed.address=localhost:2199
+
+managed.resources=dbprod,wsprod
+
+managed.resource.dbprod.container=database
+managed.resource.dbprod.model=MasterSlave
+managed.resource.dbprod.partitions=8
+managed.resource.dbprod.replica=3
+
+managed.resource.wsprod.container=webserver
+managed.resource.wsprod.model=MasterSlave
+managed.resource.wsprod.partitions=15
+managed.resource.wsprod.replica=1

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/resources/2by2localMixedModels.properties
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/resources/2by2localMixedModels.properties b/recipes/meta-cluster-manager/src/main/resources/2by2localMixedModels.properties
new file mode 100644
index 0000000..3971375
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/resources/2by2localMixedModels.properties
@@ -0,0 +1,52 @@
+#
+# Zookeeper (optional)
+#
+zookeeper.datadir=/tmp/meta/zk/data
+zookeeper.logdir=/tmp/meta/zk/log
+zookeeper.port=2199
+
+#
+# Meta Cluster
+#
+meta.cluster=meta
+meta.managed=managed
+meta.address=localhost:2199
+
+meta.target.type=static
+meta.target.database=3
+meta.target.webserver=5
+
+meta.status.type=local
+
+meta.provider.type=local
+meta.provider.name=provider_0
+meta.provider.address=localhost:2199
+meta.provider.cluster=managed
+
+meta.provider.containers=database,webserver
+
+meta.provider.container.database.class=org.apache.helix.metamanager.impl.container.DummyMasterSlaveProcess
+meta.provider.container.database.prop1=foo
+meta.provider.container.database.prop2=bar
+
+meta.provider.container.webserver.class=org.apache.helix.metamanager.impl.container.DummyOnlineOfflineProcess
+meta.provider.container.webserver.prop1=foo
+meta.provider.container.webserver.prop2=bar
+
+#
+# Managed Cluster
+#
+managed.cluster=managed
+managed.address=localhost:2199
+
+managed.resources=dbprod,wsprod
+
+managed.resource.dbprod.container=database
+managed.resource.dbprod.model=MasterSlave
+managed.resource.dbprod.partitions=8
+managed.resource.dbprod.replica=3
+
+managed.resource.wsprod.container=webserver
+managed.resource.wsprod.model=OnlineOffline
+managed.resource.wsprod.partitions=15
+managed.resource.wsprod.replica=1

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/resources/2by2shell.properties
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/resources/2by2shell.properties b/recipes/meta-cluster-manager/src/main/resources/2by2shell.properties
new file mode 100644
index 0000000..a26f250
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/resources/2by2shell.properties
@@ -0,0 +1,52 @@
+#
+# Zookeeper (optional)
+#
+zookeeper.datadir=/tmp/meta/zk/data
+zookeeper.logdir=/tmp/meta/zk/log
+zookeeper.port=2199
+
+#
+# Meta Cluster
+#
+meta.cluster=meta
+meta.managed=managed
+meta.address=localhost:2199
+
+meta.target.type=static
+meta.target.database=3
+meta.target.webserver=5
+
+meta.status.type=shell
+
+meta.provider.type=shell
+meta.provider.name=provider_0
+meta.provider.address=localhost:2199
+meta.provider.cluster=managed
+
+meta.provider.containers=database,webserver
+
+meta.provider.container.database.class=org.apache.helix.metamanager.impl.container.DummyMasterSlaveProcess
+meta.provider.container.database.prop1=foo
+meta.provider.container.database.prop2=bar
+
+meta.provider.container.webserver.class=org.apache.helix.metamanager.impl.container.DummyMasterSlaveProcess
+meta.provider.container.webserver.prop1=foo
+meta.provider.container.webserver.prop2=bar
+
+#
+# Managed Cluster
+#
+managed.cluster=managed
+managed.address=localhost:2199
+
+managed.resources=dbprod,wsprod
+
+managed.resource.dbprod.container=database
+managed.resource.dbprod.model=MasterSlave
+managed.resource.dbprod.partitions=8
+managed.resource.dbprod.replica=3
+
+managed.resource.wsprod.container=webserver
+managed.resource.wsprod.model=MasterSlave
+managed.resource.wsprod.partitions=15
+managed.resource.wsprod.replica=1

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/resources/2by2yarn.properties
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/resources/2by2yarn.properties b/recipes/meta-cluster-manager/src/main/resources/2by2yarn.properties
new file mode 100644
index 0000000..6afd2c6
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/resources/2by2yarn.properties
@@ -0,0 +1,58 @@
+#
+# Zookeeper (optional)
+#
+zookeeper.datadir=/tmp/meta/zk/data
+zookeeper.logdir=/tmp/meta/zk/log
+zookeeper.port=2199
+
+#
+# Meta Cluster
+#
+meta.cluster=meta
+meta.managed=managed
+meta.address=rm:2199
+
+meta.target.type=static
+meta.target.database=3
+meta.target.webserver=5
+
+meta.status.type=yarn
+meta.status.metadata=rm:2199
+
+meta.provider.type=yarn
+meta.provider.name=provider_0
+meta.provider.address=rm:2199
+meta.provider.cluster=managed
+meta.provider.metadata=rm:2199
+meta.provider.resourcemananger=rm:8032
+meta.provider.scheduler=rm:8030
+meta.provider.user=yarn
+meta.provider.hdfs=hdfs://rm:9000/
+
+meta.provider.containers=database,webserver
+
+meta.provider.container.database.class=org.apache.helix.metamanager.impl.container.DummyMasterSlaveProcess
+meta.provider.container.database.prop1=foo
+meta.provider.container.database.prop2=bar
+
+meta.provider.container.webserver.class=org.apache.helix.metamanager.impl.container.DummyMasterSlaveProcess
+meta.provider.container.webserver.prop1=foo
+meta.provider.container.webserver.prop2=bar
+
+#
+# Managed Cluster
+#
+managed.cluster=managed
+managed.address=rm:2199
+
+managed.resources=dbprod,wsprod
+
+managed.resource.dbprod.container=database
+managed.resource.dbprod.model=MasterSlave
+managed.resource.dbprod.partitions=8
+managed.resource.dbprod.replica=3
+
+managed.resource.wsprod.container=webserver
+managed.resource.wsprod.model=MasterSlave
+managed.resource.wsprod.partitions=15
+managed.resource.wsprod.replica=1

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/resources/2by2yarnZookeeper.properties
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/resources/2by2yarnZookeeper.properties b/recipes/meta-cluster-manager/src/main/resources/2by2yarnZookeeper.properties
new file mode 100644
index 0000000..66f3637
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/resources/2by2yarnZookeeper.properties
@@ -0,0 +1,58 @@
+#
+# Zookeeper (optional)
+#
+zookeeper.datadir=/tmp/meta/zk/data
+zookeeper.logdir=/tmp/meta/zk/log
+zookeeper.port=2199
+
+#
+# Meta Cluster
+#
+meta.cluster=meta
+meta.managed=managed
+meta.address=rm:2199
+
+meta.target.type=static
+meta.target.database=3
+meta.target.webserver=5
+
+meta.status.type=yarn
+meta.status.metadata=rm:2199
+
+meta.provider.type=yarn
+meta.provider.name=provider_0
+meta.provider.address=rm:2199
+meta.provider.cluster=managed
+meta.provider.metadata=rm:2199
+meta.provider.resourcemananger=rm:8032
+meta.provider.scheduler=rm:8030
+meta.provider.user=yarn
+meta.provider.hdfs=hdfs://rm:9000/
+
+meta.provider.containers=database,webserver
+
+meta.provider.container.database.class=org.apache.helix.metamanager.impl.container.ZookeeperMasterSlaveProcess
+meta.provider.container.database.address=rm:2199
+meta.provider.container.database.root=mydatabase
+
+meta.provider.container.webserver.class=org.apache.helix.metamanager.impl.container.ZookeeperMasterSlaveProcess
+meta.provider.container.webserver.address=rm:2199
+meta.provider.container.webserver.root=mywebserver
+
+#
+# Managed Cluster
+#
+managed.cluster=managed
+managed.address=rm:2199
+
+managed.resources=dbprod,wsprod
+
+managed.resource.dbprod.container=database
+managed.resource.dbprod.model=MasterSlave
+managed.resource.dbprod.partitions=8
+managed.resource.dbprod.replica=3
+
+managed.resource.wsprod.container=webserver
+managed.resource.wsprod.model=MasterSlave
+managed.resource.wsprod.partitions=15
+managed.resource.wsprod.replica=1

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/resources/2meta2managed.properties
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/resources/2meta2managed.properties b/recipes/meta-cluster-manager/src/main/resources/2meta2managed.properties
new file mode 100644
index 0000000..b719620
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/resources/2meta2managed.properties
@@ -0,0 +1,52 @@
+#
+# Zookeeper (optional)
+#
+zookeeper.datadir=/tmp/meta/zk/data
+zookeeper.logdir=/tmp/meta/zk/log
+zookeeper.port=2199
+
+#
+# Meta Cluster
+#
+meta.cluster=meta
+meta.managed=managed
+meta.address=localhost:2199
+
+meta.target.type=static
+meta.target.database=3
+meta.target.webserver=5
+
+meta.status.type=local
+
+meta.provider.type=local
+meta.provider.name=provider_0
+meta.provider.address=localhost:2199
+meta.provider.cluster=managed
+
+meta.provider.containers=database,webserver
+
+meta.provider.container.database.class=org.apache.helix.metamanager.container.impl.DummyProcess
+meta.provider.container.database.prop1=foo
+meta.provider.container.database.prop2=bar
+
+meta.provider.container.webserver.class=org.apache.helix.metamanager.container.impl.DummyProcess
+meta.provider.container.webserver.prop1=foo
+meta.provider.container.webserver.prop2=bar
+
+#
+# Managed Cluster
+#
+managed.cluster=managed
+managed.address=localhost:2199
+
+managed.resources=dbprod,wsprod
+
+managed.resource.dbprod.container=database
+managed.resource.dbprod.model=MasterSlave
+managed.resource.dbprod.partitions=8
+managed.resource.dbprod.replica=3
+
+managed.resource.wsprod.container=webserver
+managed.resource.wsprod.model=MasterSlave
+managed.resource.wsprod.partitions=15
+managed.resource.wsprod.replica=1

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/resources/Boot2By2Local.properties
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/resources/Boot2By2Local.properties b/recipes/meta-cluster-manager/src/main/resources/Boot2By2Local.properties
new file mode 100644
index 0000000..4eb07bd
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/resources/Boot2By2Local.properties
@@ -0,0 +1,87 @@
+#
+# Zookeeper (optional)
+#
+zookeeper.datadir=/tmp/meta/zk/data
+zookeeper.logdir=/tmp/meta/zk/log
+zookeeper.port=2199
+
+#
+# Cluster
+#
+cluster.name=cluster
+cluster.address=localhost:2199
+
+#
+# Resource
+#
+resource.0.name=wsprod
+resource.0.cluster=cluster
+resource.0.address=localhost:2199
+resource.0.container=webserver
+resource.0.model=MasterSlave
+resource.0.partitions=15
+resource.0.replica=1
+
+resource.1.name=dbprod
+resource.1.cluster=cluster
+resource.1.address=localhost:2199
+resource.1.container=database
+resource.1.model=MasterSlave
+resource.1.partitions=8
+resource.1.replica=3
+
+#
+# Controller
+#
+controller.name=controller
+controller.cluster=cluster
+controller.address=localhost:2199
+controller.autorefresh=5000
+
+#
+# Metacluster
+#
+metacluster.name=meta
+metacluster.address=localhost:2199
+metacluster.managedcluster=cluster
+metacluster.managedaddress=localhost:2199
+
+#
+# Metaresource
+#
+metaresource.0.name=webserver
+metaresource.0.metacluster=meta
+metaresource.0.metaaddress=localhost:2199
+metaresource.0.class=org.apache.helix.metamanager.impl.container.DummyMasterSlaveProcess
+
+metaresource.1.name=database
+metaresource.1.metacluster=meta
+metaresource.1.metaaddress=localhost:2199
+metaresource.1.class=org.apache.helix.metamanager.impl.container.DummyMasterSlaveProcess
+
+#
+# Metaprovider
+#
+metaprovider.0.name=provider0
+metaprovider.0.metacluster=meta
+metaprovider.0.metaaddress=localhost:2199
+metaprovider.0.class=org.apache.helix.metamanager.impl.local.LocalContainerProviderProcess
+
+metaprovider.1.name=provider1
+metaprovider.1.metacluster=meta
+metaprovider.1.metaaddress=localhost:2199
+metaprovider.1.class=org.apache.helix.metamanager.impl.local.LocalContainerProviderProcess
+
+#
+# Metacontroller
+#
+metacontroller.name=metacontroller
+metacontroller.metacluster=meta
+metacontroller.metaaddress=localhost:2199
+metacontroller.autorefresh=5000
+
+metacontroller.status.class=org.apache.helix.metamanager.impl.local.LocalStatusProvider
+
+metacontroller.target.class=org.apache.helix.metamanager.impl.StaticTargetProvider
+metacontroller.target.webserver=5
+metacontroller.target.database=3


[12/15] Adding Helix-task-framework and Yarn integration modules

Posted by ki...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/bootstrapper/ZookeeperService.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/bootstrapper/ZookeeperService.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/bootstrapper/ZookeeperService.java
new file mode 100644
index 0000000..8d78f9b
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/bootstrapper/ZookeeperService.java
@@ -0,0 +1,64 @@
+package org.apache.helix.autoscale.bootstrapper;
+
+import java.io.File;
+import java.util.Properties;
+
+import org.I0Itec.zkclient.IDefaultNameSpace;
+import org.I0Itec.zkclient.ZkClient;
+import org.I0Itec.zkclient.ZkServer;
+import org.apache.commons.io.FileUtils;
+import org.apache.helix.autoscale.Service;
+import org.apache.log4j.Logger;
+
+/**
+ * Bootstrapping zookeeper. Convenience tool for creating standalone zookeeper
+ * instance for test deployments. For production use a separate zookeeper
+ * cluster is strongly recommended.
+ * 
+ */
+public class ZookeeperService implements Service {
+
+    static final Logger log = Logger.getLogger(ZookeeperService.class);
+
+    String              dataDir;
+    String              logDir;
+    int                 port;
+
+    ZkServer            server;
+
+    @Override
+    public void configure(Properties properties) throws Exception {
+        dataDir = properties.getProperty("datadir", "/tmp/zk/data");
+        logDir = properties.getProperty("logdir", "/tmp/zk/log");
+        port = Integer.parseInt(properties.getProperty("port", "2199"));
+    }
+
+    @Override
+    public void start() {
+        log.info(String.format("starting zookeeper service (dataDir='%s', logDir='%s', port=%d)", dataDir, logDir, port));
+
+        FileUtils.deleteQuietly(new File(dataDir));
+        FileUtils.deleteQuietly(new File(logDir));
+
+        IDefaultNameSpace defaultNameSpace = new IDefaultNameSpace() {
+            @Override
+            public void createDefaultNameSpace(ZkClient zkClient) {
+                // left blank
+            }
+        };
+
+        server = new ZkServer(dataDir, logDir, defaultNameSpace, port);
+        server.start();
+    }
+
+    @Override
+    public void stop() {
+        log.info("stopping zookeeper service");
+
+        if (server != null) {
+            server.shutdown();
+            server = null;
+        }
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/container/ContainerProcess.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/container/ContainerProcess.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/container/ContainerProcess.java
new file mode 100644
index 0000000..343e426
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/container/ContainerProcess.java
@@ -0,0 +1,133 @@
+package org.apache.helix.autoscale.container;
+
+import java.util.Properties;
+
+import org.apache.helix.HelixManager;
+import org.apache.helix.HelixManagerFactory;
+import org.apache.helix.InstanceType;
+import org.apache.helix.autoscale.Service;
+import org.apache.helix.participant.statemachine.StateModel;
+import org.apache.helix.participant.statemachine.StateModelFactory;
+import org.apache.log4j.Logger;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * Base service for spawnable container types. Configures itself from
+ * Properties and instantiates a Helix participant in the managed cluster.
+ * 
+ */
+public abstract class ContainerProcess implements Service {
+    static final Logger                             log    = Logger.getLogger(ContainerProcess.class);
+
+    private ContainerProcessProperties              properties;
+    private HelixManager                            participantManager;
+
+    private String                                  modelName;
+    private StateModelFactory<? extends StateModel> modelFactory;
+
+    private String                                  instanceName;
+    private String                                  clusterName;
+    private String                                  zookeeperAddress;
+
+    private boolean                                 active = false;
+    private boolean                                 failed = false;
+
+    public final void setModelName(String modelName) {
+        this.modelName = modelName;
+    }
+
+    public final void setModelFactory(StateModelFactory<? extends StateModel> modelFactory) {
+        this.modelFactory = modelFactory;
+    }
+
+    @Override
+    public void configure(Properties properties) throws Exception {
+        ContainerProcessProperties containerProps = new ContainerProcessProperties();
+        containerProps.putAll(properties);
+        Preconditions.checkArgument(containerProps.isValid());
+
+        this.properties = containerProps;
+        this.instanceName = containerProps.getName();
+        this.clusterName = containerProps.getCluster();
+        this.zookeeperAddress = containerProps.getAddress();
+    }
+
+    @Override
+    public final void start() {
+        try {
+            Preconditions.checkNotNull(modelName, "state model name not set");
+            Preconditions.checkNotNull(modelFactory, "state model factory not set");
+            Preconditions.checkState(properties.isValid(), "process properties not valid: %s", properties.toString());
+
+            log.info(String.format("starting container '%s'", instanceName));
+            startContainer();
+
+            log.info(String.format("starting helix participant '%s'", instanceName));
+            startParticipant();
+
+            active = true;
+
+        } catch (Exception e) {
+            log.error(String.format("starting container '%s' failed", instanceName), e);
+            fail();
+        }
+    }
+
+    protected abstract void startContainer() throws Exception;
+
+    private final void startParticipant() throws Exception {
+        participantManager = HelixManagerFactory.getZKHelixManager(clusterName, instanceName, InstanceType.PARTICIPANT, zookeeperAddress);
+        participantManager.getStateMachineEngine().registerStateModelFactory(modelName, modelFactory);
+        participantManager.connect();
+    }
+
+    @Override
+    public final void stop() {
+        try {
+            log.info(String.format("stopping helix participant '%s'", instanceName));
+            stopParticipant();
+
+            log.info(String.format("stopping container '%s'", instanceName));
+            stopContainer();
+
+            active = false;
+
+        } catch (Exception e) {
+            log.warn(String.format("stopping container '%s' failed", instanceName), e);
+        }
+    }
+
+    protected abstract void stopContainer() throws Exception;
+
+    private final void stopParticipant() {
+        if (participantManager != null) {
+            participantManager.disconnect();
+        }
+    }
+
+    public final void fail() {
+        failed = true;
+    }
+
+    public final boolean isActive() {
+        return active && !failed;
+    }
+
+    public final boolean isFailed() {
+        return failed;
+    }
+
+    public final ContainerProcessProperties getProperties() {
+        return properties;
+    }
+
+    String getModelName() {
+        return modelName;
+    }
+
+    StateModelFactory<? extends StateModel> getModelFactory() {
+        return modelFactory;
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/container/ContainerProcessProperties.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/container/ContainerProcessProperties.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/container/ContainerProcessProperties.java
new file mode 100644
index 0000000..1096174
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/container/ContainerProcessProperties.java
@@ -0,0 +1,66 @@
+package org.apache.helix.autoscale.container;
+
+import java.util.Properties;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * Base configuration for ContainerProcess. 
+ * 
+ */
+public class ContainerProcessProperties extends Properties {
+    /**
+	 * 
+	 */
+    private static final long  serialVersionUID = 5754863079470995536L;
+
+    public static final String CLUSTER          = "cluster";
+    public static final String ADDRESS          = "address";
+    public static final String NAME             = "name";
+    public static final String CONTAINER_CLASS  = "class";
+
+    public ContainerProcessProperties() {
+        // left blank
+    }
+
+    public ContainerProcessProperties(Properties properties) {
+        Preconditions.checkNotNull(properties);
+        putAll(properties);
+    }
+	
+	public boolean isValid() {
+		return containsKey(CLUSTER) &&
+			   containsKey(NAME) &&
+			   containsKey(ADDRESS) &&
+			   containsKey(CONTAINER_CLASS);
+	}
+	
+    public String getCluster() {
+        return getProperty(CLUSTER);
+    }
+
+    public String getAddress() {
+        return getProperty(ADDRESS);
+    }
+
+    public String getName() {
+        return getProperty(NAME);
+    }
+
+    public String getContainerClass() {
+        return getProperty(CONTAINER_CLASS);
+    }
+
+    @Override
+    public synchronized Object get(Object key) {
+        Preconditions.checkState(containsKey(key));
+        return super.get(key);
+    }
+
+    @Override
+    public String getProperty(String key) {
+        Preconditions.checkState(containsKey(key));
+        return super.getProperty(key);
+    }
+	
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/container/ContainerUtils.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/container/ContainerUtils.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/container/ContainerUtils.java
new file mode 100644
index 0000000..8bab01e
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/container/ContainerUtils.java
@@ -0,0 +1,46 @@
+package org.apache.helix.autoscale.container;
+
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.lang.reflect.Constructor;

import org.apache.log4j.Logger;
+
+/**
+ * Utility for loading ContainerProperties and spawning ContainerProcess.
+ * 
+ */
+public class ContainerUtils {
+
+    static final Logger log = Logger.getLogger(ContainerUtils.class);
+
+    private ContainerUtils() {
+        // left blank
+    }
+
+    public static ContainerProcess createProcess(ContainerProcessProperties properties) throws Exception {
+        String containerClassName = properties.getContainerClass();
+
+        Class<?> containerClass = Class.forName(containerClassName);
+
+        log.debug(String.format("checking for properties constructor in class '%s'", containerClassName));
+
+        Constructor<?> constructor = containerClass.getConstructor(ContainerProcessProperties.class);
+
+        return (ContainerProcess) constructor.newInstance(properties);
+    }
+
+    public static ContainerProcessProperties getPropertiesFromResource(String resourceName) throws IOException {
+        ContainerProcessProperties properties = new ContainerProcessProperties();
+        properties.load(ClassLoader.getSystemResourceAsStream(resourceName));
+        return properties;
+    }
+
+    public static ContainerProcessProperties getPropertiesFromPath(String filePath) throws IOException {
+        ContainerProcessProperties properties = new ContainerProcessProperties();
+        properties.load(new InputStreamReader(new FileInputStream(filePath)));
+        return properties;
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/FileTargetProvider.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/FileTargetProvider.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/FileTargetProvider.java
new file mode 100644
index 0000000..ebbf4b6
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/FileTargetProvider.java
@@ -0,0 +1,51 @@
+package org.apache.helix.autoscale.impl;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.Properties;
+
+import org.apache.helix.autoscale.TargetProviderService;
+
+/**
+ * File-based target model. Container count is extracted from properties file. Count may change dynamically.
+ * 
+ */
+public class FileTargetProvider implements TargetProviderService {
+
+    File file;
+
+    public FileTargetProvider() {
+        // left blank
+    }
+
+    public FileTargetProvider(String path) {
+        this.file = new File(path);
+    }
+
+    @Override
+    public int getTargetContainerCount(String containerType) throws FileNotFoundException, IOException, IllegalArgumentException {
+        Properties properties = new Properties();
+        properties.load(new FileReader(file));
+        if (!properties.contains(containerType))
+            throw new IllegalArgumentException(String.format("container type '%s' not found in '%s'", containerType, file.getCanonicalPath()));
+        return Integer.parseInt((String) properties.get(containerType));
+    }
+
+    @Override
+    public void configure(Properties properties) throws Exception {
+        this.file = new File(properties.getProperty("path"));
+    }
+
+    @Override
+    public void start() throws Exception {
+        // left blank
+    }
+
+    @Override
+    public void stop() throws Exception {
+        // left blank
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/RedisTargetProvider.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/RedisTargetProvider.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/RedisTargetProvider.java
new file mode 100644
index 0000000..723ac4d
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/RedisTargetProvider.java
@@ -0,0 +1,356 @@
+package org.apache.helix.autoscale.impl;
+
+import java.io.BufferedReader;
+import java.io.InputStreamReader;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.Map.Entry;
+import java.util.Properties;
+import java.util.concurrent.Callable;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.TimeUnit;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.I0Itec.zkclient.ZkClient;
+import org.apache.helix.autoscale.TargetProviderService;
+import org.apache.log4j.Logger;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * Redis-specific target model based on recurring Tps benchmarking. Tps target
+ * and probed redis-server instances are configured via zookeeper. Tps target
+ * may change dynamically.
+ * 
+ */
+public class RedisTargetProvider implements TargetProviderService {
+
+    static final Logger        log               = Logger.getLogger(RedisTargetProvider.class);
+
+    public static final String BENCHMARK_COMMAND = "redis-benchmark";
+    public static final String BENCHMARK_TESTS   = "GET,SET";
+
+    public static final String DEFAULT_RECORDS   = "100000";
+    public static final String DEFAULT_CLIENTS   = "20";
+    public static final String DEFAULT_REQUESTS  = "100000";
+    public static final String DEFAULT_TIMEOUT   = "8000";
+    public static final String DEFAULT_INTERVAL  = "10000";
+    public static final String DEFAULT_ALPHA     = "0.25";
+
+    ZkClient                   zookeeper;
+
+    String                     address;
+    String                     root;
+
+    int                        records;
+    int                        clients;
+    int                        requests;
+    int                        timeout;
+    int                        interval;
+
+    int                        targetTpsGet;
+    int                        targetTpsSet;
+
+    int                        targetCountMin;
+    int                        targetCountMax;
+    int                        targetCount;
+
+    double                     alpha;
+    double                     averageTpsGet;
+    double                     averageTpsSet;
+    double                     averageCount;
+
+    ScheduledExecutorService   executor;
+
+    @Override
+    public void configure(Properties properties) {
+        address = properties.getProperty("address");
+        root = properties.getProperty("root");
+        targetTpsGet = Integer.valueOf(properties.getProperty("get", "0"));
+        targetTpsSet = Integer.valueOf(properties.getProperty("set", "0"));
+        targetCountMin = Integer.valueOf(properties.getProperty("min", "-1"));
+        targetCountMax = Integer.valueOf(properties.getProperty("max", "-1"));
+        records = Integer.valueOf(properties.getProperty("records", DEFAULT_RECORDS));
+        clients = Integer.valueOf(properties.getProperty("clients", DEFAULT_CLIENTS));
+        requests = Integer.valueOf(properties.getProperty("requests", DEFAULT_REQUESTS));
+        timeout = Integer.valueOf(properties.getProperty("timeout", DEFAULT_TIMEOUT));
+        interval = Integer.valueOf(properties.getProperty("interval", DEFAULT_INTERVAL));
+        alpha = Double.valueOf(properties.getProperty("alpha", DEFAULT_ALPHA));
+    }
+
+    @Override
+    public void start() {
+        log.debug("starting redis status service");
+        zookeeper = new ZkClient(address);
+        zookeeper.createPersistent("/" + root, true);
+
+        try { zookeeper.createPersistent("/" + root + "/target.get", String.valueOf(targetTpsGet)); } catch (Exception ignore) {}
+        try { zookeeper.createPersistent("/" + root + "/target.set", String.valueOf(targetTpsSet)); } catch (Exception ignore) {}
+        try { zookeeper.createPersistent("/" + root + "/target.min", String.valueOf(targetCountMin)); } catch (Exception ignore) {}
+        try { zookeeper.createPersistent("/" + root + "/target.max", String.valueOf(targetCountMax)); } catch (Exception ignore) {}
+ 
+        executor = Executors.newSingleThreadScheduledExecutor();
+        executor.scheduleAtFixedRate(new RedisBenchmarkRunnable(), 0, interval, TimeUnit.MILLISECONDS);
+    }
+
+    @Override
+    public void stop() {
+        log.debug("stopping redis status service");
+        if (executor != null) {
+            executor.shutdownNow();
+            while (!executor.isTerminated()) {
+                try {
+                    Thread.sleep(100);
+                } catch (InterruptedException e) {
+                    // ignore
+                }
+            }
+            executor = null;
+        }
+        if (zookeeper != null) {
+            zookeeper.close();
+            zookeeper = null;
+        }
+    }
+
+    @Override
+    public int getTargetContainerCount(String containerType) throws Exception {
+        return targetCount;
+    }
+
+    private class RedisBenchmarkRunnable implements Runnable {
+        ExecutorService executor = Executors.newCachedThreadPool();
+        RedisResult     aggregateResult;
+
+        @Override
+        public void run() {
+            log.debug("running redis benchmark");
+
+            aggregateResult = new RedisResult(0);
+            Collection<Future<RedisResult>> futures = new ArrayList<Future<RedisResult>>();
+
+            try {
+                Collection<RedisTarget> targets = getTargets();
+
+                // start benchmark
+                for (RedisTarget target : targets) {
+                    log.debug(String.format("submitting target '%s'", target));
+                    Future<RedisResult> future = executor.submit(new RedisCallable(target));
+                    futures.add(future);
+                }
+
+                // aggregate results
+                try {
+                    log.debug("waiting for results");
+
+                    long limit = System.currentTimeMillis() + timeout;
+                    for (Future<RedisResult> future : futures) {
+                        try {
+                            RedisResult result = future.get(limit - System.currentTimeMillis(), TimeUnit.MILLISECONDS);
+                            log.debug(String.format("got result '%s'", result));
+                            aggregate(result);
+                        } catch (Exception e) {
+                            log.warn(String.format("failed to get result"));
+                            future.cancel(true);
+                        }
+                    }
+                } catch (Exception e) {
+                    log.error("Error running redis benchmark", e);
+
+                    for (Future<RedisResult> future : futures) {
+                        future.cancel(true);
+                    }
+
+                    return;
+                }
+
+                // compare to thresholds
+                log.debug(String.format("aggregate result is '%s'", aggregateResult));
+
+                // get target from zookeeper
+                try { targetTpsGet = Integer.valueOf(zookeeper.<String> readData("/" + root + "/target.get")); } catch (Exception ignore) {}
+                try { targetTpsSet = Integer.valueOf(zookeeper.<String> readData("/" + root + "/target.set")); } catch (Exception ignore) {}
+                try { targetCountMin = Integer.valueOf(zookeeper.<String> readData("/" + root + "/target.min")); } catch (Exception ignore) {}
+                try { targetCountMax = Integer.valueOf(zookeeper.<String> readData("/" + root + "/target.max")); } catch (Exception ignore) {}
+
+                averageCount = alpha * aggregateResult.serverCount + (1.0 - alpha) * averageCount;
+
+                // calculate counts
+                int targetCountGet = -1;
+                if (aggregateResult.containsKey("GET")) {
+                    double tpsTarget = targetTpsGet;
+                    double tps = aggregateResult.get("GET");
+
+                    averageTpsGet = alpha * tps + (1.0 - alpha) * averageTpsGet;
+
+                    targetCountGet = (int) Math.ceil(tpsTarget / averageTpsGet * averageCount);
+                    log.debug(String.format("count.get=%d, target.get=%f, tps.get=%f, tps.avg.get=%f, count.avg=%f", targetCountGet, tpsTarget, tps,
+                            averageTpsGet, averageCount));
+                }
+
+                int targetCountSet = -1;
+                if (aggregateResult.containsKey("SET")) {
+                    double tpsTarget = targetTpsSet;
+                    double tps = aggregateResult.get("SET");
+
+                    averageTpsSet = alpha * tps + (1.0 - alpha) * averageTpsSet;
+
+                    targetCountSet = (int) Math.ceil(tpsTarget / averageTpsSet * averageCount);
+                    log.debug(String.format("count.set=%d, target.set=%f, tps.set=%f, tps.avg.set=%f, count.avg=%f", targetCountSet, tpsTarget, tps,
+                            averageTpsSet, averageCount));
+                }
+
+                targetCount = Math.max(targetCountGet, targetCountSet);
+
+                if (targetCountMin > 0)
+                    targetCount = Math.max(targetCount, targetCountMin);
+                if (targetCountMax > 0)
+                    targetCount = Math.min(targetCount, targetCountMax);
+
+                targetCount = Math.max(targetCount, 1);
+
+                log.debug(String.format("target count is %d", targetCount));
+                RedisTargetProvider.this.targetCount = targetCount;
+
+            } catch (Exception e) {
+                log.error("Error running redis benchmark", e);
+
+                for (Future<RedisResult> future : futures) {
+                    future.cancel(true);
+                }
+            }
+
+        }
+
+        Collection<RedisTarget> getTargets() {
+            log.debug("fetching redis servers from zookeeper");
+            Collection<RedisTarget> targets = new ArrayList<RedisTarget>();
+            Collection<String> servers = zookeeper.getChildren("/" + root);
+
+            servers.remove("target.get");
+            servers.remove("target.set");
+            servers.remove("target.min");
+            servers.remove("target.max");
+
+            for (String server : servers) {
+                if (!zookeeper.exists("/" + root + "/" + server + "/heartbeat"))
+                    continue;
+
+                String hostname = zookeeper.readData("/" + root + "/" + server + "/hostname");
+                int port = Integer.valueOf(zookeeper.<String> readData("/" + root + "/" + server + "/port"));
+
+                targets.add(new RedisTarget(hostname, port));
+            }
+
+            log.debug(String.format("found %d servers: %s", targets.size(), targets));
+            return targets;
+        }
+
+        void aggregate(RedisResult result) {
+            RedisResult newResult = new RedisResult(aggregateResult.serverCount + result.serverCount);
+
+            for (Entry<String, Double> entry : result.entrySet()) {
+                double current = 0.0d;
+                if (aggregateResult.containsKey(entry.getKey()))
+                    current = aggregateResult.get(entry.getKey());
+
+                current += entry.getValue();
+                newResult.put(entry.getKey(), current);
+            }
+
+            aggregateResult = newResult;
+        }
+    }
+
+    private static class RedisTarget {
+        final String hostname;
+        final int    port;
+
+        public RedisTarget(String hostname, int port) {
+            this.hostname = hostname;
+            this.port = port;
+        }
+
+        @Override
+        public String toString() {
+            return String.format("%s:%d", hostname, port);
+        }
+    }
+
+    private static class RedisResult extends HashMap<String, Double> {
+        /**
+         * 
+         */
+        private static final long serialVersionUID = 4599748807597500952L;
+
+        final int                 serverCount;
+
+        public RedisResult(int serverCount) {
+            this.serverCount = serverCount;
+        }
+
+        @Override
+        public String toString() {
+            return String.format("[serverCount=%d %s]", serverCount, super.toString());
+        }
+    }
+
+    private class RedisCallable implements Callable<RedisResult> {
+        final RedisTarget target;
+
+        public RedisCallable(RedisTarget target) {
+            this.target = target;
+        }
+
+        @Override
+        public RedisResult call() throws Exception {
+            log.debug(String.format("executing benchmark for '%s'", target));
+
+            ProcessBuilder builder = new ProcessBuilder();
+            builder.command(BENCHMARK_COMMAND, "-h", target.hostname, "-p", String.valueOf(target.port), "-r", String.valueOf(records), "-n",
+                    String.valueOf(requests), "-c", String.valueOf(clients), "-t", BENCHMARK_TESTS, "--csv");
+            Process process = builder.start();
+
+            log.debug(String.format("running '%s'", builder.command()));
+
+            RedisResult result = new RedisResult(1);
+
+            int retVal;
+            try {
+                retVal = process.waitFor();
+            } catch (InterruptedException e) {
+                process.destroy();
+                return result;
+            }
+
+            Preconditions.checkState(retVal == 0, "Benchmark process returned %s", retVal);
+
+            Pattern pattern = Pattern.compile("\"([A-Z0-9_]+).*\",\"([0-9\\.]+)\"");
+
+            log.debug("parsing output");
+            BufferedReader reader = new BufferedReader(new InputStreamReader(process.getInputStream()));
+            String line = null;
+            while ((line = reader.readLine()) != null) {
+                Matcher matcher = pattern.matcher(line);
+
+                if (!matcher.find())
+                    continue;
+
+                String key = matcher.group(1);
+                Double value = Double.valueOf(matcher.group(2));
+
+                result.put(key, value);
+            }
+
+            log.debug(String.format("benchmark for '%s' returned '%s'", target, result));
+
+            return result;
+        }
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/StaticTargetProvider.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/StaticTargetProvider.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/StaticTargetProvider.java
new file mode 100644
index 0000000..346f0fe
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/StaticTargetProvider.java
@@ -0,0 +1,62 @@
+package org.apache.helix.autoscale.impl;
+
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Properties;
+
+import org.apache.helix.autoscale.TargetProviderService;
+import org.apache.log4j.Logger;
+
+/**
+ * Target model based on manually set count. Count may change dynamically.
+ * 
+ */
+public class StaticTargetProvider implements TargetProviderService {
+    static final Logger        log          = Logger.getLogger(StaticTargetProvider.class);
+
+    final Map<String, Integer> targetCounts = new HashMap<String, Integer>();
+
+    public StaticTargetProvider() {
+        // left blank
+    }
+
+    public StaticTargetProvider(Map<String, Integer> targetCounts) {
+        this.targetCounts.putAll(targetCounts);
+    }
+
+    @Override
+    public int getTargetContainerCount(String containerType) {
+        return targetCounts.get(containerType);
+    }
+
+    public void setTargetContainerCount(String containerType, int targetCount) {
+        targetCounts.put(containerType, targetCount);
+    }
+
+    @Override
+    public void configure(Properties properties) throws Exception {
+        for (Entry<Object, Object> entry : properties.entrySet()) {
+            String key = (String) entry.getKey();
+
+            try {
+                int value = Integer.valueOf((String) entry.getValue());
+                log.debug(String.format("Inserting value '%s = %d'", key, value));
+                targetCounts.put(key, value);
+            } catch (NumberFormatException e) {
+                log.warn(String.format("Skipping '%s', not an integer (value='%s')", key, (String) entry.getValue()));
+            }
+        }
+    }
+
+    @Override
+    public void start() throws Exception {
+        // left blank
+    }
+
+    @Override
+    public void stop() throws Exception {
+        // left blank
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/container/DummyMasterSlaveProcess.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/container/DummyMasterSlaveProcess.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/container/DummyMasterSlaveProcess.java
new file mode 100644
index 0000000..683249d
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/container/DummyMasterSlaveProcess.java
@@ -0,0 +1,76 @@
+package org.apache.helix.autoscale.impl.container;
+
+import org.apache.helix.NotificationContext;
+import org.apache.helix.autoscale.container.ContainerProcess;
+import org.apache.helix.autoscale.container.ContainerProcessProperties;
+import org.apache.helix.model.Message;
+import org.apache.helix.participant.statemachine.StateModel;
+import org.apache.helix.participant.statemachine.StateModelFactory;
+import org.apache.helix.participant.statemachine.StateModelInfo;
+import org.apache.helix.participant.statemachine.Transition;
+import org.apache.log4j.Logger;
+
+/**
+ * Sample implementation of container with Helix participant for MasterSlave
+ * state model. Print state transitions only.
+ * 
+ */
+public class DummyMasterSlaveProcess extends ContainerProcess {
+
+    static final Logger log = Logger.getLogger(DummyMasterSlaveProcess.class);
+
+    public DummyMasterSlaveProcess(ContainerProcessProperties properties) throws Exception {
+        configure(properties);
+        setModelName("MasterSlave");
+        setModelFactory(new DummyMasterSlaveModelFactory());
+    }
+
+    @Override
+    protected void startContainer() throws Exception {
+        log.info("starting dummy process container");
+    }
+
+    @Override
+    protected void stopContainer() throws Exception {
+        log.info("stopping dummy process container");
+    }
+
+    public static class DummyMasterSlaveModelFactory extends StateModelFactory<DummyMasterSlaveStateModel> {
+        @Override
+        public DummyMasterSlaveStateModel createNewStateModel(String partitionName) {
+            return new DummyMasterSlaveStateModel();
+        }
+    }
+
+    @StateModelInfo(initialState = "OFFLINE", states = { "OFFLINE", "SLAVE", "MASTER", "DROPPED" })
+    public static class DummyMasterSlaveStateModel extends StateModel {
+
+        static final Logger log = Logger.getLogger(DummyMasterSlaveStateModel.class);
+
+        @Transition(from = "OFFLINE", to = "SLAVE")
+        public void offlineToSlave(Message m, NotificationContext context) {
+            log.trace(String.format("%s transitioning from OFFLINE to SLAVE", context.getManager().getInstanceName()));
+        }
+
+        @Transition(from = "SLAVE", to = "OFFLINE")
+        public void slaveToOffline(Message m, NotificationContext context) {
+            log.trace(String.format("%s transitioning from SLAVE to OFFLINE", context.getManager().getInstanceName()));
+        }
+
+        @Transition(from = "SLAVE", to = "MASTER")
+        public void slaveToMaster(Message m, NotificationContext context) {
+            log.trace(String.format("%s transitioning from SLAVE to MASTER", context.getManager().getInstanceName()));
+        }
+
+        @Transition(from = "MASTER", to = "SLAVE")
+        public void masterToSlave(Message m, NotificationContext context) {
+            log.trace(String.format("%s transitioning from MASTER to SLAVE", context.getManager().getInstanceName()));
+        }
+
+        @Transition(from = "OFFLINE", to = "DROPPED")
+        public void offlineToDropped(Message m, NotificationContext context) {
+            log.trace(String.format("%s transitioning from OFFLINE to DROPPED", context.getManager().getInstanceName()));
+        }
+
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/container/DummyOnlineOfflineProcess.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/container/DummyOnlineOfflineProcess.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/container/DummyOnlineOfflineProcess.java
new file mode 100644
index 0000000..a0aad8e
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/container/DummyOnlineOfflineProcess.java
@@ -0,0 +1,66 @@
+package org.apache.helix.autoscale.impl.container;
+
+import org.apache.helix.NotificationContext;
+import org.apache.helix.autoscale.container.ContainerProcess;
+import org.apache.helix.autoscale.container.ContainerProcessProperties;
+import org.apache.helix.model.Message;
+import org.apache.helix.participant.statemachine.StateModel;
+import org.apache.helix.participant.statemachine.StateModelFactory;
+import org.apache.helix.participant.statemachine.StateModelInfo;
+import org.apache.helix.participant.statemachine.Transition;
+import org.apache.log4j.Logger;
+
+/**
+ * Sample implementation of container with Helix participant for OnlineOffline
+ * state model. Print state transitions only.
+ * 
+ */
+public class DummyOnlineOfflineProcess extends ContainerProcess {
+
+    static final Logger log = Logger.getLogger(DummyOnlineOfflineProcess.class);
+
+    public DummyOnlineOfflineProcess(ContainerProcessProperties properties) throws Exception {
+        configure(properties);
+        setModelName("OnlineOffline");
+        setModelFactory(new DummyOnlineOfflineModelFactory());
+    }
+
+    @Override
+    protected void startContainer() throws Exception {
+        log.info("starting dummy online-offline process container");
+    }
+
+    @Override
+    protected void stopContainer() throws Exception {
+        log.info("stopping dummy online-offline process container");
+    }
+
+    public static class DummyOnlineOfflineModelFactory extends StateModelFactory<DummyOnlineOfflineStateModel> {
+        @Override
+        public DummyOnlineOfflineStateModel createNewStateModel(String partitionName) {
+            return new DummyOnlineOfflineStateModel();
+        }
+    }
+
+    @StateModelInfo(initialState = "OFFLINE", states = { "OFFLINE", "ONLINE", "DROPPED" })
+    public static class DummyOnlineOfflineStateModel extends StateModel {
+
+        static final Logger log = Logger.getLogger(DummyOnlineOfflineStateModel.class);
+
+        @Transition(from = "OFFLINE", to = "ONLINE")
+        public void offlineToOnline(Message m, NotificationContext context) {
+            log.trace(String.format("%s transitioning from OFFLINE to ONLINE", context.getManager().getInstanceName()));
+        }
+
+        @Transition(from = "ONLINE", to = "OFFLINE")
+        public void onlineToOffline(Message m, NotificationContext context) {
+            log.trace(String.format("%s transitioning from ONLINE to OFFLINE", context.getManager().getInstanceName()));
+        }
+
+        @Transition(from = "OFFLINE", to = "DROPPED")
+        public void offlineToDropped(Message m, NotificationContext context) {
+            log.trace(String.format("%s transitioning from OFFLINE to DROPPED", context.getManager().getInstanceName()));
+        }
+
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/container/RedisServerProcess.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/container/RedisServerProcess.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/container/RedisServerProcess.java
new file mode 100644
index 0000000..5f6f745
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/container/RedisServerProcess.java
@@ -0,0 +1,140 @@
+package org.apache.helix.autoscale.impl.container;
+
+import java.net.InetAddress;
+import java.util.concurrent.Executors;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.TimeUnit;
+
+import org.I0Itec.zkclient.ZkClient;
+import org.apache.helix.NotificationContext;
+import org.apache.helix.autoscale.container.ContainerProcess;
+import org.apache.helix.autoscale.container.ContainerProcessProperties;
+import org.apache.helix.model.Message;
+import org.apache.helix.participant.statemachine.StateModel;
+import org.apache.helix.participant.statemachine.StateModelFactory;
+import org.apache.helix.participant.statemachine.StateModelInfo;
+import org.apache.helix.participant.statemachine.Transition;
+import org.apache.log4j.Logger;
+
+/**
+ * Container implementation for redis-server. Uses OnlineOffline model, spawns
+ * Redis as Shell process and writes metadata to zookeeper.
+ * 
+ */
+public class RedisServerProcess extends ContainerProcess {
+
+    static final Logger        log                  = Logger.getLogger(RedisServerProcess.class);
+
+    public static final String REDIS_SERVER_COMMAND = "redis-server";
+
+    public static final long   MONITOR_INTERVAL     = 5000;
+
+    ZkClient                   zookeeper;
+
+    final String               address;
+    final String               root;
+    final String               name;
+    final int                  basePort;
+
+    Process                    process;
+
+    ScheduledExecutorService   executor;
+
+    public RedisServerProcess(ContainerProcessProperties properties) throws Exception {
+        configure(properties);
+        setModelName("OnlineOffline");
+        setModelFactory(new RedisServerModelFactory());
+
+        address = properties.getProperty("address");
+        root = properties.getProperty("root");
+        basePort = Integer.valueOf(properties.getProperty("baseport"));
+        name = properties.getProperty(ContainerProcessProperties.NAME);
+    }
+
+    @Override
+    protected void startContainer() throws Exception {
+        log.info(String.format("starting redis server container for instance '%s'", name));
+
+        String hostname = InetAddress.getLocalHost().getHostName();
+        int port = basePort + Integer.valueOf(name.split("_")[1]);
+
+        log.debug(String.format("Starting redis server at '%s:%d'", hostname, port));
+
+        ProcessBuilder builder = new ProcessBuilder();
+        builder.command(REDIS_SERVER_COMMAND, "--port", String.valueOf(port));
+        process = builder.start();
+
+        log.debug("Updating zookeeper");
+        zookeeper = new ZkClient(address);
+        zookeeper.deleteRecursive("/" + root + "/" + name);
+        zookeeper.createPersistent("/" + root + "/" + name, true);
+        zookeeper.createPersistent("/" + root + "/" + name + "/hostname", hostname);
+        zookeeper.createPersistent("/" + root + "/" + name + "/port", String.valueOf(port));
+        zookeeper.createEphemeral("/" + root + "/" + name + "/heartbeat");
+
+        log.debug("Starting process monitor");
+        executor = Executors.newSingleThreadScheduledExecutor();
+        executor.scheduleAtFixedRate(new ProcessMonitor(), 0, MONITOR_INTERVAL, TimeUnit.MILLISECONDS);
+
+    }
+
+    @Override
+    protected void stopContainer() throws Exception {
+        log.info("stopping redis server container");
+
+        log.debug("Stopping process monitor");
+        executor.shutdownNow();
+
+        log.debug("Updating zookeeper");
+        zookeeper.deleteRecursive("/" + root + "/" + name);
+        zookeeper.close();
+
+        log.debug("Stopping process");
+        process.destroy();
+        process.waitFor();
+    }
+
+    public class RedisServerModelFactory extends StateModelFactory<RedisServerModel> {
+        @Override
+        public RedisServerModel createNewStateModel(String partitionName) {
+            return new RedisServerModel();
+        }
+    }
+
+    @StateModelInfo(initialState = "OFFLINE", states = { "OFFLINE", "ONLINE", "DROPPED" })
+    public class RedisServerModel extends StateModel {
+
+        @Transition(from = "OFFLINE", to = "ONLINE")
+        public void offlineToSlave(Message m, NotificationContext context) {
+            // left blank
+            log.trace(String.format("%s transitioning from %s to %s", context.getManager().getInstanceName(), m.getFromState(), m.getToState()));
+        }
+
+        @Transition(from = "ONLINE", to = "OFFLINE")
+        public void slaveToOffline(Message m, NotificationContext context) {
+            // left blank
+            log.trace(String.format("%s transitioning from %s to %s", context.getManager().getInstanceName(), m.getFromState(), m.getToState()));
+        }
+
+        @Transition(from = "OFFLINE", to = "DROPPED")
+        public void offlineToDropped(Message m, NotificationContext context) {
+            // left blank
+            log.trace(String.format("%s transitioning from %s to %s", context.getManager().getInstanceName(), m.getFromState(), m.getToState()));
+        }
+
+    }
+
+    private class ProcessMonitor implements Runnable {
+        @Override
+        public void run() {
+            try {
+                process.exitValue();
+                log.warn("detected process failure");
+                fail();
+            } catch (Exception e) {
+                // expected
+            }
+        }
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/container/ZookeeperMasterSlaveProcess.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/container/ZookeeperMasterSlaveProcess.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/container/ZookeeperMasterSlaveProcess.java
new file mode 100644
index 0000000..0c1b728
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/container/ZookeeperMasterSlaveProcess.java
@@ -0,0 +1,108 @@
+package org.apache.helix.autoscale.impl.container;
+
+import org.I0Itec.zkclient.ZkClient;
+import org.apache.helix.NotificationContext;
+import org.apache.helix.autoscale.container.ContainerProcess;
+import org.apache.helix.autoscale.container.ContainerProcessProperties;
+import org.apache.helix.model.Message;
+import org.apache.helix.participant.statemachine.StateModel;
+import org.apache.helix.participant.statemachine.StateModelFactory;
+import org.apache.helix.participant.statemachine.StateModelInfo;
+import org.apache.helix.participant.statemachine.Transition;
+import org.apache.log4j.Logger;
+
+/**
+ * Sample implementation of container with Helix participant for MasterSlave
+ * state model. Writes current state to separate zookeeper domain.
+ * 
+ */
+public class ZookeeperMasterSlaveProcess extends ContainerProcess {
+
+    static final Logger log = Logger.getLogger(ZookeeperMasterSlaveProcess.class);
+
+    // client to the separate zookeeper domain that mirrors partition states
+    ZkClient            zookeeper;
+
+    // zookeeper connect string (from the "address" property)
+    final String        address;
+    // root node under which per-container state nodes live
+    final String        root;
+    // unique container name (from ContainerProcessProperties.NAME)
+    final String        name;
+
+    /**
+     * Configures the container and registers the MasterSlave state model
+     * factory.
+     *
+     * @param properties container configuration; must supply "address",
+     *                   "root" and the container name
+     * @throws Exception if configure() rejects the properties
+     */
+    public ZookeeperMasterSlaveProcess(ContainerProcessProperties properties) throws Exception {
+        configure(properties);
+        setModelName("MasterSlave");
+        setModelFactory(new ZookeeperMasterSlaveModelFactory());
+
+        address = properties.getProperty("address");
+        root = properties.getProperty("root");
+        name = properties.getProperty(ContainerProcessProperties.NAME);
+    }
+
+    /** Connects to zookeeper and creates the persistent base node /root/name. */
+    @Override
+    protected void startContainer() throws Exception {
+        log.info("starting zookeeper process container");
+
+        zookeeper = new ZkClient(address);
+        zookeeper.createPersistent("/" + root + "/" + name, true);
+    }
+
+    /** Closes the zookeeper client; ephemeral state nodes vanish with it. */
+    @Override
+    protected void stopContainer() throws Exception {
+        log.info("stopping zookeeper process container");
+
+        zookeeper.close();
+    }
+
+    /** Creates one state model per assigned partition. */
+    public class ZookeeperMasterSlaveModelFactory extends StateModelFactory<ZookeeperMasterSlaveModel> {
+        @Override
+        public ZookeeperMasterSlaveModel createNewStateModel(String partitionName) {
+            return new ZookeeperMasterSlaveModel();
+        }
+    }
+
+    /**
+     * MasterSlave state model that records the partition's current state as
+     * an ephemeral node "/root/name/resource_partition" in the external
+     * zookeeper domain.
+     */
+    @StateModelInfo(initialState = "OFFLINE", states = { "OFFLINE", "SLAVE", "MASTER", "DROPPED" })
+    public class ZookeeperMasterSlaveModel extends StateModel {
+
+        @Transition(from = "OFFLINE", to = "SLAVE")
+        public void offlineToSlave(Message m, NotificationContext context) {
+            transition(m, context);
+        }
+
+        @Transition(from = "SLAVE", to = "OFFLINE")
+        public void slaveToOffline(Message m, NotificationContext context) {
+            transition(m, context);
+        }
+
+        @Transition(from = "SLAVE", to = "MASTER")
+        public void slaveToMaster(Message m, NotificationContext context) {
+            transition(m, context);
+        }
+
+        @Transition(from = "MASTER", to = "SLAVE")
+        public void masterToSlave(Message m, NotificationContext context) {
+            transition(m, context);
+        }
+
+        /** OFFLINE -> DROPPED: remove the partition's state node entirely. */
+        @Transition(from = "OFFLINE", to = "DROPPED")
+        public void offlineToDropped(Message m, NotificationContext context) {
+            log.trace(String.format("%s transitioning from %s to %s", context.getManager().getInstanceName(), m.getFromState(), m.getToState()));
+
+            String resource = m.getResourceName();
+            String partition = m.getPartitionName();
+            String path = "/" + root + "/" + name + "/" + resource + "_" + partition;
+
+            zookeeper.delete(path);
+        }
+
+        /**
+         * Replaces the partition's state node with a fresh ephemeral node
+         * carrying the target state as data.
+         *
+         * NOTE(review): delete-then-create is not atomic; a reader can
+         * observe a missing node mid-transition -- confirm consumers
+         * tolerate that window.
+         */
+        public void transition(Message m, NotificationContext context) {
+            log.trace(String.format("%s transitioning from %s to %s", context.getManager().getInstanceName(), m.getFromState(), m.getToState()));
+
+            String resource = m.getResourceName();
+            String partition = m.getPartitionName();
+            String path = "/" + root + "/" + name + "/" + resource + "_" + partition;
+
+            zookeeper.delete(path);
+            zookeeper.createEphemeral(path, m.getToState());
+        }
+
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/local/LocalContainerProvider.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/local/LocalContainerProvider.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/local/LocalContainerProvider.java
new file mode 100644
index 0000000..7e5d553
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/local/LocalContainerProvider.java
@@ -0,0 +1,119 @@
+package org.apache.helix.autoscale.impl.local;
+
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Properties;
+
+import org.apache.helix.autoscale.ContainerProvider;
+import org.apache.helix.autoscale.ContainerProviderService;
+import org.apache.helix.autoscale.container.ContainerProcess;
+import org.apache.helix.autoscale.container.ContainerProcessProperties;
+import org.apache.helix.autoscale.container.ContainerUtils;
+import org.apache.helix.autoscale.impl.local.LocalContainerSingleton.LocalProcess;
+import org.apache.helix.autoscale.provider.ProviderProperties;
+import org.apache.log4j.Logger;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * {@link ContainerProvider} spawning VM-local containers. Only works in single-VM
+ * deployments as container metadata is managed via singleton.
+ * 
+ * @see LocalContainerSingleton
+ */
+class LocalContainerProvider implements ContainerProviderService {
+
+    static final Logger           log   = Logger.getLogger(LocalContainerProvider.class);
+
+    // registered container types, keyed by type name
+    final Map<String, Properties> types = new HashMap<String, Properties>();
+
+    String                        address;
+    String                        cluster;
+    String                        name;
+
+    /**
+     * Reads provider-level settings ("address", "cluster", "name") and
+     * registers every configured container type.
+     *
+     * @param properties provider configuration
+     * @throws IllegalArgumentException if the properties are not valid
+     */
+    @Override
+    public void configure(Properties properties) throws Exception {
+        ProviderProperties providerProperties = new ProviderProperties();
+        providerProperties.putAll(properties);
+        Preconditions.checkArgument(providerProperties.isValid());
+
+        this.address = providerProperties.getProperty("address");
+        this.cluster = providerProperties.getProperty("cluster");
+        this.name = providerProperties.getProperty("name");
+
+        for (String containerType : providerProperties.getContainers()) {
+            registerType(containerType, providerProperties.getContainer(containerType));
+        }
+    }
+
+    @Override
+    public void start() throws Exception {
+        // left blank
+    }
+
+    /** Stopping the provider tears down all containers it owns. */
+    @Override
+    public void stop() throws Exception {
+        destroyAll();
+    }
+
+    /**
+     * Spawns a VM-local container of a registered type and records it in the
+     * shared singleton registry, owned by this provider's name.
+     *
+     * @param id   unique container id
+     * @param type registered container type
+     * @throws IllegalStateException if the id exists or the type is unknown
+     */
+    @Override
+    public void create(String id, String type) throws Exception {
+        Map<String, LocalProcess> processes = LocalContainerSingleton.getProcesses();
+
+        synchronized (processes) {
+            Preconditions.checkState(!processes.containsKey(id), "Process '%s' already exists", id);
+            Preconditions.checkState(types.containsKey(type), "Type '%s' is not registered", type);
+
+            ContainerProcessProperties properties = new ContainerProcessProperties(types.get(type));
+
+            properties.setProperty(ContainerProcessProperties.CLUSTER, cluster);
+            properties.setProperty(ContainerProcessProperties.NAME, id);
+            properties.setProperty(ContainerProcessProperties.ADDRESS, address);
+
+            log.info(String.format("Running container '%s' (properties='%s')", id, properties));
+
+            ContainerProcess process = ContainerUtils.createProcess(properties);
+            process.start();
+
+            processes.put(id, new LocalProcess(id, name, process));
+
+        }
+    }
+
+    /**
+     * Stops a container and removes it from the registry.
+     *
+     * @param id container id
+     * @throws IllegalArgumentException if no such container is registered
+     */
+    @Override
+    public void destroy(String id) throws Exception {
+        Map<String, LocalProcess> processes = LocalContainerSingleton.getProcesses();
+
+        synchronized (processes) {
+            if (!processes.containsKey(id))
+                throw new IllegalArgumentException(String.format("Process '%s' does not exists", id));
+
+            log.info(String.format("Destroying container '%s'", id));
+
+            LocalProcess local = processes.remove(id);
+
+            local.process.stop();
+        }
+    }
+
+    /**
+     * Destroys every container owned by this provider. Iterates a snapshot
+     * because destroy() mutates the registry; the nested destroy() call
+     * re-enters the same (reentrant) monitor, which is safe.
+     */
+    @Override
+    public void destroyAll() {
+        Map<String, LocalProcess> processes = LocalContainerSingleton.getProcesses();
+
+        synchronized (processes) {
+            log.info("Destroying all processes");
+            for (LocalProcess local : new HashSet<LocalProcess>(processes.values())) {
+                if (local.owner.equals(name)) {
+                    try { destroy(local.id); } catch (Exception ignore) {}
+                }
+            }
+        }
+    }
+
+    /** Records a container type so create() can instantiate it later. */
+    void registerType(String name, Properties properties) {
+        log.debug(String.format("Registering container type '%s' (properties='%s')", name, properties));
+        types.put(name, properties);
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/local/LocalContainerProviderProcess.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/local/LocalContainerProviderProcess.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/local/LocalContainerProviderProcess.java
new file mode 100644
index 0000000..ca1047c
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/local/LocalContainerProviderProcess.java
@@ -0,0 +1,45 @@
+package org.apache.helix.autoscale.impl.local;
+
+import java.util.Properties;
+
+import org.apache.helix.autoscale.Service;
+import org.apache.helix.autoscale.provider.ProviderProcess;
+import org.apache.helix.autoscale.provider.ProviderProperties;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * Configurable and runnable service for {@link LocalContainerProvider}. 
+ *
+ */
+public class LocalContainerProviderProcess implements Service {
+    LocalContainerProvider provider;
+    ProviderProcess        process;
+
+    /**
+     * Wires up a LocalContainerProvider and its Helix participant process.
+     *
+     * @param properties provider configuration; must pass ProviderProperties.isValid()
+     * @throws IllegalArgumentException if the properties are not valid
+     */
+    @Override
+    public void configure(Properties properties) throws Exception {
+        ProviderProperties providerProperties = new ProviderProperties();
+        providerProperties.putAll(properties);
+        
+        Preconditions.checkArgument(providerProperties.isValid(), "provider properties not valid (properties='%s')", properties);
+        
+        provider = new LocalContainerProvider();
+        provider.configure(properties);
+
+        process = new ProviderProcess();
+        process.configure(providerProperties);
+        // NOTE(review): "setConteinerProvider" is a typo ("Container")
+        // declared in ProviderProcess (outside this file); rename it there.
+        process.setConteinerProvider(provider);
+    }
+
+    /** Starts the provider first, then the participant process. */
+    @Override
+    public void start() throws Exception {
+        provider.start();
+        process.start();
+    }
+
+    /** Stops in reverse order: participant process first, then provider. */
+    @Override
+    public void stop() throws Exception {
+        process.stop();
+        provider.stop();
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/local/LocalContainerSingleton.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/local/LocalContainerSingleton.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/local/LocalContainerSingleton.java
new file mode 100644
index 0000000..74f9279
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/local/LocalContainerSingleton.java
@@ -0,0 +1,56 @@
+package org.apache.helix.autoscale.impl.local;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.helix.autoscale.container.ContainerProcess;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * Singleton tracking metadata for VM-local containers spawned via
+ * {@link LocalContainerProvider}.
+ * 
+ */
+public class LocalContainerSingleton {
+    final static Map<String, LocalProcess> processes = new HashMap<String, LocalProcess>();
+
+    private LocalContainerSingleton() {
+        // static utility holder, never instantiated
+    }
+
+    /** @return the shared process registry; callers must synchronize on it. */
+    public static Map<String, LocalProcess> getProcesses() {
+        return processes;
+    }
+
+    /** Stops every registered container and empties the registry. */
+    public static void reset() {
+        synchronized (processes) {
+            for (Map.Entry<String, LocalProcess> entry : processes.entrySet()) {
+                entry.getValue().process.stop();
+            }
+            processes.clear();
+        }
+    }
+
+    /**
+     * Stops a single container and removes it from the registry.
+     *
+     * @param id container identifier
+     * @throws IllegalArgumentException if no such container is registered
+     */
+    public static void killProcess(String id) throws InterruptedException {
+        synchronized (processes) {
+            Preconditions.checkArgument(processes.containsKey(id), "Process '%s' does not exist", id);
+            processes.get(id).process.stop();
+            processes.remove(id);
+        }
+    }
+
+    /** Immutable record of a locally spawned container. */
+    static class LocalProcess {
+        final String           id;
+        final String           owner;
+        final ContainerProcess process;
+
+        public LocalProcess(String id, String owner, ContainerProcess process) {
+            this.id = id;
+            this.owner = owner;
+            this.process = process;
+        }
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/local/LocalStatusProvider.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/local/LocalStatusProvider.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/local/LocalStatusProvider.java
new file mode 100644
index 0000000..7cb02bb
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/local/LocalStatusProvider.java
@@ -0,0 +1,53 @@
+package org.apache.helix.autoscale.impl.local;
+
+import java.util.Map;
+import java.util.Properties;
+
+import org.apache.helix.autoscale.StatusProviderService;
+import org.apache.helix.autoscale.impl.local.LocalContainerSingleton.LocalProcess;
+
+/**
+ * StatusProvider for VM-local containers spawned via
+ * {@link LocalContainerProvider}. Runnable and configurable service.
+ * 
+ */
+public class LocalStatusProvider implements StatusProviderService {
+
+    /** @return true if a container with the given id is registered. */
+    @Override
+    public boolean exists(String id) {
+        Map<String, LocalProcess> registry = LocalContainerSingleton.getProcesses();
+
+        synchronized (registry) {
+            return registry.containsKey(id);
+        }
+    }
+
+    /** @return true if the container is registered and its process is active. */
+    @Override
+    public boolean isHealthy(String id) {
+        Map<String, LocalProcess> registry = LocalContainerSingleton.getProcesses();
+
+        synchronized (registry) {
+            LocalProcess entry = registry.get(id);
+            return (entry != null) && entry.process.isActive();
+        }
+    }
+
+    @Override
+    public void configure(Properties properties) throws Exception {
+        // no configuration required
+    }
+
+    @Override
+    public void start() throws Exception {
+        // nothing to start
+    }
+
+    @Override
+    public void stop() throws Exception {
+        // nothing to stop
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/shell/ShellContainerProcess.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/shell/ShellContainerProcess.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/shell/ShellContainerProcess.java
new file mode 100644
index 0000000..6847ac5
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/shell/ShellContainerProcess.java
@@ -0,0 +1,93 @@
+package org.apache.helix.autoscale.impl.shell;
+
+import java.io.File;
+import java.util.concurrent.Executors;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.helix.autoscale.container.ContainerProcess;
+import org.apache.helix.autoscale.container.ContainerProcessProperties;
+import org.apache.helix.autoscale.container.ContainerUtils;
+import org.apache.log4j.Logger;
+
+/**
+ * Host process for Shell-based container. ContainerProcess configuration is
+ * read from path in first command-line argument. Status is maintained using
+ * temporary marker file. (Program entry point)
+ * 
+ */
+class ShellContainerProcess {
+    static final Logger             log              = Logger.getLogger(ShellContainerProcess.class);
+
+    // liveness-poll period in milliseconds
+    public static final long        MONITOR_INTERVAL = 5000;
+
+    // directory holding the "container is up" marker file
+    static String                   markerDir;
+    static ContainerProcess         process;
+    static ScheduledExecutorService executor         = Executors.newSingleThreadScheduledExecutor();
+
+    /**
+     * Entry point. args[0] is the path to the container properties file,
+     * args[1] the marker directory. Starts the container, drops the marker
+     * to signal readiness, and schedules a periodic liveness monitor.
+     */
+    public static void main(String[] args) throws Exception {
+        final String propertiesPath = args[0];
+        markerDir = args[1];
+
+        ContainerProcessProperties properties = ContainerUtils.getPropertiesFromPath(propertiesPath);
+
+        process = ContainerUtils.createProcess(properties);
+
+        log.debug("Installing shutdown hooks");
+        Runtime.getRuntime().addShutdownHook(new Thread() {
+            @Override
+            public void run() {
+                log.debug("Running shutdown hook");
+                try {
+                    ShellContainerProcess.stop();
+                } catch (Exception ignore) {
+                }
+            }
+        });
+
+        log.debug("Launching shell container process");
+        process.start();
+
+        // marker is created only after start() returns, so its presence
+        // signals "container active" to the spawning provider
+        ShellUtils.createMarker(new File(markerDir));
+
+        log.debug("Launching process monitor");
+        executor.scheduleAtFixedRate(new ProcessMonitor(), 0, MONITOR_INTERVAL, TimeUnit.MILLISECONDS);
+    }
+
+    /**
+     * Stops the container, removes the readiness marker, and shuts down the
+     * monitor executor. Called from both the shutdown hook and the monitor.
+     *
+     * NOTE(review): when invoked from the monitor task itself,
+     * shutdownNow() interrupts the calling thread and the subsequent
+     * Thread.sleep() presumably exits via InterruptedException (swallowed by
+     * the monitor's catch) -- confirm the executor still terminates cleanly.
+     */
+    static void stop() throws InterruptedException {
+        log.debug("Shutting down shell process");
+        if (process != null) {
+            process.stop();
+            ShellUtils.destroyMarker(new File(markerDir));
+        }
+        if (executor != null) {
+            executor.shutdownNow();
+            while (!executor.isTerminated()) {
+                Thread.sleep(100);
+            }
+            executor = null;
+        }
+    }
+
+    /**
+     * Periodic liveness check: exits the VM on container failure (which
+     * re-triggers stop() via the shutdown hook) and tears down quietly on a
+     * regular shutdown.
+     */
+    static class ProcessMonitor implements Runnable {
+        @Override
+        public void run() {
+            if (process.isFailed()) {
+                log.warn("detected process failure");
+                try {
+                    ShellContainerProcess.stop();
+                } catch (Exception ignore) {
+                }
+                System.exit(1);
+            }
+            if (!process.isActive()) {
+                log.warn("detected process shutdown");
+                try {
+                    ShellContainerProcess.stop();
+                } catch (Exception ignore) {
+                }
+            }
+        }
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/shell/ShellContainerProvider.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/shell/ShellContainerProvider.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/shell/ShellContainerProvider.java
new file mode 100644
index 0000000..df4c6ef
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/shell/ShellContainerProvider.java
@@ -0,0 +1,151 @@
+package org.apache.helix.autoscale.impl.shell;
+
+import java.io.File;
+import java.io.FileWriter;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Properties;
+import java.util.concurrent.TimeoutException;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.helix.autoscale.ContainerProvider;
+import org.apache.helix.autoscale.ContainerProviderService;
+import org.apache.helix.autoscale.container.ContainerProcessProperties;
+import org.apache.helix.autoscale.impl.shell.ShellContainerSingleton.ShellProcess;
+import org.apache.helix.autoscale.provider.ProviderProperties;
+import org.apache.log4j.Logger;
+
+import com.google.common.base.Preconditions;
+import com.google.common.io.Files;
+
+/**
+ * {@link ContainerProvider} spawning shell-based containers. Only works in single-VM
+ * deployments as container metadata is managed via singleton.
+ * 
+ * @see ShellContainerSingleton
+ */
+class ShellContainerProvider implements ContainerProviderService {
+
+    static final Logger                    log               = Logger.getLogger(ShellContainerProvider.class);
+
+    static final String                    RUN_COMMAND       = "/bin/sh";
+
+    static final long                      POLL_INTERVAL     = 1000;
+    static final long                      CONTAINER_TIMEOUT = 60000;
+
+    // global view of processes required
+    // NOTE(review): this field appears unused -- create/destroy go through
+    // ShellContainerSingleton.getProcesses(); kept to avoid breaking any
+    // out-of-view reference, but consider removing it.
+    static final Map<String, ShellProcess> processes         = new HashMap<String, ShellProcess>();
+
+    // registered container types, keyed by type name
+    final Map<String, Properties>          types             = new HashMap<String, Properties>();
+
+    String                                 address;
+    String                                 cluster;
+    String                                 name;
+
+    /**
+     * Reads provider-level settings ("address", "cluster", "name") and
+     * registers every configured container type.
+     *
+     * @param properties provider configuration
+     * @throws IllegalArgumentException if the properties are not valid
+     */
+    @Override
+    public void configure(Properties properties) throws Exception {
+        Preconditions.checkNotNull(properties);
+        ProviderProperties providerProperties = new ProviderProperties();
+        providerProperties.putAll(properties);
+        Preconditions.checkArgument(providerProperties.isValid());
+
+        this.address = providerProperties.getProperty("address");
+        this.cluster = providerProperties.getProperty("cluster");
+        this.name = providerProperties.getProperty("name");
+
+        for (String containerType : providerProperties.getContainers()) {
+            registerType(containerType, providerProperties.getContainer(containerType));
+        }
+    }
+
+    @Override
+    public void start() throws Exception {
+        // left blank
+    }
+
+    /** Stopping the provider tears down all containers it owns. */
+    @Override
+    public void stop() throws Exception {
+        destroyAll();
+    }
+
+    /**
+     * Spawns a shell-based container and blocks until it signals readiness
+     * via a marker file, or CONTAINER_TIMEOUT elapses.
+     *
+     * NOTE(review): the readiness poll runs while holding the registry lock,
+     * blocking all other provider operations for up to CONTAINER_TIMEOUT; on
+     * timeout the entry stays registered so destroy() can still clean it up.
+     *
+     * @param id   unique container id
+     * @param type registered container type
+     * @throws IllegalStateException if the id exists or the type is unknown
+     * @throws TimeoutException if the container does not become active in time
+     */
+    @Override
+    public void create(String id, String type) throws Exception {
+        Map<String, ShellProcess> processes = ShellContainerSingleton.getProcesses();
+
+        synchronized (processes) {
+            Preconditions.checkState(!processes.containsKey(id), "Process '%s' already exists", id);
+            Preconditions.checkState(types.containsKey(type), "Type '%s' is not registered", type);
+
+            ContainerProcessProperties properties = new ContainerProcessProperties(types.get(type));
+
+            properties.setProperty(ContainerProcessProperties.CLUSTER, cluster);
+            properties.setProperty(ContainerProcessProperties.NAME, id);
+            properties.setProperty(ContainerProcessProperties.ADDRESS, address);
+
+            File tmpDir = Files.createTempDir();
+            File tmpProperties = new File(tmpDir.getCanonicalPath() + File.separator + ShellUtils.SHELL_CONTAINER_PROPERTIES);
+            File tmpMarker = new File(tmpDir.getCanonicalPath());
+
+            // BUGFIX: Properties.store() flushes but never closes the writer,
+            // so the original leaked a file handle per container; close it.
+            FileWriter writer = new FileWriter(tmpProperties);
+            try {
+                properties.store(writer, id);
+            } finally {
+                writer.close();
+            }
+
+            log.info(String.format("Running container '%s' (properties='%s')", id, properties));
+
+            log.debug(String.format("Invoking command '%s %s %s %s'", RUN_COMMAND, ShellUtils.SHELL_CONTAINER_PATH, tmpProperties.getCanonicalPath(),
+                    tmpMarker.getCanonicalPath()));
+
+            ProcessBuilder builder = new ProcessBuilder();
+            builder.command(RUN_COMMAND, ShellUtils.SHELL_CONTAINER_PATH, tmpProperties.getCanonicalPath(), tmpMarker.getCanonicalPath());
+
+            Process process = builder.start();
+
+            processes.put(id, new ShellProcess(id, name, process, tmpDir));
+
+            long limit = System.currentTimeMillis() + CONTAINER_TIMEOUT;
+            while (!ShellUtils.hasMarker(tmpDir)) {
+                if (System.currentTimeMillis() >= limit) {
+                    throw new TimeoutException(String.format("Container '%s' failed to reach active state", id));
+                }
+                Thread.sleep(POLL_INTERVAL);
+            }
+        }
+    }
+
+    /**
+     * Destroys a container's process, waits for it to exit and removes its
+     * scratch directory.
+     *
+     * @param id container id
+     * @throws IllegalArgumentException if no such container is registered
+     */
+    @Override
+    public void destroy(String id) throws Exception {
+        Map<String, ShellProcess> processes = ShellContainerSingleton.getProcesses();
+
+        synchronized (processes) {
+            if (!processes.containsKey(id))
+                throw new IllegalArgumentException(String.format("Process '%s' does not exists", id));
+
+            log.info(String.format("Destroying container '%s'", id));
+
+            ShellProcess shell = processes.remove(id);
+            shell.process.destroy();
+            shell.process.waitFor();
+
+            FileUtils.deleteDirectory(shell.tmpDir);
+        }
+    }
+
+    /**
+     * Destroys every container owned by this provider. Iterates a snapshot
+     * because destroy() mutates the registry; the nested destroy() call
+     * re-enters the same (reentrant) monitor, which is safe.
+     */
+    @Override
+    public void destroyAll() {
+        Map<String, ShellProcess> processes = ShellContainerSingleton.getProcesses();
+
+        synchronized (processes) {
+            log.info("Destroying all owned processes");
+            for (ShellProcess process : new HashSet<ShellProcess>(processes.values())) {
+                if (process.owner.equals(name)) {
+                    try { destroy(process.id); } catch (Exception ignore) {}
+                }
+            }
+        }
+    }
+
+    /** Records a container type so create() can instantiate it later. */
+    void registerType(String name, Properties properties) {
+        log.debug(String.format("Registering container type '%s' (properties='%s')", name, properties));
+        types.put(name, properties);
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/shell/ShellContainerProviderProcess.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/shell/ShellContainerProviderProcess.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/shell/ShellContainerProviderProcess.java
new file mode 100644
index 0000000..1148b4e
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/shell/ShellContainerProviderProcess.java
@@ -0,0 +1,45 @@
+package org.apache.helix.autoscale.impl.shell;
+
+import java.util.Properties;
+
+import org.apache.helix.autoscale.Service;
+import org.apache.helix.autoscale.provider.ProviderProcess;
+import org.apache.helix.autoscale.provider.ProviderProperties;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * Configurable and runnable service for {@link ShellContainerProvider}.
+ * 
+ */
+public class ShellContainerProviderProcess implements Service {
+    ShellContainerProvider provider;
+    ProviderProcess        process;
+
+    /**
+     * Wires up a ShellContainerProvider and its Helix participant process.
+     *
+     * @param properties provider configuration; must pass ProviderProperties.isValid()
+     * @throws IllegalArgumentException if the properties are not valid
+     */
+    @Override
+    public void configure(Properties properties) throws Exception {
+        ProviderProperties providerProperties = new ProviderProperties();
+        providerProperties.putAll(properties);
+
+        Preconditions.checkArgument(providerProperties.isValid(), "provider properties not valid (properties='%s')", properties);
+
+        provider = new ShellContainerProvider();
+        provider.configure(properties);
+
+        process = new ProviderProcess();
+        process.configure(providerProperties);
+        // NOTE(review): "setConteinerProvider" is a typo ("Container")
+        // declared in ProviderProcess (outside this file); rename it there.
+        process.setConteinerProvider(provider);
+    }
+
+    /** Starts the provider first, then the participant process. */
+    @Override
+    public void start() throws Exception {
+        provider.start();
+        process.start();
+    }
+
+    /** Stops in reverse order: participant process first, then provider. */
+    @Override
+    public void stop() throws Exception {
+        process.stop();
+        provider.stop();
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/shell/ShellContainerSingleton.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/shell/ShellContainerSingleton.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/shell/ShellContainerSingleton.java
new file mode 100644
index 0000000..a82baea
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/shell/ShellContainerSingleton.java
@@ -0,0 +1,58 @@
+package org.apache.helix.autoscale.impl.shell;
+
+import java.io.File;
+import java.util.HashMap;
+import java.util.Map;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * Singleton tracking metadata for shell-based containers spawned via
+ * {@link ShellContainerProvider}.
+ * 
+ */
+public class ShellContainerSingleton {
+    static final Map<String, ShellProcess> processes = new HashMap<String, ShellProcess>();
+
+    private ShellContainerSingleton() {
+        // static utility holder, never instantiated
+    }
+
+    /** @return the shared process registry; callers must synchronize on it. */
+    public static Map<String, ShellProcess> getProcesses() {
+        return processes;
+    }
+
+    /** Destroys every registered shell process and empties the registry. */
+    public static void reset() {
+        synchronized (processes) {
+            for (Map.Entry<String, ShellProcess> entry : processes.entrySet()) {
+                Process child = entry.getValue().process;
+                child.destroy();
+                try {
+                    child.waitFor();
+                } catch (Exception ignore) {
+                    // best-effort teardown
+                }
+            }
+            processes.clear();
+        }
+    }
+
+    /**
+     * Destroys a single shell process, waits for it to exit, and removes it
+     * from the registry.
+     *
+     * @param id container identifier
+     * @throws IllegalArgumentException if no such container is registered
+     * @throws InterruptedException if interrupted while waiting for exit
+     */
+    public static void killProcess(String id) throws InterruptedException {
+        synchronized (processes) {
+            Preconditions.checkArgument(processes.containsKey(id), "Process '%s' does not exist", id);
+            Process child = processes.get(id).process;
+            child.destroy();
+            child.waitFor();
+            processes.remove(id);
+        }
+    }
+
+    /** Immutable record of a spawned shell container. */
+    static class ShellProcess {
+        final String  id;
+        final String  owner;
+        final Process process;
+        final File    tmpDir;
+
+        public ShellProcess(String id, String owner, Process process, File tmpDir) {
+            this.id = id;
+            this.owner = owner;
+            this.process = process;
+            this.tmpDir = tmpDir;
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/shell/ShellStatusProvider.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/shell/ShellStatusProvider.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/shell/ShellStatusProvider.java
new file mode 100644
index 0000000..8094050
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/shell/ShellStatusProvider.java
@@ -0,0 +1,64 @@
+package org.apache.helix.autoscale.impl.shell;
+
+import java.util.Map;
+import java.util.Properties;
+
+import org.apache.helix.autoscale.StatusProviderService;
+import org.apache.helix.autoscale.impl.shell.ShellContainerSingleton.ShellProcess;
+
+/**
+ * StatusProvider for shell-based containers spawned via
+ * {@link ShellContainerProvider}. Runnable and configurable service.
+ * 
+ */
+public class ShellStatusProvider implements StatusProviderService {
+
+    @Override
+    public boolean exists(String id) {
+        Map<String, ShellProcess> processes = ShellContainerSingleton.getProcesses();
+
+        synchronized (processes) {
+            return processes.containsKey(id);
+        }
+    }
+
+    @Override
+    public boolean isHealthy(String id) {
+        Map<String, ShellProcess> processes = ShellContainerSingleton.getProcesses();
+
+        synchronized (processes) {
+            ShellProcess shell = processes.get(id);
+
+            if (shell == null)
+                return false;
+
+            if (!ShellUtils.hasMarker(shell.tmpDir))
+                return false;
+
+            try {
+                // exit value
+                shell.process.exitValue();
+                return false;
+            } catch (IllegalThreadStateException e) {
+                // expected
+            }
+
+            return true;
+        }
+    }
+
+    @Override
+    public void configure(Properties properties) throws Exception {
+        // left blank
+    }
+
+    @Override
+    public void start() throws Exception {
+        // left blank
+    }
+
+    @Override
+    public void stop() throws Exception {
+        // left blank
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/shell/ShellUtils.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/shell/ShellUtils.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/shell/ShellUtils.java
new file mode 100644
index 0000000..02df0e0
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/shell/ShellUtils.java
@@ -0,0 +1,54 @@
+package org.apache.helix.autoscale.impl.shell;
+
+import java.io.File;
+import java.io.IOException;
+
+import org.apache.log4j.Logger;
+
+/**
+ * Utility for creating and destroying temporary marker files for shell-based
+ * containers.
+ * 
+ */
+class ShellUtils {
+
+    static final Logger log                        = Logger.getLogger(ShellUtils.class);
+
+    static final String SHELL_CONTAINER_PATH       = "target/metamanager-pkg/bin/shell-container-process.sh";
+    static final String SHELL_CONTAINER_PROPERTIES = "container.properties";
+    static final String SHELL_CONTAINER_MARKER     = "active";
+
+    private ShellUtils() {
+        // left blank
+    }
+
+    public static boolean hasMarker(File processDir) {
+        try {
+            log.debug(String.format("checking for marker file '%s'", getMarkerFile(processDir)));
+            if (getMarkerFile(processDir).exists())
+                return true;
+        } catch (IOException e) {
+            // ignore
+        }
+        return false;
+    }
+
+    public static void createMarker(File processDir) throws IOException {
+        log.debug(String.format("creating marker file '%s'", getMarkerFile(processDir)));
+        getMarkerFile(processDir).createNewFile();
+    }
+
+    public static void destroyMarker(File processDir) {
+        try {
+            log.debug(String.format("destroying marker file '%s'", getMarkerFile(processDir)));
+            getMarkerFile(processDir).delete();
+        } catch (IOException e) {
+            // ignore
+        }
+    }
+
+    public static File getMarkerFile(File processDir) throws IOException {
+        return new File(processDir.getCanonicalPath() + File.separatorChar + SHELL_CONTAINER_MARKER);
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnContainerData.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnContainerData.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnContainerData.java
new file mode 100644
index 0000000..4ebfb5d
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnContainerData.java
@@ -0,0 +1,86 @@
+package org.apache.helix.autoscale.impl.yarn;
+
+/**
+ * Container meta data for YARN-based containers. Reflect lifecycle of container
+ * from requesting, to bootstrapping, active operation and shutdown. Read and
+ * written by {@link YarnMasterProcess}, {@link YarnContainerProvider} and
+ * {@link YarnContainerService}. Also read by {@link YarnStatusProvider}.
+ * Typically stored in zookeeper
+ * 
+ */
+class YarnContainerData {
+
+	static enum ContainerState {
+		ACQUIRE,
+		CONNECTING,
+		ACTIVE,
+		TEARDOWN,
+		FAILED,
+		HALTED,
+		FINALIZE
+	}
+	
+    String                         id;
+    ContainerState                 state;
+    int                            yarnId;
+    String                         owner;
+    YarnContainerProcessProperties properties;
+
+    public YarnContainerData() {
+        // left blank
+    }
+
+    public YarnContainerData(String id, String owner, YarnContainerProcessProperties properties) {
+        this.id = id;
+        this.state = ContainerState.ACQUIRE;
+        this.yarnId = -1;
+        this.owner = owner;
+        this.properties = properties;
+    }
+
+    public String getId() {
+        return id;
+    }
+
+    public YarnContainerData setId(String id) {
+        this.id = id;
+        return this;
+    }
+
+    public ContainerState getState() {
+        return state;
+    }
+
+    public YarnContainerData setState(ContainerState state) {
+        this.state = state;
+        return this;
+    }
+
+    public int getYarnId() {
+        return yarnId;
+    }
+
+    public YarnContainerData setYarnId(int yarnId) {
+        this.yarnId = yarnId;
+        return this;
+    }
+
+    public String getOwner() {
+        return owner;
+    }
+
+    public YarnContainerData setOwner(String owner) {
+        this.owner = owner;
+        return this;
+    }
+
+    public YarnContainerProcessProperties getProperties() {
+        return properties;
+    }
+
+    public YarnContainerData setProperties(YarnContainerProcessProperties properties) {
+        this.properties = properties;
+        return this;
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnContainerProcess.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnContainerProcess.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnContainerProcess.java
new file mode 100644
index 0000000..5f8d006
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnContainerProcess.java
@@ -0,0 +1,53 @@
+package org.apache.helix.autoscale.impl.yarn;
+
+import org.apache.helix.autoscale.container.ContainerProcess;
+import org.apache.log4j.Logger;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * Host process for {@link ContainerProcess}es spawned by
+ * {@link YarnContainerProvider}. Configured via *.properties file in working
+ * directory. Corresponds to regular container in YARN and is managed jointly by
+ * the application master and the Helix participant. (Program entry point)
+ * 
+ */
+class YarnContainerProcess {
+    static final Logger log = Logger.getLogger(YarnContainerProcess.class);
+
+    public static void main(String[] args) throws Exception {
+        log.trace("BEGIN YarnProcess.main()");
+
+        final YarnContainerProcessProperties properties = YarnUtils.createContainerProcessProperties(YarnUtils
+                .getPropertiesFromPath(YarnUtils.YARN_CONTAINER_PROPERTIES));
+        Preconditions.checkArgument(properties.isValid(), "container properties not valid: %s", properties.toString());
+
+        log.debug("Launching yarndata service");
+        final ZookeeperYarnDataProvider metaService = new ZookeeperYarnDataProvider(properties.getYarnData());
+        metaService.start();
+
+        log.debug("Launching yarn container service");
+        final YarnContainerService yarnService = new YarnContainerService();
+        yarnService.configure(properties);
+        yarnService.setYarnDataProvider(metaService);
+        yarnService.start();
+
+        log.debug("Installing shutdown hooks");
+        Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() {
+            @Override
+            public void run() {
+                log.debug("Running shutdown hook");
+                yarnService.stop();
+                metaService.stop();
+            }
+        }));
+
+        System.out.println("Press ENTER to stop container process");
+        System.in.read();
+
+        log.debug("Stopping container services");
+        System.exit(0);
+
+        log.trace("END YarnProcess.main()");
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnContainerProcessProperties.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnContainerProcessProperties.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnContainerProcessProperties.java
new file mode 100644
index 0000000..5ad8f63
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnContainerProcessProperties.java
@@ -0,0 +1,40 @@
+package org.apache.helix.autoscale.impl.yarn;
+
+import org.apache.helix.autoscale.container.ContainerProcessProperties;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * Base configuration for {@link YarnContainerProcess}. 
+ *
+ */
+public class YarnContainerProcessProperties extends ContainerProcessProperties {
+	/**
+	 * 
+	 */
+	private static final long serialVersionUID = -2209509977839674160L;
+	
+	public final static String YARNDATA = "yarndata";
+	
+	public boolean isValid() {
+		return super.isValid() &&
+		       containsKey(YARNDATA);
+	}
+	
+	public String getYarnData() {
+		return getProperty(YARNDATA);
+	}
+
+    @Override
+    public Object get(Object key) {
+        Preconditions.checkState(containsKey(key));
+        return super.get(key);
+    }
+    
+    @Override
+    public String getProperty(String key) {
+        Preconditions.checkState(containsKey(key));
+        return super.getProperty(key);
+    }
+    
+}


[11/15] Adding Helix-task-framework and Yarn integration modules

Posted by ki...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnContainerProvider.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnContainerProvider.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnContainerProvider.java
new file mode 100644
index 0000000..d490edc
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnContainerProvider.java
@@ -0,0 +1,143 @@
+package org.apache.helix.autoscale.impl.yarn;
+
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Properties;
+import java.util.concurrent.TimeoutException;
+
+import org.apache.helix.autoscale.ContainerProvider;
+import org.apache.helix.autoscale.ContainerProviderService;
+import org.apache.helix.autoscale.impl.yarn.YarnContainerData.ContainerState;
+import org.apache.log4j.Logger;
+
+import com.google.common.base.Preconditions;
+
/**
 * {@link ContainerProvider} spawning YARN-based containers. Reads and writes
 * meta data using {@link YarnDataProvider}. Works in a distributed setting, but
 * typically requires access to zookeeper.
 * 
 */
class YarnContainerProvider implements ContainerProviderService {

	static final Logger log = Logger.getLogger(YarnContainerProvider.class);

	// delay between meta data polls while waiting for a state transition, in ms
	static final long POLL_INTERVAL = 1000;
	// maximum time to wait for a container to reach a requested state, in ms
	static final long CONTAINER_TIMEOUT = 60000;

	/*
	 * CONTAINERS
	 *   A (A, READY)
	 *   B (B, RUNNING)
	 */

	final Object notifier = new Object();
    // container type name -> launch properties, populated via configure()
    final Map<String, Properties> types = new HashMap<String, Properties>();

	ZookeeperYarnDataProvider yarnDataService;
	YarnContainerProviderProcess yarnApp;
	YarnContainerProviderProperties properties;

    /**
     * Wraps the generic properties in typed provider properties and configures
     * this provider, registering every declared container type.
     */
    @Override
    public void configure(Properties properties) throws Exception {
        YarnContainerProviderProperties yarnProps = new YarnContainerProviderProperties();
        yarnProps.putAll(properties);
        configure(yarnProps);
    }

    private void configure(YarnContainerProviderProperties properties) {
        this.properties = properties;

        for(String containerType : properties.getContainers()) {
            registerType(containerType, properties.getContainer(containerType));
        }
    }

    /**
     * Starts the zookeeper-backed yarn data service. Requires valid properties.
     */
    @Override
    public void start() throws Exception {
        Preconditions.checkNotNull(properties);
        Preconditions.checkState(properties.isValid(), "provider properties not valid: %s", properties);

        log.debug("Starting yarn container provider service");
        yarnDataService = new ZookeeperYarnDataProvider();
        yarnDataService.configure(properties);
        yarnDataService.start();
    }

    /**
     * Destroys all containers owned by this provider and shuts down the yarn
     * data service.
     */
    @Override
    public void stop() throws Exception {
        log.debug("Stopping yarn container provider service");
        destroyAll();

        if(yarnDataService != null) {
            yarnDataService.stop();
            yarnDataService = null;
        }
    }

	/**
	 * Creates meta data for a new container of a registered type and blocks
	 * until the container reports state ACTIVE (or the wait times out).
	 *
	 * @throws IllegalArgumentException if the type was not registered
	 */
	@Override
	public void create(final String id, final String type) throws Exception {
	    Preconditions.checkArgument(types.containsKey(type), "Container type '%s' is not configured", type);

		YarnContainerProcessProperties containerProperties = YarnUtils.createContainerProcessProperties(types.get(type));

        log.info(String.format("Running container '%s' (properties='%s')", id, containerProperties));

		yarnDataService.create(new YarnContainerData(id, properties.getName(), containerProperties));
		waitForState(id, ContainerState.ACTIVE);
	}

	/**
	 * Moves an active or failed container to TEARDOWN, waits for FINALIZE and
	 * removes its meta data. An already-finalized container is deleted directly.
	 *
	 * @throws IllegalStateException if the container is in any other state
	 */
	@Override
	public void destroy(final String id) throws Exception {
		YarnContainerData meta = yarnDataService.read(id);

		if(meta.state == ContainerState.ACTIVE) {
			log.info(String.format("Destroying active container, going to teardown"));
			yarnDataService.update(meta.setState(ContainerState.TEARDOWN));

		} else if(meta.state == ContainerState.FAILED) {
			log.info(String.format("Destroying failed container, going to teardown"));
			yarnDataService.update(meta.setState(ContainerState.TEARDOWN));

		} else if(meta.state == ContainerState.FINALIZE) {
			log.info(String.format("Destroying finalized container, skipping"));

		} else {
			throw new IllegalStateException(String.format("Container '%s' must be active, failed or finalized", id));
		}

		waitForState(id, ContainerState.FINALIZE);
		yarnDataService.delete(id);
	}

	/**
	 * Best-effort destruction of every container owned by this provider;
	 * individual failures are deliberately ignored.
	 */
	@Override
	public void destroyAll() {
		try {
			for(YarnContainerData meta : yarnDataService.readAll()) {
			    if(meta.owner.equals(properties.getName())) {
			        try { destroy(meta.id); } catch (Exception ignore) {}
			    }
			}
		} catch (Exception ignore) {
			// ignore
		}
	}

	/**
	 * Polls the container meta data every POLL_INTERVAL ms until it reaches
	 * the given state.
	 *
	 * @throws TimeoutException if the state is not reached within CONTAINER_TIMEOUT
	 */
	void waitForState(String id, ContainerState state) throws Exception, InterruptedException, TimeoutException {
		long limit = System.currentTimeMillis() + CONTAINER_TIMEOUT;
		YarnContainerData meta = yarnDataService.read(id);
		while(meta.state != state) {
			if(System.currentTimeMillis() >= limit) {
				throw new TimeoutException(String.format("Container '%s' failed to reach state '%s' (currently is '%s')", id, state, meta.state));
			}
			Thread.sleep(POLL_INTERVAL);
			meta = yarnDataService.read(id);
		}
	}

	// records launch properties under a container type name
    void registerType(String name, Properties properties) {
        log.debug(String.format("Registering container type '%s' (properties='%s')", name, properties));
        types.put(name, properties);
    }

}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnContainerProviderProcess.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnContainerProviderProcess.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnContainerProviderProcess.java
new file mode 100644
index 0000000..20a8b92
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnContainerProviderProcess.java
@@ -0,0 +1,158 @@
+package org.apache.helix.autoscale.impl.yarn;
+
+import java.io.File;
+import java.net.InetSocketAddress;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Properties;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.net.NetUtils;
+import org.apache.hadoop.yarn.api.ApplicationConstants;
+import org.apache.hadoop.yarn.api.ClientRMProtocol;
+import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationResponse;
+import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationRequest;
+import org.apache.hadoop.yarn.api.records.ApplicationId;
+import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
+import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
+import org.apache.hadoop.yarn.api.records.LocalResource;
+import org.apache.hadoop.yarn.api.records.LocalResourceType;
+import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.exceptions.YarnRemoteException;
+import org.apache.hadoop.yarn.ipc.YarnRPC;
+import org.apache.hadoop.yarn.util.Records;
+import org.apache.helix.autoscale.Service;
+import org.apache.log4j.Logger;
+
+import com.google.common.base.Preconditions;
+
/**
 * Configurable and runnable service for {@link YarnContainerProvider}.
 * Submits the provider's application master to the YARN resource manager on
 * start() and force-kills it again on stop().
 * 
 */
public class YarnContainerProviderProcess implements Service {

    static final Logger             log                 = Logger.getLogger(YarnContainerProviderProcess.class);

    // shell command template for the application master; stdout/stderr are
    // redirected into the YARN-provided log directory
    static String                   YARN_MASTER_COMMAND = "/bin/sh %s 1>%s/stdout 2>%s/stderr";

    Configuration                   conf;            // YARN/HDFS client configuration, set in configure()
    YarnRPC                         rpc;             // RPC factory used to build the RM proxy
    ClientRMProtocol                rmClient;        // resource manager client, set in connect()
    ApplicationId                   appId;           // id of the submitted application, set in start()
    File                            propertiesFile;  // local temp copy of the provider properties

    YarnContainerProviderProperties properties;

    /**
     * Converts generic properties into typed provider properties and applies them.
     */
    @Override
    public void configure(Properties properties) throws Exception {
        configure(YarnUtils.createContainerProviderProperties(properties));
    }

    private void configure(YarnContainerProviderProperties properties) {
        this.conf = new YarnConfiguration();
        this.conf.set(YarnConfiguration.RM_ADDRESS, properties.getResourceManager());
        this.conf.set(YarnConfiguration.RM_SCHEDULER_ADDRESS, properties.getScheduler());
        this.conf.set(FileSystem.FS_DEFAULT_NAME_KEY, properties.getHdfs());

        this.rpc = YarnRPC.create(conf);

        this.properties = properties;
    }

    /**
     * Connects to the resource manager, acquires an application id, stages the
     * application master archive and configuration in HDFS, and submits the
     * application. Requires valid properties.
     */
    @Override
    public void start() throws Exception {
        Preconditions.checkNotNull(properties);
        Preconditions.checkState(properties.isValid());

        connect();

        String command = String.format(YARN_MASTER_COMMAND, YarnUtils.YARN_MASTER_PATH, ApplicationConstants.LOG_DIR_EXPANSION_VAR,
                ApplicationConstants.LOG_DIR_EXPANSION_VAR);

        log.info(String.format("Starting application '%s' provider '%s' (masterCommand='%s')", properties.getYarnData(), properties.getName(), command));

        log.debug(String.format("Running master command \"%s\"", command));

        // app id
        GetNewApplicationRequest appRequest = Records.newRecord(GetNewApplicationRequest.class);
        GetNewApplicationResponse appResponse = rmClient.getNewApplication(appRequest);

        this.appId = appResponse.getApplicationId();

        log.info(String.format("Acquired app id '%s' for '%s'", appId.toString(), properties.getName()));

        // command
        ContainerLaunchContext launchContext = Records.newRecord(ContainerLaunchContext.class);
        launchContext.setCommands(Collections.singletonList(command));

        // resource limit
        Resource resource = Records.newRecord(Resource.class);
        resource.setMemory(256); // TODO make dynamic
        launchContext.setResource(resource);

        // environment (none passed to the master)
        Map<String, String> env = new HashMap<String, String>();
        launchContext.setEnvironment(env);

        // configuration, written to a local temp file before HDFS staging
        propertiesFile = YarnUtils.writePropertiesToTemp(properties);

        // HDFS staging; the app id doubles as the staging namespace
        final String namespace = appId.toString();
        final Path masterArchive = YarnUtils.copyToHdfs(YarnUtils.YARN_MASTER_ARCHIVE_PATH, YarnUtils.YARN_MASTER_STAGING, namespace, conf);
        final Path masterProperties = YarnUtils.copyToHdfs(propertiesFile.getCanonicalPath(), YarnUtils.YARN_MASTER_PROPERTIES, namespace, conf);
        final Path containerArchive = YarnUtils.copyToHdfs(YarnUtils.YARN_CONTAINER_ARCHIVE_PATH, YarnUtils.YARN_CONTAINER_STAGING, namespace, conf);

        // local resources made available to the master container
        Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();
        localResources.put(YarnUtils.YARN_MASTER_DESTINATION, YarnUtils.createHdfsResource(masterArchive, LocalResourceType.ARCHIVE, conf));
        localResources.put(YarnUtils.YARN_MASTER_PROPERTIES, YarnUtils.createHdfsResource(masterProperties, LocalResourceType.FILE, conf));
        localResources.put(YarnUtils.YARN_CONTAINER_STAGING, YarnUtils.createHdfsResource(containerArchive, LocalResourceType.FILE, conf));

        launchContext.setLocalResources(localResources);

        // user the master runs as
        launchContext.setUser(properties.getUser());

        // app submission
        ApplicationSubmissionContext subContext = Records.newRecord(ApplicationSubmissionContext.class);
        subContext.setApplicationId(appId);
        subContext.setApplicationName(properties.getName());
        subContext.setAMContainerSpec(launchContext);

        SubmitApplicationRequest subRequest = Records.newRecord(SubmitApplicationRequest.class);
        subRequest.setApplicationSubmissionContext(subContext);

        log.info(String.format("Starting app id '%s'", appId.toString()));

        rmClient.submitApplication(subRequest);

    }

    /**
     * Force-kills the submitted application, removes the HDFS staging
     * namespace (best effort) and deletes the local properties file.
     */
    @Override
    public void stop() throws YarnRemoteException {
        log.info(String.format("Stopping app id '%s'", appId.toString()));
        KillApplicationRequest killRequest = Records.newRecord(KillApplicationRequest.class);
        killRequest.setApplicationId(appId);

        rmClient.forceKillApplication(killRequest);

        // best-effort cleanup of staged files
		try { YarnUtils.destroyHdfsNamespace(appId.toString(), conf); } catch(Exception ignore) {}

        propertiesFile.delete();
    }

    // creates the resource manager proxy used by start() and stop()
    void connect() {
        YarnConfiguration yarnConf = new YarnConfiguration(conf);
        InetSocketAddress rmAddress = NetUtils.createSocketAddr(yarnConf.get(YarnConfiguration.RM_ADDRESS, YarnConfiguration.DEFAULT_RM_ADDRESS));
        log.info("Connecting to ResourceManager at: " + rmAddress);
        this.rmClient = ((ClientRMProtocol) rpc.getProxy(ClientRMProtocol.class, rmAddress, conf));
    }
}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnContainerProviderProperties.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnContainerProviderProperties.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnContainerProviderProperties.java
new file mode 100644
index 0000000..85c8ab5
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnContainerProviderProperties.java
@@ -0,0 +1,64 @@
+package org.apache.helix.autoscale.impl.yarn;
+
+import org.apache.helix.autoscale.provider.ProviderProperties;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * Base configuration for {@link YarnContainerProviderProcess} 
+ *
+ */
+public class YarnContainerProviderProperties extends ProviderProperties {
+	/**
+     * 
+     */
+    private static final long serialVersionUID = -8853614843205587170L;
+    
+	public final static String YARNDATA = "yarndata";
+    public final static String RESOURCEMANAGER = "resourcemananger";
+    public final static String SCHEDULER = "scheduler";
+    public final static String USER = "user";
+    public final static String HDFS = "hdfs";
+    
+	public boolean isValid() {
+		return super.isValid() &&
+		       containsKey(YARNDATA) &&
+			   containsKey(RESOURCEMANAGER) &&
+			   containsKey(SCHEDULER) &&
+			   containsKey(USER) &&
+			   containsKey(HDFS);
+	}
+	
+	public String getYarnData() {
+		return getProperty(YARNDATA);
+	}
+
+    public String getResourceManager() {
+        return getProperty(RESOURCEMANAGER);
+    }
+
+    public String getScheduler() {
+        return getProperty(SCHEDULER);
+    }
+
+    public String getUser() {
+        return getProperty(USER);
+    }
+
+    public String getHdfs() {
+        return getProperty(HDFS);
+    }
+    
+    @Override
+    public String getProperty(String key) {
+        Preconditions.checkState(containsKey(key));
+        return super.getProperty(key);
+    }
+    
+    @Override
+    public Object get(Object key) {
+        Preconditions.checkState(containsKey(key));
+        return super.get(key);
+    }
+    
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnContainerService.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnContainerService.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnContainerService.java
new file mode 100644
index 0000000..e730c25
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnContainerService.java
@@ -0,0 +1,156 @@
+package org.apache.helix.autoscale.impl.yarn;
+
+import java.io.File;
+import java.util.Properties;
+import java.util.concurrent.Executors;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.helix.autoscale.Service;
+import org.apache.helix.autoscale.container.ContainerProcess;
+import org.apache.helix.autoscale.container.ContainerProcessProperties;
+import org.apache.helix.autoscale.container.ContainerUtils;
+import org.apache.helix.autoscale.impl.yarn.YarnContainerData.ContainerState;
+import org.apache.log4j.Logger;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * Configurable and runnable service for YARN-based containers. Continuously
+ * checks container meta data and process state and triggers state changes and
+ * container setup and shutdown.
+ * 
+ */
+class YarnContainerService implements Service {
+    static final Logger            log                       = Logger.getLogger(YarnContainerService.class);
+
+    static final long              CONTAINERSERVICE_INTERVAL = 1000;
+
+    YarnContainerProcessProperties properties;
+
+    YarnDataProvider               metaService;
+    ScheduledExecutorService       executor;
+
+    ContainerProcess               process;
+
+    @Override
+    public void configure(Properties properties) throws Exception {
+        Preconditions.checkNotNull(properties);
+        YarnContainerProcessProperties containerProperties = new YarnContainerProcessProperties();
+        containerProperties.putAll(properties);
+        Preconditions.checkArgument(containerProperties.isValid());
+
+        this.properties = containerProperties;
+    }
+
+    public void setYarnDataProvider(YarnDataProvider metaService) {
+        this.metaService = metaService;
+    }
+
+    @Override
+    public void start() {
+        Preconditions.checkNotNull(metaService);
+        Preconditions.checkNotNull(properties);
+        Preconditions.checkState(properties.isValid());
+
+        log.debug("starting yarn container service");
+
+        executor = Executors.newSingleThreadScheduledExecutor();
+        executor.scheduleAtFixedRate(new ContainerStatusService(), 0, CONTAINERSERVICE_INTERVAL, TimeUnit.MILLISECONDS);
+    }
+
+    @Override
+    public void stop() {
+        log.debug("stopping yarn container service");
+
+        if (executor != null) {
+            executor.shutdown();
+            while (!executor.isTerminated()) {
+                try {
+                    Thread.sleep(100);
+                } catch (InterruptedException e) {
+                    // ignore
+                }
+            }
+            executor = null;
+        }
+
+        destroyLocalContainerNamespace();
+    }
+
+    class ContainerStatusService implements Runnable {
+        @Override
+        public void run() {
+            log.info("updating container status");
+
+            try {
+                if (!metaService.exists(properties.getName())) {
+                    log.warn(String.format("YarnData for '%s' does not exist. Terminating yarn service.", properties.getName()));
+                    process.stop();
+                    stop();
+                }
+
+                YarnContainerData meta = metaService.read(properties.getName());
+
+                if (meta.state == ContainerState.CONNECTING) {
+                    log.trace("container connecting");
+                    try {
+                        ContainerProcessProperties containerProperties = meta.getProperties();
+
+                        containerProperties.setProperty(ContainerProcessProperties.CLUSTER, properties.getCluster());
+                        containerProperties.setProperty(ContainerProcessProperties.ADDRESS, properties.getAddress());
+                        containerProperties.setProperty(ContainerProcessProperties.NAME, properties.getName());
+
+                        process = ContainerUtils.createProcess(containerProperties);
+                        process.start();
+                    } catch (Exception e) {
+                        log.error("Failed to start participant, going to failed", e);
+                    }
+
+                    if (process.isActive()) {
+                        log.trace("process active, activating container");
+                        metaService.update(meta.setState(ContainerState.ACTIVE));
+
+                    } else if (process.isFailed()) {
+                        log.trace("process failed, failing container");
+                        metaService.update(meta.setState(ContainerState.FAILED));
+
+                    } else {
+                        log.trace("process state unknown, failing container");
+                        metaService.update(meta.setState(ContainerState.FAILED));
+                    }
+                }
+
+                if (meta.state == ContainerState.ACTIVE) {
+                    log.trace("container active");
+                    if (process.isFailed()) {
+                        log.trace("process failed, failing container");
+                        metaService.update(meta.setState(ContainerState.FAILED));
+
+                    } else if (!process.isActive()) {
+                        log.trace("process not active, halting container");
+                        process.stop();
+                        metaService.update(meta.setState(ContainerState.HALTED));
+                    }
+                }
+
+                if (meta.state == ContainerState.TEARDOWN) {
+                    log.trace("container teardown");
+                    process.stop();
+                    metaService.update(meta.setState(ContainerState.HALTED));
+                }
+
+            } catch (Exception e) {
+                log.error(String.format("Error while updating container '%s' status", properties.getName()), e);
+            }
+        }
+    }
+
+    public static void destroyLocalContainerNamespace() {
+        log.info("cleaning up container directory");
+        FileUtils.deleteQuietly(new File(YarnUtils.YARN_CONTAINER_DESTINATION));
+        FileUtils.deleteQuietly(new File(YarnUtils.YARN_CONTAINER_PROPERTIES));
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnDataProvider.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnDataProvider.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnDataProvider.java
new file mode 100644
index 0000000..188045d
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnDataProvider.java
@@ -0,0 +1,73 @@
+package org.apache.helix.autoscale.impl.yarn;
+
+import java.util.Collection;
+
/**
 * Abstraction for a (remote) repository of yarn container meta data. Meta data
 * is read and updated by {@link YarnContainerProvider},
 * {@link YarnMasterProcess} and {@link YarnContainerProcess}.<br/>
 * <b>NOTE:</b> Each operation is assumed to be atomic.
 * 
 */
interface YarnDataProvider {

    /**
     * Checks for existence of meta data about a container instance.
     * 
     * @param id
     *            unique container id
     * @return true, if meta data exists
     */
    public boolean exists(String id);

    /**
     * Create meta data entry. Check for non-existence of meta data for given
     * container id and create node.
     * 
     * @param data
     *            container meta data with unique id
     * @throws Exception
     *             if meta data entry already exists
     */
    public void create(YarnContainerData data) throws Exception;

    /**
     * Read meta data for given container id.
     * 
     * @param id
     *            unique container id
     * @return yarn container data
     * @throws Exception
     *             if meta data entry for given id does not exist
     */
    public YarnContainerData read(String id) throws Exception;

    /**
     * Read all meta data stored for this domain space of yarn providers and
     * containers.
     * 
     * @return collection of meta data entries, empty if none
     * @throws Exception
     *             if the repository cannot be read
     */
    public Collection<YarnContainerData> readAll() throws Exception;

    /**
     * Write meta data entry, replacing the stored entry with the same id.
     * 
     * @param data
     *            yarn container meta data
     * @throws Exception
     *             if meta data entry for given id does not exist
     */
    public void update(YarnContainerData data) throws Exception;

    /**
     * Delete meta data entry. Frees up unique id to be reused. May throw an
     * exception on non-existence or be idempotent (implementation-dependent).
     * 
     * @param id
     *            unique container id
     * @throws Exception
     *             if the entry cannot be deleted
     */
    public void delete(String id) throws Exception;
}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnMasterProcess.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnMasterProcess.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnMasterProcess.java
new file mode 100644
index 0000000..25b73f5
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnMasterProcess.java
@@ -0,0 +1,144 @@
+package org.apache.helix.autoscale.impl.yarn;
+
+import java.net.InetSocketAddress;
+import java.util.Map;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.net.NetUtils;
+import org.apache.hadoop.yarn.api.AMRMProtocol;
+import org.apache.hadoop.yarn.api.ApplicationConstants;
+import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationMasterRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterRequest;
+import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.ipc.YarnRPC;
+import org.apache.hadoop.yarn.util.ConverterUtils;
+import org.apache.hadoop.yarn.util.Records;
+import org.apache.helix.autoscale.provider.ProviderProcess;
+import org.apache.log4j.Logger;
+
+/**
+ * Host process for {@link YarnContainerProviderProcess}. Hasts application
+ * master in YARN and provider participant to Helix meta cluster. (Program entry
+ * point)
+ * 
+ */
+class YarnMasterProcess {
+
+    static final Logger log = Logger.getLogger(YarnMasterProcess.class);
+
+    public static void main(String[] args) throws Exception {
+        log.trace("BEGIN YarnMaster.main()");
+
+        final ApplicationAttemptId appAttemptId = getApplicationAttemptId();
+        log.info(String.format("Got application attempt id '%s'", appAttemptId.toString()));
+
+        log.debug("Reading master properties");
+        YarnMasterProperties properties = YarnUtils.createMasterProperties(YarnUtils.getPropertiesFromPath(YarnUtils.YARN_MASTER_PROPERTIES));
+
+        if (!properties.isValid())
+            throw new IllegalArgumentException(String.format("master properties not valid: %s", properties.toString()));
+
+        log.debug("Connecting to resource manager");
+        Configuration conf = new YarnConfiguration();
+        conf.set(YarnConfiguration.RM_ADDRESS, properties.getResourceManager());
+        conf.set(YarnConfiguration.RM_SCHEDULER_ADDRESS, properties.getScheduler());
+        conf.set(FileSystem.FS_DEFAULT_NAME_KEY, properties.getHdfs());
+
+        final AMRMProtocol resourceManager = getResourceManager(conf);
+
+        // register the AM with the RM
+        log.debug("Registering application master");
+        RegisterApplicationMasterRequest appMasterRequest = Records.newRecord(RegisterApplicationMasterRequest.class);
+        appMasterRequest.setApplicationAttemptId(appAttemptId);
+        appMasterRequest.setHost("");
+        appMasterRequest.setRpcPort(0);
+        appMasterRequest.setTrackingUrl("");
+
+        resourceManager.registerApplicationMaster(appMasterRequest);
+
+        log.debug("Starting yarndata service");
+        final ZookeeperYarnDataProvider yarnDataService = new ZookeeperYarnDataProvider(properties.getYarnData());
+        yarnDataService.start();
+
+        log.debug("Starting yarn master service");
+        final YarnMasterService service = new YarnMasterService();
+        service.configure(properties);
+        service.setAttemptId(appAttemptId);
+        service.setYarnDataProvider(yarnDataService);
+        service.setProtocol(resourceManager);
+        service.setYarnConfiguration(conf);
+        service.start();
+
+        log.debug("Starting provider");
+        final YarnContainerProvider provider = new YarnContainerProvider();
+        provider.configure(properties);
+        provider.start();
+
+        log.debug("Starting provider process");
+        final ProviderProcess process = new ProviderProcess();
+        process.configure(properties);
+        process.setConteinerProvider(provider);
+        process.start();
+
+        log.debug("Installing shutdown hooks");
+        Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() {
+            @Override
+            public void run() {
+                log.debug("Stopping provider process");
+                process.stop();
+
+                log.debug("Stopping provider");
+                try { provider.stop(); } catch (Exception ignore) {}
+
+                log.debug("Stopping yarn master service");
+                service.stop();
+
+                log.debug("Stopping yarndata service");
+                yarnDataService.stop();
+
+                // finish application
+                log.debug("Sending finish request");
+                FinishApplicationMasterRequest finishReq = Records.newRecord(FinishApplicationMasterRequest.class);
+
+                finishReq.setAppAttemptId(getApplicationAttemptId());
+                finishReq.setFinishApplicationStatus(FinalApplicationStatus.SUCCEEDED);
+	    	    
+	    	    try { resourceManager.finishApplicationMaster(finishReq); } catch(Exception ignore) {}
+	    	}
+	    }));
+	    
+		log.trace("END YarnMaster.main()");
+	}
+	
+    static AMRMProtocol getResourceManager(Configuration conf) {
+        // Connect to the Scheduler of the ResourceManager.
+        YarnConfiguration yarnConf = new YarnConfiguration(conf);
+        YarnRPC rpc = YarnRPC.create(yarnConf);
+        InetSocketAddress rmAddress = NetUtils.createSocketAddr(yarnConf.get(YarnConfiguration.RM_SCHEDULER_ADDRESS,
+                YarnConfiguration.DEFAULT_RM_SCHEDULER_ADDRESS));
+        log.info("Connecting to ResourceManager at " + rmAddress);
+        AMRMProtocol resourceManager = (AMRMProtocol) rpc.getProxy(AMRMProtocol.class, rmAddress, conf);
+        return resourceManager;
+    }
+
+    static ApplicationAttemptId getApplicationAttemptId() {
+        ContainerId containerId = ConverterUtils.toContainerId(getEnv(ApplicationConstants.AM_CONTAINER_ID_ENV));
+        ApplicationAttemptId appAttemptID = containerId.getApplicationAttemptId();
+        return appAttemptID;
+    }
+
+    static String getEnv(String key) {
+        Map<String, String> envs = System.getenv();
+        String clusterName = envs.get(key);
+        if (clusterName == null) {
+            // container id should always be set in the env by the framework
+            throw new IllegalArgumentException(String.format("%s not set in the environment", key));
+        }
+        return clusterName;
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnMasterProperties.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnMasterProperties.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnMasterProperties.java
new file mode 100644
index 0000000..3f49852
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnMasterProperties.java
@@ -0,0 +1,13 @@
+package org.apache.helix.autoscale.impl.yarn;
+
/**
 * Base configuration for {@link YarnMasterProcess}. Adds no properties of its
 * own beyond {@link YarnContainerProviderProperties}; exists as a distinct
 * type for clarity and future extension.
 *
 */
public class YarnMasterProperties extends YarnContainerProviderProperties {
	/**
	 * Serial version id for the Serializable parent hierarchy.
	 */
	private static final long serialVersionUID = -2209509980239674160L;
	
}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnMasterService.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnMasterService.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnMasterService.java
new file mode 100644
index 0000000..03d4f72
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnMasterService.java
@@ -0,0 +1,414 @@
+package org.apache.helix.autoscale.impl.yarn;
+
+import java.io.File;
+import java.net.InetSocketAddress;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+import java.util.concurrent.Executors;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.net.NetUtils;
+import org.apache.hadoop.yarn.api.AMRMProtocol;
+import org.apache.hadoop.yarn.api.ApplicationConstants;
+import org.apache.hadoop.yarn.api.ContainerManager;
+import org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
+import org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest;
+import org.apache.hadoop.yarn.api.records.AMResponse;
+import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
+import org.apache.hadoop.yarn.api.records.Container;
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
+import org.apache.hadoop.yarn.api.records.ContainerStatus;
+import org.apache.hadoop.yarn.api.records.LocalResource;
+import org.apache.hadoop.yarn.api.records.LocalResourceType;
+import org.apache.hadoop.yarn.api.records.NodeId;
+import org.apache.hadoop.yarn.api.records.Priority;
+import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.api.records.ResourceRequest;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.exceptions.YarnRemoteException;
+import org.apache.hadoop.yarn.ipc.YarnRPC;
+import org.apache.hadoop.yarn.util.Records;
+import org.apache.helix.autoscale.Service;
+import org.apache.helix.autoscale.impl.yarn.YarnContainerData.ContainerState;
+import org.apache.log4j.Logger;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.Maps;
+
/**
 * Implements YARN application master. Continuously monitors container health in
 * YARN and yarn meta data updates. Spawns and destroys containers.
 * 
 * The protocol proxy, attempt id, yarn configuration and yarn data provider
 * must be injected via setters before {@link #start()}. The actual work is
 * done by the periodically scheduled {@link YarnService} reconciliation task.
 * 
 */
class YarnMasterService implements Service {

    static final Logger                     log                    = Logger.getLogger(YarnMasterService.class);

    static final String                     REQUIRED_TYPE          = "container";

    // timing constants in milliseconds
    static final long                       ZOOKEEPER_TIMEOUT      = 5000;
    static final long                       MASTERSERVICE_INTERVAL = 1000;

    static final String                     CONTAINERS             = "CONTAINERS";

    // shell command template: script path, stdout log dir, stderr log dir
    static final String                     YARN_CONTAINER_COMMAND = "/bin/sh %s 1>%s/stdout 2>%s/stderr";

    YarnMasterProperties                    properties;
    AMRMProtocol                            protocol;
    ApplicationAttemptId                    attemptId;
    Configuration                           yarnConfig;
    YarnDataProvider                        yarnDataService;

    // book-keeping of yarn containers by life-cycle stage (accessed only from the single-threaded executor)
    final Map<ContainerId, Container>       unassignedContainers   = new HashMap<ContainerId, Container>();
    final Map<ContainerId, Container>       activeContainers       = new HashMap<ContainerId, Container>();
    final Map<ContainerId, ContainerStatus> completedContainers    = new HashMap<ContainerId, ContainerStatus>();
    // maps yarn container id to the id of its meta data node
    final Map<ContainerId, String>          yarn2meta              = new HashMap<ContainerId, String>();

    ScheduledExecutorService                executor;

    /**
     * Configures the service from master properties.
     * 
     * @param properties
     *            must convert to valid {@link YarnMasterProperties}
     * @throws Exception
     *             if the properties are not valid
     */
    @Override
    public void configure(Properties properties) throws Exception {
        YarnMasterProperties yarnProperties = YarnUtils.createMasterProperties(properties);
        Preconditions.checkArgument(yarnProperties.isValid());
        this.properties = yarnProperties;
    }

    /** Injects the AM-RM protocol proxy used to allocate and release containers. */
    public void setProtocol(AMRMProtocol protocol) {
        this.protocol = protocol;
    }

    /** Injects the application attempt id of this master. */
    public void setAttemptId(ApplicationAttemptId attemptId) {
        this.attemptId = attemptId;
    }

    /** Injects the YARN configuration used for HDFS staging and RPC proxies. */
    public void setYarnConfiguration(Configuration yarnConfig) {
        this.yarnConfig = yarnConfig;
    }

    /** Injects the meta data repository shared with providers and containers. */
    public void setYarnDataProvider(YarnDataProvider yarnDataService) {
        this.yarnDataService = yarnDataService;
    }

    /**
     * Starts the periodic reconciliation cycle. All dependencies must have
     * been injected beforehand.
     */
    @Override
    public void start() {
        Preconditions.checkNotNull(properties);
        Preconditions.checkNotNull(protocol);
        Preconditions.checkNotNull(attemptId);
        Preconditions.checkNotNull(yarnConfig);
        Preconditions.checkNotNull(yarnDataService);

        log.debug("starting yarn master service");

        executor = Executors.newSingleThreadScheduledExecutor();
        executor.scheduleAtFixedRate(new YarnService(), 0, MASTERSERVICE_INTERVAL, TimeUnit.MILLISECONDS);
    }

    /**
     * Stops the reconciliation cycle, blocking until any in-flight update
     * completes, and removes locally staged master files.
     */
    @Override
    public void stop() {
        log.debug("stopping yarn master service");

        if (executor != null) {
            executor.shutdown();
            // poll until the in-flight update cycle (if any) has finished
            while (!executor.isTerminated()) {
                try {
                    Thread.sleep(100);
                } catch (InterruptedException e) {
                    // ignore
                }
            }
            executor = null;
        }

        destroyLocalMasterNamespace();
    }

    /**
     * Reads all meta data entries owned by this provider instance (matched by
     * owner name from the master properties).
     * 
     * @return owned container meta data, empty if none
     * @throws Exception
     *             if the meta data store cannot be read
     */
    Collection<YarnContainerData> readOwnedYarnData() throws Exception {
        log.debug("reading container data");

        Collection<YarnContainerData> containers = new ArrayList<YarnContainerData>();
        for (YarnContainerData meta : yarnDataService.readAll()) {
            if (meta.owner.equals(properties.getName())) {
                containers.add(meta);
                log.debug(String.format("found container node '%s' (state=%s, yarnId=%s, owner=%s)", meta.id, meta.state, meta.yarnId, meta.owner));
            }
        }
        return containers;
    }

    /**
     * Periodic reconciliation task. Compares the desired state in the meta
     * data store with actual YARN container state: requests or releases
     * containers from the resource manager, stages and launches container
     * processes on newly assigned containers, and finalizes the meta data of
     * completed ones.
     */
    class YarnService implements Runnable {
        // sequence number passed to the RM with each allocate request
        int responseId = 0;

        @Override
        public void run() {
            try {
                log.debug("running yarn service update cycle");

                Collection<YarnContainerData> yarndata = readOwnedYarnData();

                // active meta containers
                int numMetaActive = countActiveMeta(yarndata);

                // newly acquired meta containers
                int numMetaAcquire = countAcquireMeta(yarndata);

                // destroyed meta containers
                List<ContainerId> destroyedReleasedIds = createDestroyedReleaseList(yarndata);
                int numMetaCompleted = destroyedReleasedIds.size();

                int numMeta = numMetaAcquire + numMetaActive + numMetaCompleted;

                // yarn containers
                int numYarnUnassigned = unassignedContainers.size();
                int numYarnActive = activeContainers.size();
                int numYarnCompleted = completedContainers.size();
                int numYarn = numYarnUnassigned + numYarnActive + numYarnCompleted;

                // negative value means we hold more unassigned containers than needed
                int numYarnRequired = numMetaAcquire - numYarnUnassigned;

                // additionally required containers
                int numRequestAdditional = Math.max(0, numYarnRequired);

                // overstock containers
                List<ContainerId> unneededReleasedIds = createOverstockReleaseList(numYarnRequired);

                int numReleased = destroyedReleasedIds.size() + unneededReleasedIds.size();

                log.debug(String.format("meta containers (total=%d, acquire=%d, active=%d, completed=%d)", numMeta, numMetaAcquire, numMetaActive, numMetaCompleted));
                log.debug(String.format("yarn containers (total=%d, unassigned=%d, active=%d, completed=%d)", numYarn, numYarnUnassigned, numYarnActive, numYarnCompleted));
                log.debug(String.format("requesting %d new containers, releasing %d", numRequestAdditional, numReleased));

                Priority priority = Records.newRecord(Priority.class);
                priority.setPriority(0);

                Resource resource = Records.newRecord(Resource.class);
                resource.setMemory(256); // TODO make dynamic

                // "*" requests containers on any host
                ResourceRequest resourceRequest = Records.newRecord(ResourceRequest.class);
                resourceRequest.setHostName("*");
                resourceRequest.setNumContainers(numRequestAdditional);
                resourceRequest.setPriority(priority);
                resourceRequest.setCapability(resource);

                AllocateRequest request = Records.newRecord(AllocateRequest.class);
                request.setResponseId(responseId);
                request.setApplicationAttemptId(attemptId);
                request.addAsk(resourceRequest);
                request.addAllReleases(destroyedReleasedIds);
                request.addAllReleases(unneededReleasedIds);

                responseId++;

                AllocateResponse allocateResponse = null;
                try {
                    allocateResponse = protocol.allocate(request);
                } catch (YarnRemoteException e) {
                    // ignore and retry on the next cycle
                    log.error("Error allocating containers", e);
                    return;
                }

                AMResponse response = allocateResponse.getAMResponse();

                // remove unassigned container about to be freed
                for (ContainerId id : unneededReleasedIds) {
                    log.info(String.format("Unassigned container '%s' about to be freed, removing", id));
                    unassignedContainers.remove(id);
                }

                // newly added containers
                for (Container container : response.getAllocatedContainers()) {
                    unassignedContainers.put(container.getId(), container);
                }

                log.info(String.format("%d new containers available, %d required", unassignedContainers.size(), numMetaAcquire));

                // pair unassigned yarn containers with meta nodes awaiting assignment
                Iterator<Container> itYarn = unassignedContainers.values().iterator();
                Iterator<YarnContainerData> itMeta = yarndata.iterator();
                while (itYarn.hasNext() && itMeta.hasNext()) {
                    YarnContainerData meta = itMeta.next();

                    // meta node already has a yarn container assigned
                    if (meta.yarnId >= 0)
                        continue;

                    Container containerYarn = itYarn.next();

                    log.debug(String.format("assigning yarn container '%s' to container node '%s'", containerYarn.getId(), meta.id));

                    String command = String.format(YARN_CONTAINER_COMMAND, YarnUtils.YARN_CONTAINER_PATH, ApplicationConstants.LOG_DIR_EXPANSION_VAR,
                            ApplicationConstants.LOG_DIR_EXPANSION_VAR);

                    log.debug(String.format("Running container command \"%s\"", command));

                    // configuration
                    YarnContainerProcessProperties containerProp = meta.getProperties();
                    containerProp.setProperty(YarnContainerProcessProperties.ADDRESS, properties.getAddress());
                    containerProp.setProperty(YarnContainerProcessProperties.CLUSTER, properties.getCluster());
                    containerProp.setProperty(YarnContainerProcessProperties.YARNDATA, properties.getYarnData());
                    containerProp.setProperty(YarnContainerProcessProperties.NAME, meta.id);

                    File propertiesFile = YarnUtils.writePropertiesToTemp(containerProp);

                    // HDFS: stage container archive and properties under a per-container namespace
                    final String namespace = attemptId.getApplicationId().toString() + "/" + meta.id;
                    final Path containerArchive = YarnUtils.copyToHdfs(YarnUtils.YARN_CONTAINER_STAGING, YarnUtils.YARN_CONTAINER_STAGING, namespace, yarnConfig);
                    final Path containerProperties = YarnUtils.copyToHdfs(propertiesFile.getCanonicalPath(), YarnUtils.YARN_CONTAINER_PROPERTIES, namespace, yarnConfig);

                    // local resources
                    Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();
                    localResources.put(YarnUtils.YARN_CONTAINER_DESTINATION,
                            YarnUtils.createHdfsResource(containerArchive, LocalResourceType.ARCHIVE, yarnConfig));
                    localResources.put(YarnUtils.YARN_CONTAINER_PROPERTIES,
                            YarnUtils.createHdfsResource(containerProperties, LocalResourceType.FILE, yarnConfig));

                    ContainerLaunchContext context = Records.newRecord(ContainerLaunchContext.class);
                    context.setContainerId(containerYarn.getId());
                    context.setResource(containerYarn.getResource());
                    context.setEnvironment(Maps.<String, String> newHashMap());
                    context.setCommands(Collections.singletonList(command));
                    context.setLocalResources(localResources);
                    context.setUser(properties.getUser());

                    log.debug(String.format("container '%s' executing command '%s'", meta.id, command));

                    StartContainerRequest startReq = Records.newRecord(StartContainerRequest.class);
                    startReq.setContainerLaunchContext(context);

                    try {
                        getContainerManager(containerYarn).startContainer(startReq);

                    } catch (YarnRemoteException e) {
                        log.error(String.format("Error starting container '%s'", meta.id), e);
                        return;
                    }

                    log.debug(String.format("container '%s' started, updating container node", meta.id));

                    meta.setProperties(containerProp);
                    meta.setState(ContainerState.CONNECTING);
                    meta.setYarnId(containerYarn.getId().getId());
                    yarnDataService.update(meta);

                    yarn2meta.put(containerYarn.getId(), meta.id);

                    log.debug(String.format("removing '%s' from unassigned yarn containers and adding to active list", containerYarn.getId()));

                    itYarn.remove();
                    activeContainers.put(containerYarn.getId(), containerYarn);

                    // cleanup
                    propertiesFile.deleteOnExit();

                }

                // move completed containers out of the unassigned/active maps and finalize meta data
                for (ContainerStatus status : response.getCompletedContainersStatuses()) {
                    ContainerId id = status.getContainerId();

                    log.info(String.format("Container '%s' completed", id));

                    if (unassignedContainers.containsKey(id)) {
                        log.info(String.format("Unassigned container '%s' terminated, removing", id));
                        unassignedContainers.remove(id);
                    }

                    if (activeContainers.containsKey(id)) {
                        log.info(String.format("Active container '%s' terminated, removing", id));
                        activeContainers.remove(id);

                        String metaId = yarn2meta.get(id);
                        YarnContainerData meta = yarnDataService.read(metaId);

                        log.debug(String.format("container '%s' finalized, updating container node", meta.id));

                        yarnDataService.update(meta.setState(ContainerState.FINALIZE));
                    }

                    completedContainers.put(id, status);
                }

                log.debug("yarn service update cycle complete");

            } catch (Exception e) {
                log.error("Error while executing yarn update cycle", e);
            }
        }

        /**
         * Builds the list of unassigned containers to release when more are
         * held than required (numYarnRequired negative), removing them from
         * the unassigned map.
         */
        private List<ContainerId> createOverstockReleaseList(int numYarnRequired) {
            List<ContainerId> unneededReleasedIds = new ArrayList<ContainerId>();
            Iterator<Container> itUnassigned = unassignedContainers.values().iterator();
            if (numYarnRequired < 0) {
                for (int i = 0; i < -numYarnRequired && itUnassigned.hasNext(); i++) {
                    Container container = itUnassigned.next();
                    unneededReleasedIds.add(container.getId());
                    log.debug(String.format("Container '%s' no longer required, removing", container.getId()));
                    itUnassigned.remove();
                }
            }
            return unneededReleasedIds;
        }

        /**
         * Builds the list of yarn container ids to release for meta nodes in
         * HALTED state.
         */
        private List<ContainerId> createDestroyedReleaseList(Collection<YarnContainerData> yarndata) {
            List<ContainerId> releasedIds = new ArrayList<ContainerId>();
            for (YarnContainerData meta : yarndata) {
                if (meta.state == ContainerState.HALTED) {
                    ContainerId containerId = Records.newRecord(ContainerId.class);
                    containerId.setApplicationAttemptId(attemptId);
                    containerId.setId(meta.yarnId);
                    releasedIds.add(containerId);
                    log.debug(String.format("releasing container '%s'", containerId));
                }
            }
            return releasedIds;
        }

        /** Counts meta nodes in ACQUIRE state, i.e. waiting for a yarn container. */
        private int countAcquireMeta(Collection<YarnContainerData> yarndata) {
            int numMetaAcquire = 0;
            for (YarnContainerData meta : yarndata) {
                if (meta.state == ContainerState.ACQUIRE) {
                    numMetaAcquire++;
                }
            }
            return numMetaAcquire;
        }

        /** Counts meta nodes that are neither waiting (ACQUIRE) nor terminating (HALTED, FINALIZE). */
        private int countActiveMeta(Collection<YarnContainerData> yarndata) {
            int numMetaActive = 0;
            for (YarnContainerData meta : yarndata) {
                if (meta.state != ContainerState.ACQUIRE && meta.state != ContainerState.HALTED && meta.state != ContainerState.FINALIZE) {
                    numMetaActive++;
                }
            }
            return numMetaActive;
        }
    }

    /**
     * Creates an RPC proxy to the node manager hosting the given container,
     * used to launch the container process.
     */
    private ContainerManager getContainerManager(Container container) {
        YarnConfiguration yarnConf = new YarnConfiguration(yarnConfig);
        YarnRPC rpc = YarnRPC.create(yarnConf);
        NodeId nodeId = container.getNodeId();
        String containerIpPort = String.format("%s:%d", nodeId.getHost(), nodeId.getPort());
        log.info("Connecting to ContainerManager at: " + containerIpPort);
        InetSocketAddress addr = NetUtils.createSocketAddr(containerIpPort);
        ContainerManager cm = (ContainerManager) rpc.getProxy(ContainerManager.class, addr, yarnConfig);
        return cm;
    }

    /** Removes locally staged master artifacts (destination dir, properties, container staging archive). */
    public static void destroyLocalMasterNamespace() {
        log.info("cleaning up master directory");
        FileUtils.deleteQuietly(new File(YarnUtils.YARN_MASTER_DESTINATION));
        FileUtils.deleteQuietly(new File(YarnUtils.YARN_MASTER_PROPERTIES));
        FileUtils.deleteQuietly(new File(YarnUtils.YARN_CONTAINER_STAGING));
    }

}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnStatusProvider.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnStatusProvider.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnStatusProvider.java
new file mode 100644
index 0000000..6ec4710
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnStatusProvider.java
@@ -0,0 +1,67 @@
+package org.apache.helix.autoscale.impl.yarn;
+
+import java.util.Properties;
+
+import org.apache.helix.autoscale.StatusProviderService;
+import org.apache.helix.autoscale.impl.yarn.YarnContainerData.ContainerState;
+import org.apache.log4j.Logger;
+
+/**
+ * StatusProvider for YARN-based containers spawned via
+ * {@link YarnContainerProvider}. Reads {@link YarnDataProvider} meta data.
+ * Runnable and configurable service.
+ * 
+ */
+public class YarnStatusProvider implements StatusProviderService {
+
+    static final Logger       log = Logger.getLogger(YarnStatusProvider.class);
+
+    String                    yarndata;
+
+    ZookeeperYarnDataProvider yarnDataService;
+
+    public YarnStatusProvider() {
+        // left blank
+    }
+
+    public YarnStatusProvider(String yarndata) {
+        this.yarndata = yarndata;
+        this.yarnDataService = new ZookeeperYarnDataProvider(yarndata);
+    }
+
+    @Override
+    public void configure(Properties properties) throws Exception {
+        this.yarndata = properties.getProperty("yarndata");
+        this.yarnDataService = new ZookeeperYarnDataProvider(yarndata);
+    }
+
+    @Override
+    public void start() throws Exception {
+        yarnDataService = new ZookeeperYarnDataProvider(yarndata);
+        yarnDataService.start();
+    }
+
+    @Override
+    public void stop() throws Exception {
+        if (yarnDataService != null) {
+            yarnDataService.stop();
+            yarnDataService = null;
+        }
+    }
+
+    @Override
+    public boolean exists(String id) {
+        return yarnDataService.exists(id);
+    }
+
+    @Override
+    public boolean isHealthy(String id) {
+        try {
+            return yarnDataService.read(id).state == ContainerState.ACTIVE;
+        } catch (Exception e) {
+            log.warn(String.format("Could not get activity data of %s", id));
+            return false;
+        }
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnUtils.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnUtils.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnUtils.java
new file mode 100644
index 0000000..1051696
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnUtils.java
@@ -0,0 +1,174 @@
+package org.apache.helix.autoscale.impl.yarn;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.Writer;
+import java.nio.charset.Charset;
+import java.util.Properties;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileContext;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.UnsupportedFileSystemException;
+import org.apache.hadoop.yarn.api.records.LocalResource;
+import org.apache.hadoop.yarn.api.records.LocalResourceType;
+import org.apache.hadoop.yarn.api.records.LocalResourceVisibility;
+import org.apache.hadoop.yarn.api.records.URL;
+import org.apache.hadoop.yarn.util.ConverterUtils;
+import org.apache.hadoop.yarn.util.Records;
+import org.apache.helix.autoscale.impl.yarn.YarnContainerData.ContainerState;
+import org.apache.log4j.Logger;
+
+import com.google.common.base.Preconditions;
+import com.google.common.io.Files;
+import com.google.gson.Gson;
+import com.google.gson.GsonBuilder;
+import com.google.gson.TypeAdapter;
+import com.google.gson.stream.JsonReader;
+import com.google.gson.stream.JsonToken;
+import com.google.gson.stream.JsonWriter;
+
+/**
+ * Utility for writing property files, transferring data via HDFS and
+ * serializing {@link YarnContainerData} for zookeeper.
+ * 
+ */
+class YarnUtils {
+
+    static final Logger log                         = Logger.getLogger(YarnUtils.class);
+
+    static final String YARN_MASTER_ARCHIVE_PATH    = "target/metamanager-assembly.tar.gz";
+    static final String YARN_MASTER_PATH            = "master/metamanager/bin/yarn-master-process.sh";
+    static final String YARN_MASTER_STAGING         = "master.tar.gz";
+    static final String YARN_MASTER_DESTINATION     = "master";
+    static final String YARN_MASTER_PROPERTIES      = "master.properties";
+    static final String YARN_CONTAINER_ARCHIVE_PATH = "target/metamanager-assembly.tar.gz";
+    static final String YARN_CONTAINER_STAGING      = "container.tar.gz";
+    static final String YARN_CONTAINER_PATH         = "container/metamanager/bin/yarn-container-process.sh";
+    static final String YARN_CONTAINER_DESTINATION  = "container";
+    static final String YARN_CONTAINER_PROPERTIES   = "container.properties";
+
+    static Gson         gson;
+    static {
+        GsonBuilder builder = new GsonBuilder();
+        builder.registerTypeAdapter(ContainerState.class, new ContainerStateAdapter());
+        builder.setPrettyPrinting();
+        gson = builder.create();
+    }
+
+    public static String toJson(YarnContainerData meta) {
+        return gson.toJson(meta);
+    }
+
+    public static YarnContainerData fromJson(String json) {
+        return gson.fromJson(json, YarnContainerData.class);
+    }
+
+    public static Properties getPropertiesFromPath(String path) throws IOException {
+        Properties properties = new Properties();
+        properties.load(new InputStreamReader(new FileInputStream(path)));
+        return properties;
+    }
+
+    public static File writePropertiesToTemp(Properties properties) throws IOException {
+        File tmpFile = File.createTempFile("provider", ".properties");
+        Writer writer = Files.newWriter(tmpFile, Charset.defaultCharset());
+        properties.store(writer, null);
+        writer.flush();
+        writer.close();
+        return tmpFile;
+    }
+
+    public static Path copyToHdfs(String source, String dest, String namespace, Configuration conf) throws IOException {
+        Path sourcePath = makeQualified(source);
+        Path destPath = makeQualified(conf.get(FileSystem.FS_DEFAULT_NAME_KEY) + "/" + namespace + "/" + dest);
+        log.debug(String.format("Copying '%s' to '%s'", sourcePath, destPath));
+
+        FileSystem fs = FileSystem.get(conf);
+        fs.copyFromLocalFile(false, true, sourcePath, destPath);
+        fs.close();
+        return destPath;
+    }
+
+    public static void destroyHdfsNamespace(String namespace, Configuration conf) throws IOException {
+        Path path = makeQualified(conf.get(FileSystem.FS_DEFAULT_NAME_KEY) + "/" + namespace);
+        log.debug(String.format("Deleting '%s'", path));
+
+        FileSystem fs = FileSystem.get(conf);
+        fs.delete(path, true);
+        fs.close();
+    }
+
+    public static LocalResource createHdfsResource(Path path, LocalResourceType type, Configuration conf) throws IOException {
+        FileSystem fs = FileSystem.get(conf);
+
+        URL url = ConverterUtils.getYarnUrlFromPath(path);
+
+        FileStatus status = fs.getFileStatus(path);
+
+        LocalResource resource = Records.newRecord(LocalResource.class);
+        resource.setResource(url);
+        resource.setSize(status.getLen());
+        resource.setTimestamp(status.getModificationTime());
+        resource.setType(type);
+        resource.setVisibility(LocalResourceVisibility.APPLICATION);
+
+        fs.close();
+
+        return resource;
+    }
+
+    static Path makeQualified(String path) throws UnsupportedFileSystemException {
+        return FileContext.getFileContext().makeQualified(new Path(path));
+    }
+
+    static class ContainerStateAdapter extends TypeAdapter<ContainerState> {
+        @Override
+        public ContainerState read(JsonReader reader) throws IOException {
+            if (reader.peek() == JsonToken.NULL) {
+                reader.nextNull();
+                return null;
+            }
+            return ContainerState.valueOf(reader.nextString());
+        }
+
+        @Override
+        public void write(JsonWriter writer, ContainerState value) throws IOException {
+            if (value == null) {
+                writer.nullValue();
+                return;
+            }
+            writer.value(value.name());
+        }
+    }
+
+    static YarnContainerProcessProperties createContainerProcessProperties(Properties properties) {
+        Preconditions.checkNotNull(properties);
+        YarnContainerProcessProperties yarnProp = new YarnContainerProcessProperties();
+        yarnProp.putAll(properties);
+        return yarnProp;
+    }
+
+    static YarnContainerProviderProperties createContainerProviderProperties(Properties properties) {
+        Preconditions.checkNotNull(properties);
+        YarnContainerProviderProperties yarnProp = new YarnContainerProviderProperties();
+        yarnProp.putAll(properties);
+        return yarnProp;
+    }
+
+    static YarnMasterProperties createMasterProperties(Properties properties) {
+        Preconditions.checkNotNull(properties);
+        YarnMasterProperties yarnProp = new YarnMasterProperties();
+        yarnProp.putAll(properties);
+        return yarnProp;
+    }
+
+    private YarnUtils() {
+        // left blank
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/ZookeeperYarnDataProvider.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/ZookeeperYarnDataProvider.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/ZookeeperYarnDataProvider.java
new file mode 100644
index 0000000..32f8c79
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/ZookeeperYarnDataProvider.java
@@ -0,0 +1,100 @@
+package org.apache.helix.autoscale.impl.yarn;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Properties;
+
+import org.I0Itec.zkclient.ZkClient;
+import org.apache.helix.autoscale.Service;
+import org.apache.log4j.Logger;
+
+/**
+ * Configurable and runnable service for {@link YarnDataProvider} based on
+ * zookeeper.
+ * 
+ */
+public class ZookeeperYarnDataProvider implements YarnDataProvider, Service {
+
+    static final Logger log                 = Logger.getLogger(ZookeeperYarnDataProvider.class);
+
+    static final String CONTAINER_NAMESPACE = "containers";
+
+    static final String BASE_PATH           = "/" + CONTAINER_NAMESPACE;
+
+    static final int    META_TIMEOUT        = 5000;
+    static final long   POLL_INTERVAL       = 100;
+
+    String              yarndata;
+
+    ZkClient            client;
+
+    public ZookeeperYarnDataProvider() {
+        // left blank
+    }
+
+    public ZookeeperYarnDataProvider(String yarndataAddress) {
+        this.yarndata = yarndataAddress;
+    }
+
+    @Override
+    public void configure(Properties properties) throws Exception {
+        this.yarndata = properties.getProperty("yarndata");
+    }
+
+    @Override
+    public void start() {
+        log.debug(String.format("starting yarndata service for '%s'", yarndata));
+
+        client = new ZkClient(yarndata, META_TIMEOUT, META_TIMEOUT);
+
+        client.createPersistent(BASE_PATH, true);
+    }
+
+    @Override
+    public void stop() {
+        log.debug(String.format("stopping yarndata service for '%s'", yarndata));
+        if (client != null) {
+            client.close();
+            client = null;
+        }
+    }
+
+    @Override
+    public boolean exists(String id) {
+        return client.exists(makePath(id));
+    }
+
+    @Override
+    public void create(YarnContainerData meta) throws Exception {
+        client.createEphemeral(makePath(meta.id), YarnUtils.toJson(meta));
+    }
+
+    @Override
+    public YarnContainerData read(String id) throws Exception {
+        return YarnUtils.fromJson(client.<String> readData(makePath(id)));
+    }
+
+    @Override
+    public Collection<YarnContainerData> readAll() throws Exception {
+        Collection<YarnContainerData> yarndata = new ArrayList<YarnContainerData>();
+        for (String id : client.getChildren(BASE_PATH)) {
+            yarndata.add(YarnUtils.fromJson(client.<String> readData(makePath(id))));
+        }
+        return yarndata;
+    }
+
+    @Override
+    public void update(YarnContainerData meta) throws Exception {
+        client.writeData(makePath(meta.id), YarnUtils.toJson(meta));
+    }
+
+    @Override
+    public void delete(String id) throws Exception {
+        client.delete(makePath(id));
+    }
+
+    String makePath(String containerId) {
+        return BASE_PATH + "/" + containerId;
+    }
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/provider/ProviderProcess.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/provider/ProviderProcess.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/provider/ProviderProcess.java
new file mode 100644
index 0000000..2fe3166
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/provider/ProviderProcess.java
@@ -0,0 +1,82 @@
+package org.apache.helix.autoscale.provider;
+
+import java.util.Properties;
+
+import org.apache.helix.HelixAdmin;
+import org.apache.helix.HelixManager;
+import org.apache.helix.HelixManagerFactory;
+import org.apache.helix.InstanceType;
+import org.apache.helix.autoscale.ClusterAdmin;
+import org.apache.helix.autoscale.ContainerProvider;
+import org.apache.helix.autoscale.HelixClusterAdmin;
+import org.apache.helix.autoscale.Service;
+import org.apache.helix.manager.zk.ZKHelixAdmin;
+import org.apache.helix.model.InstanceConfig;
+import org.apache.log4j.Logger;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * Helix participant for ContainerProvider. Configurable via ProviderProperties
+ * and runnable service.
+ * 
+ */
+public class ProviderProcess implements Service {
+    static final Logger log = Logger.getLogger(ProviderProcess.class);
+
+    ClusterAdmin        admin;
+
+    ProviderProperties  properties;
+    ContainerProvider   provider;
+    HelixAdmin          helixAdmin;
+    HelixManager        participantManager;
+
+    @Override
+    public void configure(Properties properties) throws Exception {
+        Preconditions.checkNotNull(properties);
+        ProviderProperties providerProperties = new ProviderProperties();
+        providerProperties.putAll(properties);
+        Preconditions.checkArgument(providerProperties.isValid());
+
+        this.properties = providerProperties;
+
+    }
+
+    public void setConteinerProvider(ContainerProvider provider) {
+        this.provider = provider;
+    }
+
+    @Override
+    public void start() throws Exception {
+        Preconditions.checkNotNull(provider);
+
+        log.info(String.format("Registering provider '%s' at '%s/%s'", properties.getName(), properties.getMetaAddress(), properties.getMetaCluster()));
+        HelixAdmin metaHelixAdmin = new ZKHelixAdmin(properties.getMetaAddress());
+        metaHelixAdmin.addInstance(properties.getMetaCluster(), new InstanceConfig(properties.getName()));
+        metaHelixAdmin.close();
+
+        log.info(String.format("Starting provider '%s'", properties.getName()));
+        helixAdmin = new ZKHelixAdmin(properties.getAddress());
+        admin = new HelixClusterAdmin(properties.getCluster(), helixAdmin);
+
+        participantManager = HelixManagerFactory.getZKHelixManager(properties.getMetaCluster(), properties.getName(), InstanceType.PARTICIPANT,
+                properties.getMetaAddress());
+        participantManager.getStateMachineEngine().registerStateModelFactory("OnlineOffline", new ProviderStateModelFactory(provider, admin));
+        participantManager.connect();
+
+        log.info(String.format("Successfully started provider '%s'", properties.getName()));
+    }
+
+    @Override
+    public void stop() {
+        log.info(String.format("Stopping provider '%s'", properties.getName()));
+        if (participantManager != null) {
+            participantManager.disconnect();
+            participantManager = null;
+        }
+        if (helixAdmin != null) {
+            helixAdmin.close();
+            helixAdmin = null;
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/provider/ProviderProperties.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/provider/ProviderProperties.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/provider/ProviderProperties.java
new file mode 100644
index 0000000..eef9fad
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/provider/ProviderProperties.java
@@ -0,0 +1,97 @@
+package org.apache.helix.autoscale.provider;
+
+import java.util.Collections;
+import java.util.Map;
+import java.util.Properties;
+import java.util.Set;
+
+import org.apache.helix.autoscale.bootstrapper.BootUtils;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * Base configuration for {@link ProviderProcess}. 
+ *
+ */
+public class ProviderProperties extends Properties {
+	/**
+	 * 
+	 */
+	private static final long serialVersionUID = -2209509977839674160L;
+	
+	public final static String ADDRESS = "address";
+	public final static String CLUSTER = "cluster";
+    public final static String METAADDRESS = "metaaddress";
+    public final static String METACLUSTER = "metacluster";
+	public final static String NAME = "name";
+	
+	public final static String CONTAINER_NAMESPACE = "containers";
+	
+	public boolean isValid() {
+		return(containsKey(ADDRESS) &&
+		       containsKey(CLUSTER) &&
+		       containsKey(METAADDRESS) &&
+               containsKey(METACLUSTER) &&
+               containsKey(NAME));
+	}
+	
+	public String getAddress() {
+		return getProperty(ADDRESS);
+	}
+	
+	public String getCluster() {
+	    return getProperty(CLUSTER);
+	}
+	
+    public String getMetaAddress() {
+        return getProperty(METAADDRESS);
+    }
+    
+    public String getMetaCluster() {
+        return getProperty(METACLUSTER);
+    }
+    
+	public String getName() {
+	    return getProperty(NAME);
+	}
+	
+	public Set<String> getContainers() {
+        if(!BootUtils.hasNamespace(this, CONTAINER_NAMESPACE))
+            return Collections.emptySet();
+	    return BootUtils.getNamespaces(BootUtils.getNamespace(this, CONTAINER_NAMESPACE));
+	}
+	
+	public boolean hasContainer(String id) {
+	    if(!BootUtils.hasNamespace(this, CONTAINER_NAMESPACE)) return false;
+	    if(!BootUtils.hasNamespace(BootUtils.getNamespace(this, CONTAINER_NAMESPACE), id)) return false;
+	    return true;
+	}
+	
+	public Properties getContainer(String id) {
+	    Preconditions.checkArgument(BootUtils.hasNamespace(this, CONTAINER_NAMESPACE), "no container namespace");
+        Preconditions.checkArgument(BootUtils.hasNamespace(BootUtils.getNamespace(this, CONTAINER_NAMESPACE), id), "container %s not configured", id);
+	    return BootUtils.getNamespace(BootUtils.getNamespace(this, CONTAINER_NAMESPACE), id);
+	}
+	
+	public void addContainer(String id, Properties properties) {
+	    Preconditions.checkArgument(!getContainers().contains(id), "Already contains container type %s", id);
+	    
+	    // add container config
+        for(Map.Entry<Object, Object> entry : properties.entrySet()) {
+            this.put(CONTAINER_NAMESPACE + "." + id + "." + entry.getKey(), entry.getValue());
+        }
+	}
+
+    @Override
+    public Object get(Object key) {
+        Preconditions.checkState(containsKey(key));
+        return super.get(key);
+    }
+    
+    @Override
+    public String getProperty(String key) {
+        Preconditions.checkState(containsKey(key));
+        return super.getProperty(key);
+    }
+    
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/provider/ProviderRebalancer.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/provider/ProviderRebalancer.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/provider/ProviderRebalancer.java
new file mode 100644
index 0000000..2b6e428
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/provider/ProviderRebalancer.java
@@ -0,0 +1,352 @@
+package org.apache.helix.autoscale.provider;
+
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.SortedSet;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.helix.HelixManager;
+import org.apache.helix.autoscale.StatusProvider;
+import org.apache.helix.autoscale.TargetProvider;
+import org.apache.helix.controller.rebalancer.Rebalancer;
+import org.apache.helix.controller.stages.ClusterDataCache;
+import org.apache.helix.controller.stages.CurrentStateOutput;
+import org.apache.helix.model.IdealState;
+import org.apache.helix.model.LiveInstance;
+import org.apache.helix.model.Partition;
+import org.apache.helix.model.Resource;
+import org.apache.helix.model.ResourceAssignment;
+import org.apache.log4j.Logger;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.Sets;
+
+/**
+ * Rebalancer for meta cluster. Polls {@link TargetProvider} and
+ * {@link StatusProvider} and reads and sets IdealState of meta cluster participants (
+ * {@link ProviderProcess}). The number of active container is set to the target
+ * count. Failed containers are shut down and restarted on any available
+ * provider. Also, container counts are balanced across multiple providers.<br/>
+ * <b>NOTE:</b> status and target provider are injected via
+ * {@link ProviderRebalancerSingleton}<br/>
+ * <br/>
+ * <b>IdealState mapping:</b><br/>
+ * resource = container type<br/>
+ * partition = logical container instance<br/>
+ * instance = container provider<br/>
+ * status = physical container instance presence<br/>
+ */
+public class ProviderRebalancer implements Rebalancer {
+
+    static final Logger log                 = Logger.getLogger(ProviderRebalancer.class);
+
+    static final long   UPDATE_INTERVAL_MIN = 1500;
+
+    static final Object lock                = new Object();
+    static long         nextUpdate          = 0;
+
+    TargetProvider      targetProvider;
+    StatusProvider      statusProvider;
+    HelixManager        manager;
+
+    @Override
+    public void init(HelixManager manager) {
+        this.targetProvider = ProviderRebalancerSingleton.getTargetProvider();
+        this.statusProvider = ProviderRebalancerSingleton.getStatusProvider();
+        this.manager = manager;
+    }
+
+    @Override
+    public ResourceAssignment computeResourceMapping(Resource resource, IdealState idealState, CurrentStateOutput currentStateOutput,
+            ClusterDataCache clusterData) {
+
+        final String resourceName = resource.getResourceName();
+        final String containerType = resourceName;
+
+        final SortedSet<String> allContainers = Sets.newTreeSet(new IndexedNameComparator());
+        allContainers.addAll(idealState.getPartitionSet());
+
+        final SortedSet<String> allProviders = Sets.newTreeSet(new IndexedNameComparator());
+        for (LiveInstance instance : clusterData.getLiveInstances().values()) {
+            allProviders.add(instance.getId());
+        }
+
+        final ResourceState currentState = new ResourceState(resourceName, currentStateOutput);
+
+        // target container count
+        log.debug(String.format("Retrieving target container count for type '%s'", containerType));
+        int targetCount = -1;
+        try {
+            targetCount = targetProvider.getTargetContainerCount(containerType);
+        } catch (Exception e) {
+            log.error(String.format("Could not retrieve target count for '%s'", containerType), e);
+            return new ResourceAssignment(resourceName);
+        }
+
+        // provider sanity check
+        if (allProviders.isEmpty()) {
+            log.warn(String.format("Could not find any providers"));
+            return new ResourceAssignment(resourceName);
+        }
+
+        // all containers
+        SortedSet<String> assignedContainers = getAssignedContainers(currentState, allContainers);
+        SortedSet<String> failedContainers = getFailedContainers(currentState, allContainers);
+
+        log.info(String.format("Rebalancing '%s' (target=%d, active=%d, failures=%d)", resourceName, targetCount, assignedContainers.size(),
+                failedContainers.size()));
+
+        if (log.isDebugEnabled()) {
+            log.debug(String.format("%s: assigned containers %s", resourceName, assignedContainers));
+            log.debug(String.format("%s: failed containers %s", resourceName, failedContainers));
+        }
+
+        // assignment
+        int maxCountPerProvider = (int) Math.ceil(targetCount / (float) allProviders.size());
+
+        ResourceAssignment assignment = new ResourceAssignment(resourceName);
+        CountMap counts = new CountMap(allProviders);
+        int assignmentCount = 0;
+
+        // currently assigned
+        for (String containerName : assignedContainers) {
+            String providerName = getProvider(currentState, containerName);
+            Partition partition = new Partition(containerName);
+
+            if (failedContainers.contains(containerName)) {
+                log.warn(String.format("Container '%s:%s' failed, going offline", providerName, containerName));
+                assignment.addReplicaMap(partition, Collections.singletonMap(providerName, "OFFLINE"));
+
+            } else if (counts.get(providerName) >= maxCountPerProvider) {
+                log.warn(String.format("Container '%s:%s' misassigned, going offline", providerName, containerName));
+                assignment.addReplicaMap(partition, Collections.singletonMap(providerName, "OFFLINE"));
+
+            } else {
+                assignment.addReplicaMap(partition, Collections.singletonMap(providerName, "ONLINE"));
+            }
+
+            counts.increment(providerName);
+            assignmentCount++;
+        }
+
+        // currently unassigned
+        SortedSet<String> unassignedContainers = Sets.newTreeSet(new IndexedNameComparator());
+        unassignedContainers.addAll(allContainers);
+        unassignedContainers.removeAll(assignedContainers);
+
+        for (String containerName : unassignedContainers) {
+            if (assignmentCount >= targetCount)
+                break;
+
+            String providerName = counts.getMinKey();
+            Partition partition = new Partition(containerName);
+
+            if (failedContainers.contains(containerName)) {
+                log.warn(String.format("Container '%s:%s' failed and unassigned, going offline", providerName, containerName));
+                assignment.addReplicaMap(partition, Collections.singletonMap(providerName, "OFFLINE"));
+
+            } else {
+                assignment.addReplicaMap(partition, Collections.singletonMap(providerName, "ONLINE"));
+            }
+
+            counts.increment(providerName);
+            assignmentCount++;
+        }
+
+        if (log.isDebugEnabled()) {
+            log.debug(String.format("assignment counts: %s", counts));
+            log.debug(String.format("assignment: %s", assignment));
+        }
+
+        return assignment;
+    }
+
+    boolean hasProvider(ResourceState state, String containerName) {
+        Map<String, String> currentStateMap = state.getCurrentStateMap(containerName);
+        Map<String, String> pendingStateMap = state.getPendingStateMap(containerName);
+        return hasInstance(currentStateMap, "ONLINE") || hasInstance(pendingStateMap, "ONLINE");
+    }
+
+    String getProvider(ResourceState state, String containerName) {
+        Map<String, String> currentStateMap = state.getCurrentStateMap(containerName);
+        if (hasInstance(currentStateMap, "ONLINE"))
+            return getInstance(currentStateMap, "ONLINE");
+
+        Map<String, String> pendingStateMap = state.getPendingStateMap(containerName);
+        return getInstance(pendingStateMap, "ONLINE");
+    }
+
+    SortedSet<String> getFailedContainers(ResourceState state, Collection<String> containers) {
+        SortedSet<String> failedContainers = Sets.newTreeSet(new IndexedNameComparator());
+        for (String containerName : containers) {
+            Map<String, String> currentStateMap = state.getCurrentStateMap(containerName);
+            Map<String, String> pendingStateMap = state.getPendingStateMap(containerName);
+
+            if (hasInstance(currentStateMap, "ERROR")) {
+                failedContainers.add(containerName);
+                continue;
+            }
+
+            if (!hasInstance(currentStateMap, "ONLINE") || hasInstance(pendingStateMap, "OFFLINE"))
+                continue;
+
+            // container listed online and not in transition, but not active
+            if (!statusProvider.isHealthy(containerName)) {
+                log.warn(String.format("Container '%s' designated ONLINE, but is not active", containerName));
+                failedContainers.add(containerName);
+            }
+        }
+        return failedContainers;
+    }
+
+    SortedSet<String> getAssignedContainers(ResourceState state, Collection<String> containers) {
+        SortedSet<String> assignedContainers = Sets.newTreeSet(new IndexedNameComparator());
+        for (String containerName : containers) {
+
+            if (!hasProvider(state, containerName))
+                continue;
+
+            assignedContainers.add(containerName);
+        }
+        return assignedContainers;
+    }
+
+    boolean hasInstance(Map<String, String> stateMap, String state) {
+        if (!stateMap.isEmpty()) {
+            for (Map.Entry<String, String> entry : stateMap.entrySet()) {
+                if (entry.getValue().equals(state)) {
+                    return true;
+                }
+            }
+        }
+        return false;
+    }
+
+    String getInstance(Map<String, String> stateMap, String state) {
+        if (!stateMap.isEmpty()) {
+            for (Map.Entry<String, String> entry : stateMap.entrySet()) {
+                if (entry.getValue().equals(state)) {
+                    return entry.getKey();
+                }
+            }
+        }
+        throw new IllegalArgumentException(String.format("Could not find instance with state '%s'", state));
+    }
+
+    class IndexedNameComparator implements Comparator<String> {
+        Pattern pattern = Pattern.compile("^(.*)([0-9]+)$");
+
+        @Override
+        public int compare(String o1, String o2) {
+            Matcher m1 = pattern.matcher(o1);
+            Matcher m2 = pattern.matcher(o2);
+
+            boolean find1 = m1.find();
+            boolean find2 = m2.find();
+
+            if (!find1 && !find2)
+                return o1.compareTo(o2);
+
+            if (!find1 && find2)
+                return -1;
+
+            if (find1 && !find2)
+                return 1;
+
+            String name1 = m1.group(1);
+            String name2 = m2.group(1);
+
+            int name_comp = name1.compareTo(name2);
+            if (name_comp != 0)
+                return name_comp;
+
+            int index1 = Integer.valueOf(m1.group(2));
+            int index2 = Integer.valueOf(m2.group(2));
+
+            return (int) Math.signum(index1 - index2);
+        }
+    }
+
+    class CountMap extends HashMap<String, Integer> {
+        /**
+         * 
+         */
+        private static final long serialVersionUID = 3954138748385337978L;
+
+        public CountMap(Collection<String> keys) {
+            super();
+            for (String key : keys) {
+                put(key, 0);
+            }
+        }
+
+        @Override
+        public Integer get(Object key) {
+            Preconditions.checkArgument(containsKey(key), "Key %s not found", key);
+            return super.get(key);
+        }
+
+        public int increment(String key) {
+            int newValue = get(key) + 1;
+            Preconditions.checkArgument(containsKey(key), "Key %s not found", key);
+            put(key, newValue);
+            return newValue;
+        }
+
+        public String getMinKey() {
+            Preconditions.checkState(size() > 0, "Must contain at least one item");
+
+            String minKey = null;
+            int minValue = Integer.MAX_VALUE;
+
+            for (String key : keySet()) {
+                int value = get(key);
+                if (value < minValue) {
+                    minValue = value;
+                    minKey = key;
+                }
+            }
+
+            return minKey;
+        }
+
+        public String getMaxKey() {
+            Preconditions.checkState(size() > 0, "Must contain at least one item");
+
+            String maxKey = null;
+            int maxValue = Integer.MIN_VALUE;
+
+            for (String key : keySet()) {
+                int value = get(key);
+                if (value > maxValue) {
+                    maxValue = value;
+                    maxKey = key;
+                }
+            }
+
+            return maxKey;
+        }
+    }
+
+    class ResourceState {
+        final String             resourceName;
+        final CurrentStateOutput state;
+
+        public ResourceState(String resourceName, CurrentStateOutput state) {
+            this.resourceName = resourceName;
+            this.state = state;
+        }
+
+        Map<String, String> getCurrentStateMap(String partitionName) {
+            return state.getCurrentStateMap(resourceName, new Partition(partitionName));
+        }
+
+        Map<String, String> getPendingStateMap(String partitionName) {
+            return state.getPendingStateMap(resourceName, new Partition(partitionName));
+        }
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/provider/ProviderRebalancerSingleton.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/provider/ProviderRebalancerSingleton.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/provider/ProviderRebalancerSingleton.java
new file mode 100644
index 0000000..16b8829
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/provider/ProviderRebalancerSingleton.java
@@ -0,0 +1,38 @@
+package org.apache.helix.autoscale.provider;
+
+import org.apache.helix.autoscale.StatusProvider;
+import org.apache.helix.autoscale.TargetProvider;
+import org.apache.log4j.Logger;
+
+/**
+ * Utility for dependency injection into ProviderRebalancer.
+ * 
+ */
+public class ProviderRebalancerSingleton {
+
+    static final Logger   log = Logger.getLogger(ProviderRebalancerSingleton.class);
+
+    // NOTE(review): plain static fields with no synchronization or volatile —
+    // safe publication across threads is the caller's responsibility; confirm
+    // setters run before the rebalancer starts reading.
+    static TargetProvider targetProvider;
+    static StatusProvider statusProvider;
+
+    private ProviderRebalancerSingleton() {
+        // utility holder; not instantiable
+    }
+
+    /** @return the globally registered target provider, or null if none was set */
+    public static TargetProvider getTargetProvider() {
+        return targetProvider;
+    }
+
+    /** Registers the target provider to be consumed by ProviderRebalancer. */
+    public static void setTargetProvider(TargetProvider targetProvider) {
+        ProviderRebalancerSingleton.targetProvider = targetProvider;
+    }
+
+    /** @return the globally registered status provider, or null if none was set */
+    public static StatusProvider getStatusProvider() {
+        return statusProvider;
+    }
+
+    /** Registers the status provider to be consumed by ProviderRebalancer. */
+    public static void setStatusProvider(StatusProvider statusProvider) {
+        ProviderRebalancerSingleton.statusProvider = statusProvider;
+    }
+
+}


[02/15] Adding Helix-task-framework and Yarn integration modules

Posted by ki...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/resources/Boot2By2Shell.properties
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/resources/Boot2By2Shell.properties b/recipes/meta-cluster-manager/src/main/resources/Boot2By2Shell.properties
new file mode 100644
index 0000000..830a586
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/resources/Boot2By2Shell.properties
@@ -0,0 +1,87 @@
+#
+# Zookeeper (optional)
+#
+zookeeper.datadir=/tmp/meta/zk/data
+zookeeper.logdir=/tmp/meta/zk/log
+zookeeper.port=2199
+
+#
+# Cluster
+#
+cluster.name=cluster
+cluster.address=localhost:2199
+
+#
+# Resource
+#
+resource.0.name=wsprod
+resource.0.cluster=cluster
+resource.0.address=localhost:2199
+resource.0.container=webserver
+resource.0.model=MasterSlave
+resource.0.partitions=15
+resource.0.replica=1
+
+resource.1.name=dbprod
+resource.1.cluster=cluster
+resource.1.address=localhost:2199
+resource.1.container=database
+resource.1.model=MasterSlave
+resource.1.partitions=8
+resource.1.replica=3
+
+#
+# Controller
+#
+controller.name=controller
+controller.cluster=cluster
+controller.address=localhost:2199
+controller.autorefresh=5000
+
+#
+# Metacluster
+#
+metacluster.name=meta
+metacluster.address=localhost:2199
+metacluster.managedcluster=cluster
+metacluster.managedaddress=localhost:2199
+
+#
+# Metaresource
+#
+metaresource.0.name=webserver
+metaresource.0.metacluster=meta
+metaresource.0.metaaddress=localhost:2199
+metaresource.0.class=org.apache.helix.metamanager.impl.container.DummyMasterSlaveProcess
+
+metaresource.1.name=database
+metaresource.1.metacluster=meta
+metaresource.1.metaaddress=localhost:2199
+metaresource.1.class=org.apache.helix.metamanager.impl.container.DummyMasterSlaveProcess
+
+#
+# Metaprovider
+#
+metaprovider.0.name=provider0
+metaprovider.0.metacluster=meta
+metaprovider.0.metaaddress=localhost:2199
+metaprovider.0.class=org.apache.helix.metamanager.impl.shell.ShellContainerProviderProcess
+
+metaprovider.1.name=provider1
+metaprovider.1.metacluster=meta
+metaprovider.1.metaaddress=localhost:2199
+metaprovider.1.class=org.apache.helix.metamanager.impl.shell.ShellContainerProviderProcess
+
+#
+# Metacontroller
+#
+metacontroller.name=metacontroller
+metacontroller.metacluster=meta
+metacontroller.metaaddress=localhost:2199
+metacontroller.autorefresh=5000
+
+metacontroller.status.class=org.apache.helix.metamanager.impl.shell.ShellStatusProvider
+
+metacontroller.target.class=org.apache.helix.metamanager.impl.StaticTargetProvider
+metacontroller.target.webserver=5
+metacontroller.target.database=3

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/resources/Boot2By2Yarn.properties
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/resources/Boot2By2Yarn.properties b/recipes/meta-cluster-manager/src/main/resources/Boot2By2Yarn.properties
new file mode 100644
index 0000000..6d220eb
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/resources/Boot2By2Yarn.properties
@@ -0,0 +1,98 @@
+#
+# Zookeeper (optional)
+#
+zookeeper.datadir=/tmp/meta/zk/data
+zookeeper.logdir=/tmp/meta/zk/log
+zookeeper.port=2199
+
+#
+# Cluster
+#
+cluster.name=cluster
+cluster.address=rm:2199
+
+#
+# Resource
+#
+resource.0.name=wsprod
+resource.0.cluster=cluster
+resource.0.address=rm:2199
+resource.0.container=webserver
+resource.0.model=MasterSlave
+resource.0.partitions=15
+resource.0.replica=1
+
+resource.1.name=dbprod
+resource.1.cluster=cluster
+resource.1.address=rm:2199
+resource.1.container=database
+resource.1.model=MasterSlave
+resource.1.partitions=8
+resource.1.replica=3
+
+#
+# Controller
+#
+controller.name=controller
+controller.cluster=cluster
+controller.address=rm:2199
+controller.autorefresh=5000
+
+#
+# Metacluster
+#
+metacluster.name=meta
+metacluster.address=rm:2199
+metacluster.managedcluster=cluster
+metacluster.managedaddress=rm:2199
+
+#
+# Metaresource
+#
+metaresource.0.name=webserver
+metaresource.0.metacluster=meta
+metaresource.0.metaaddress=rm:2199
+metaresource.0.class=org.apache.helix.metamanager.impl.container.DummyMasterSlaveProcess
+
+metaresource.1.name=database
+metaresource.1.metacluster=meta
+metaresource.1.metaaddress=rm:2199
+metaresource.1.class=org.apache.helix.metamanager.impl.container.DummyMasterSlaveProcess
+
+#
+# Metaprovider
+#
+metaprovider.0.name=provider0
+metaprovider.0.metacluster=meta
+metaprovider.0.metaaddress=rm:2199
+metaprovider.0.class=org.apache.helix.metamanager.impl.yarn.YarnContainerProviderProcess
+metaprovider.0.yarndata=rm:2199
+metaprovider.0.resourcemananger=rm:8032
+metaprovider.0.scheduler=rm:8030
+metaprovider.0.user=yarn
+metaprovider.0.hdfs=hdfs://rm:9000/
+
+metaprovider.1.name=provider1
+metaprovider.1.metacluster=meta
+metaprovider.1.metaaddress=rm:2199
+metaprovider.1.class=org.apache.helix.metamanager.impl.yarn.YarnContainerProviderProcess
+metaprovider.1.yarndata=rm:2199
+metaprovider.1.resourcemananger=rm:8032
+metaprovider.1.scheduler=rm:8030
+metaprovider.1.user=yarn
+metaprovider.1.hdfs=hdfs://rm:9000/
+
+#
+# Metacontroller
+#
+metacontroller.name=metacontroller
+metacontroller.metacluster=meta
+metacontroller.metaaddress=rm:2199
+metacontroller.autorefresh=5000
+
+metacontroller.status.class=org.apache.helix.metamanager.impl.yarn.YarnStatusProvider
+metacontroller.status.yarndata=rm:2199
+
+metacontroller.target.class=org.apache.helix.metamanager.impl.StaticTargetProvider
+metacontroller.target.webserver=5
+metacontroller.target.database=3

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/resources/BootLocal.properties
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/resources/BootLocal.properties b/recipes/meta-cluster-manager/src/main/resources/BootLocal.properties
new file mode 100644
index 0000000..a86c9f0
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/resources/BootLocal.properties
@@ -0,0 +1,68 @@
+#
+# Zookeeper (optional)
+#
+zookeeper.datadir=/tmp/meta/zk/data
+zookeeper.logdir=/tmp/meta/zk/log
+zookeeper.port=2199
+
+#
+# Cluster
+#
+cluster.name=cluster
+cluster.address=localhost:2199
+
+#
+# Resource
+#
+resource.name=resource
+resource.cluster=cluster
+resource.address=localhost:2199
+resource.container=container
+resource.model=MasterSlave
+resource.partitions=10
+resource.replica=3
+
+#
+# Controller
+#
+controller.name=controller
+controller.cluster=cluster
+controller.address=localhost:2199
+controller.autorefresh=5000
+
+#
+# Metacluster
+#
+metacluster.name=meta
+metacluster.address=localhost:2199
+metacluster.managedcluster=cluster
+metacluster.managedaddress=localhost:2199
+
+#
+# Metaresource
+#
+metaresource.name=container
+metaresource.metacluster=meta
+metaresource.metaaddress=localhost:2199
+metaresource.class=org.apache.helix.metamanager.impl.container.DummyMasterSlaveProcess
+
+#
+# Metaprovider
+#
+metaprovider.name=provider
+metaprovider.metacluster=meta
+metaprovider.metaaddress=localhost:2199
+metaprovider.class=org.apache.helix.metamanager.impl.local.LocalContainerProviderProcess
+
+#
+# Metacontroller
+#
+metacontroller.name=metacontroller
+metacontroller.metacluster=meta
+metacontroller.metaaddress=localhost:2199
+metacontroller.autorefresh=5000
+
+metacontroller.status.class=org.apache.helix.metamanager.impl.local.LocalStatusProvider
+
+metacontroller.target.class=org.apache.helix.metamanager.impl.StaticTargetProvider
+metacontroller.target.container=7

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/resources/boot/cluster.properties
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/resources/boot/cluster.properties b/recipes/meta-cluster-manager/src/main/resources/boot/cluster.properties
new file mode 100644
index 0000000..17d9406
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/resources/boot/cluster.properties
@@ -0,0 +1,2 @@
+cluster.cluster=managed
+cluster.address=localhost:2199
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/resources/boot/controller.properties
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/resources/boot/controller.properties b/recipes/meta-cluster-manager/src/main/resources/boot/controller.properties
new file mode 100644
index 0000000..1d96260
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/resources/boot/controller.properties
@@ -0,0 +1,4 @@
+controller.name=controller
+controller.cluster=managed
+controller.address=localhost:2199
+controller.autorefresh=-1
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/resources/boot/metacluster.properties
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/resources/boot/metacluster.properties b/recipes/meta-cluster-manager/src/main/resources/boot/metacluster.properties
new file mode 100644
index 0000000..f1e6062
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/resources/boot/metacluster.properties
@@ -0,0 +1,4 @@
+metacluster.cluster=managed
+metacluster.address=localhost:2199
+metacluster.metacluster=meta
+metacluster.metaaddress=localhost:2199

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/resources/boot/metacontroller.properties
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/resources/boot/metacontroller.properties b/recipes/meta-cluster-manager/src/main/resources/boot/metacontroller.properties
new file mode 100644
index 0000000..133ac69
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/resources/boot/metacontroller.properties
@@ -0,0 +1,4 @@
+controller.name=metacontroller
+controller.cluster=meta
+controller.address=localhost:2199
+controller.autorefresh=5000
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/resources/boot/resdb.properties
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/resources/boot/resdb.properties b/recipes/meta-cluster-manager/src/main/resources/boot/resdb.properties
new file mode 100644
index 0000000..0830e50
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/resources/boot/resdb.properties
@@ -0,0 +1,4 @@
+name=zookeeper
+datadir=/tmp/zk/data
+logdir=/tmp/zk/log
+port=2199

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/resources/boot/resws.properties
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/resources/boot/resws.properties b/recipes/meta-cluster-manager/src/main/resources/boot/resws.properties
new file mode 100644
index 0000000..0830e50
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/resources/boot/resws.properties
@@ -0,0 +1,4 @@
+name=zookeeper
+datadir=/tmp/zk/data
+logdir=/tmp/zk/log
+port=2199

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/resources/boot/zookeeper.properties
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/resources/boot/zookeeper.properties b/recipes/meta-cluster-manager/src/main/resources/boot/zookeeper.properties
new file mode 100644
index 0000000..04587c8
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/resources/boot/zookeeper.properties
@@ -0,0 +1,4 @@
+zookeeper.name=zookeeper
+zookeeper.datadir=/tmp/zk/data
+zookeeper.logdir=/tmp/zk/log
+zookeeper.port=2199

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/resources/container.properties
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/resources/container.properties b/recipes/meta-cluster-manager/src/main/resources/container.properties
new file mode 100644
index 0000000..8817165
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/resources/container.properties
@@ -0,0 +1 @@
+class=org.apache.helix.metamanager.impl.container.DummyMasterSlaveProcess

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/resources/log4j.properties
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/resources/log4j.properties b/recipes/meta-cluster-manager/src/main/resources/log4j.properties
new file mode 100644
index 0000000..af33e21
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/resources/log4j.properties
@@ -0,0 +1,30 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# Set root logger level to ERROR and its only appender to C.
+log4j.rootLogger=ERROR, C
+
+# C is set to be a ConsoleAppender.
+log4j.appender.C=org.apache.log4j.ConsoleAppender
+log4j.appender.C.layout=org.apache.log4j.PatternLayout
+log4j.appender.C.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n
+
+log4j.logger.org.I0Itec=ERROR
+log4j.logger.org.apache=ERROR
+
+log4j.logger.org.apache.helix.metamanager=INFO

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/resources/redisLocal.properties
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/resources/redisLocal.properties b/recipes/meta-cluster-manager/src/main/resources/redisLocal.properties
new file mode 100644
index 0000000..47c0800
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/resources/redisLocal.properties
@@ -0,0 +1,50 @@
+#
+# Zookeeper (optional)
+#
+zookeeper.datadir=/tmp/meta/zk/data
+zookeeper.logdir=/tmp/meta/zk/log
+zookeeper.port=2199
+
+#
+# Meta Cluster
+#
+meta.cluster=meta
+meta.managed=managed
+meta.address=rm:2199
+
+meta.target.type=redis
+meta.target.address=rm:2199
+meta.target.root=redis
+meta.target.records=100000
+meta.target.clients=20
+meta.target.requests=100000
+meta.target.target.get=100000
+meta.target.target.set=100000
+
+meta.status.type=local
+meta.status.metadata=rm:2199
+
+meta.provider.type=local
+meta.provider.name=provider_0
+meta.provider.address=rm:2199
+meta.provider.cluster=managed
+
+meta.provider.containers=cache
+
+meta.provider.container.cache.class=org.apache.helix.metamanager.impl.container.RedisServerProcess
+meta.provider.container.cache.address=rm:2199
+meta.provider.container.cache.root=redis
+meta.provider.container.cache.baseport=17000
+
+#
+# Managed Cluster
+#
+managed.cluster=managed
+managed.address=rm:2199
+
+managed.resources=devcache
+
+managed.resource.devcache.container=cache
+managed.resource.devcache.model=OnlineOffline
+managed.resource.devcache.partitions=10
+managed.resource.devcache.replica=1

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/resources/redisYarn.properties
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/resources/redisYarn.properties b/recipes/meta-cluster-manager/src/main/resources/redisYarn.properties
new file mode 100644
index 0000000..1862781
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/resources/redisYarn.properties
@@ -0,0 +1,52 @@
+#
+# Zookeeper (optional)
+#
+zookeeper.datadir=/export/home/eng/apucher/zk/data
+zookeeper.logdir=/export/home/eng/apucher/zk/log
+zookeeper.port=2199
+
+#
+# Meta Cluster
+#
+meta.address=rm:2199
+meta.interval=5000
+
+meta.target.type=redis
+meta.target.address=rm:2199
+meta.target.root=redis
+meta.target.interval=20000
+meta.target.timeout=10000
+meta.target.get=250000
+meta.target.min=3
+meta.target.max=23
+
+meta.status.type=yarn
+meta.status.metadata=rm:2199
+
+meta.provider.type=yarn
+meta.provider.name=provider_0
+meta.provider.address=rm:2199
+meta.provider.cluster=managed
+meta.provider.metadata=rm:2199
+meta.provider.resourcemananger=rm:8032
+meta.provider.scheduler=rm:8030
+meta.provider.user=yarn
+meta.provider.hdfs=hdfs://rm:9000/
+meta.provider.containers=cache
+
+meta.provider.container.cache.class=org.apache.helix.metamanager.impl.container.RedisServerProcess
+meta.provider.container.cache.address=rm:2199
+meta.provider.container.cache.root=redis
+meta.provider.container.cache.baseport=17000
+
+#
+# Managed Cluster
+#
+managed.address=rm:2199
+
+managed.resources=devcache
+
+managed.resource.devcache.container=cache
+managed.resource.devcache.model=OnlineOffline
+managed.resource.devcache.partitions=10
+managed.resource.devcache.replica=1

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/test/conf/testng-integration.xml
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/test/conf/testng-integration.xml b/recipes/meta-cluster-manager/src/test/conf/testng-integration.xml
new file mode 100644
index 0000000..ed7d1c9
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/test/conf/testng-integration.xml
@@ -0,0 +1,27 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+<!DOCTYPE suite SYSTEM "http://testng.org/testng-1.0.dtd">
+<suite name="Suite" parallel="none">
+  <test name="Test" preserve-order="false">
+    <packages>
+      <package name="org.apache.helix.metamanager.integration.*"/>
+    </packages>
+  </test>
+</suite>

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/test/conf/testng-unit.xml
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/test/conf/testng-unit.xml b/recipes/meta-cluster-manager/src/test/conf/testng-unit.xml
new file mode 100644
index 0000000..e178e4a
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/test/conf/testng-unit.xml
@@ -0,0 +1,27 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+<!DOCTYPE suite SYSTEM "http://testng.org/testng-1.0.dtd">
+<suite name="Suite" parallel="none">
+  <test name="Test" preserve-order="false">
+    <packages>
+      <package name="org.apache.helix.metamanager.unit.*"/>
+    </packages>
+  </test>
+</suite>

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/test/conf/testng.xml
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/test/conf/testng.xml b/recipes/meta-cluster-manager/src/test/conf/testng.xml
new file mode 100644
index 0000000..8c3517f
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/test/conf/testng.xml
@@ -0,0 +1,27 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+<!DOCTYPE suite SYSTEM "http://testng.org/testng-1.0.dtd">
+<suite name="Suite" parallel="none">
+  <test name="Test" preserve-order="false">
+    <packages>
+      <package name="org.apache.helix.*"/>
+    </packages>
+  </test>
+</suite>

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/test/config/testng-integration.xml
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/test/config/testng-integration.xml b/recipes/meta-cluster-manager/src/test/config/testng-integration.xml
new file mode 100644
index 0000000..ed7d1c9
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/test/config/testng-integration.xml
@@ -0,0 +1,27 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+<!DOCTYPE suite SYSTEM "http://testng.org/testng-1.0.dtd">
+<suite name="Suite" parallel="none">
+  <test name="Test" preserve-order="false">
+    <packages>
+      <package name="org.apache.helix.metamanager.integration.*"/>
+    </packages>
+  </test>
+</suite>

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/test/config/testng-unit.xml
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/test/config/testng-unit.xml b/recipes/meta-cluster-manager/src/test/config/testng-unit.xml
new file mode 100644
index 0000000..e178e4a
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/test/config/testng-unit.xml
@@ -0,0 +1,27 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+<!DOCTYPE suite SYSTEM "http://testng.org/testng-1.0.dtd">
+<suite name="Suite" parallel="none">
+  <test name="Test" preserve-order="false">
+    <packages>
+      <package name="org.apache.helix.metamanager.unit.*"/>
+    </packages>
+  </test>
+</suite>

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/test/config/testng.xml
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/test/config/testng.xml b/recipes/meta-cluster-manager/src/test/config/testng.xml
new file mode 100644
index 0000000..aa8f190
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/test/config/testng.xml
@@ -0,0 +1,27 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+<!DOCTYPE suite SYSTEM "http://testng.org/testng-1.0.dtd">
+<suite name="Suite" parallel="none">
+  <test name="Test" preserve-order="true">
+    <packages>
+      <package name="org.apache.helix.metamanager.*"/>
+    </packages>
+  </test>
+</suite>

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/BootstrapperIT.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/BootstrapperIT.java b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/BootstrapperIT.java
new file mode 100644
index 0000000..7809bab
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/BootstrapperIT.java
@@ -0,0 +1,134 @@
+package org.apache.helix.metamanager;
+
+import java.io.IOException;
+import java.util.Collection;
+import java.util.Properties;
+
+import org.apache.helix.HelixAdmin;
+import org.apache.helix.manager.zk.ZKHelixAdmin;
+import org.apache.helix.metamanager.Service;
+import org.apache.helix.metamanager.bootstrapper.Boot;
+import org.apache.helix.metamanager.bootstrapper.ClusterService;
+import org.apache.helix.metamanager.bootstrapper.ControllerService;
+import org.apache.helix.metamanager.bootstrapper.MetaClusterService;
+import org.apache.helix.metamanager.bootstrapper.MetaControllerService;
+import org.apache.helix.metamanager.bootstrapper.MetaProviderService;
+import org.apache.helix.metamanager.bootstrapper.MetaResourceService;
+import org.apache.helix.metamanager.bootstrapper.ResourceService;
+import org.apache.helix.metamanager.bootstrapper.ZookeeperService;
+import org.apache.log4j.Logger;
+import org.testng.Assert;
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.Test;
+
+/**
+ * Bootstrapping tool test. Reads cluster configuration from *.properties files,
+ * spawns services and verifies number of active partitions and containers
+ * 
+ * @see Boot
+ */
+@Test(groups = { "integration", "boot" })
+public class BootstrapperIT {
+
+    static final Logger log = Logger.getLogger(BootstrapperIT.class);
+
+    // Created by each test method; torn down (null-guarded) in teardown().
+    Boot                boot;
+    HelixAdmin          admin;
+
+    @AfterMethod(alwaysRun = true)
+    public void teardown() throws Exception {
+        log.debug("tearing down bootstrap test");
+        // NOTE(review): admin is closed before boot.stop() — presumably so no
+        // Helix admin call outlives the embedded Zookeeper service; confirm
+        // the ordering is intentional.
+        if (admin != null) {
+            admin.close();
+            admin = null;
+        }
+        if (boot != null) {
+            boot.stop();
+            boot = null;
+        }
+    }
+
+    // Boots the single-provider local setup, verifies that every expected
+    // service type was spawned, then waits for the container and partition
+    // counts to converge.
+    // NOTE(review): Boot#getServcies is misspelled upstream ("Servcies");
+    // rename to getServices when the Boot API can change.
+    @Test(timeOut = TestUtils.TEST_TIMEOUT, groups = { "local" })
+    public void bootstrapLocalTest() throws Exception {
+        boot = new Boot();
+        boot.configure(getProperties("BootLocal.properties"));
+        boot.start();
+
+        Assert.assertTrue(containsInstanceOf(boot.getServcies(), ZookeeperService.class));
+        Assert.assertTrue(containsInstanceOf(boot.getServcies(), ClusterService.class));
+        Assert.assertTrue(containsInstanceOf(boot.getServcies(), ResourceService.class));
+        Assert.assertTrue(containsInstanceOf(boot.getServcies(), ControllerService.class));
+        Assert.assertTrue(containsInstanceOf(boot.getServcies(), MetaClusterService.class));
+        Assert.assertTrue(containsInstanceOf(boot.getServcies(), MetaResourceService.class));
+        Assert.assertTrue(containsInstanceOf(boot.getServcies(), MetaProviderService.class));
+        Assert.assertTrue(containsInstanceOf(boot.getServcies(), MetaControllerService.class));
+
+        // Single deadline shared by both waits below.
+        final long limit = System.currentTimeMillis() + TestUtils.REBALANCE_TIMEOUT;
+
+        admin = new ZKHelixAdmin("localhost:2199");
+        // Arguments are (admin, cluster, resource, instanceCount, partitionCount, timeout).
+        waitUntil(admin, "meta", "container", 1, 7, (limit - System.currentTimeMillis()));
+        waitUntil(admin, "cluster", "resource", 7, 10, (limit - System.currentTimeMillis()));
+
+    }
+
+    // Same bootstrap flow driven by the 2-resource/2-provider local config.
+    @Test(timeOut = TestUtils.TEST_TIMEOUT, groups = { "local" })
+    public void bootstrap2By2LocalTest() throws Exception {
+        boot = new Boot();
+        boot.configure(getProperties("Boot2By2Local.properties"));
+        boot.start();
+
+        verify2By2Setup();
+    }
+
+    @Test(timeOut = TestUtils.TEST_TIMEOUT, groups = { "shell" })
+    public void bootstrap2By2ShellTest() throws Exception {
+        boot = new Boot();
+        boot.configure(getProperties("Boot2By2Shell.properties"));
+        boot.start();
+
+        verify2By2Setup();
+    }
+
+    @Test(timeOut = TestUtils.TEST_TIMEOUT, groups = { "yarn" })
+    public void bootstrap2By2YarnTest() throws Exception {
+        boot = new Boot();
+        boot.configure(getProperties("Boot2By2Yarn.properties"));
+        boot.start();
+
+        verify2By2Setup();
+    }
+
+    // Shared verification for the 2-by-2 configs: two meta resources
+    // (database, webserver) and two managed resources (dbprod, wsprod).
+    void verify2By2Setup() throws Exception {
+        final long limit = System.currentTimeMillis() + TestUtils.REBALANCE_TIMEOUT;
+        final String address = "localhost:2199";
+
+        log.debug(String.format("connecting to zookeeper at '%s'", address));
+
+        admin = new ZKHelixAdmin(address);
+        waitUntil(admin, "meta", "database", 2, 3, (limit - System.currentTimeMillis()));
+        waitUntil(admin, "meta", "webserver", 2, 5, (limit - System.currentTimeMillis()));
+        waitUntil(admin, "cluster", "dbprod", 3, 8, (limit - System.currentTimeMillis()));
+        waitUntil(admin, "cluster", "wsprod", 5, 15, (limit - System.currentTimeMillis()));
+    }
+
+    /**
+     * Blocks until the given resource reaches the expected live-instance and
+     * partition counts, sharing a single deadline across both waits.
+     */
+    static void waitUntil(HelixAdmin admin, String cluster, String resource, int instanceCount, int partitionCount, long timeout) throws Exception {
+        final long limit = System.currentTimeMillis() + timeout;
+        TestUtils.waitUntilInstanceCount(admin, cluster, resource, instanceCount, (limit - System.currentTimeMillis()));
+        TestUtils.waitUntilPartitionCount(admin, cluster, resource, partitionCount, (limit - System.currentTimeMillis()));
+    }
+
+    /**
+     * Loads a *.properties cluster definition from the test classpath.
+     * NOTE(review): getSystemResourceAsStream returns null for a missing
+     * resource, so a bad path surfaces as a NullPointerException in load().
+     */
+    static Properties getProperties(String resourcePath) throws IOException {
+        Properties properties = new Properties();
+        properties.load(ClassLoader.getSystemResourceAsStream(resourcePath));
+        return properties;
+    }
+
+    /** True if any service in the collection is of (a subtype of) clazz. */
+    static boolean containsInstanceOf(Collection<Service> services, Class<?> clazz) {
+        for (Service service : services) {
+            if (clazz.isAssignableFrom(service.getClass()))
+                return true;
+        }
+        return false;
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/FailoverIT.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/FailoverIT.java b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/FailoverIT.java
new file mode 100644
index 0000000..90e8be0
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/FailoverIT.java
@@ -0,0 +1,195 @@
+package org.apache.helix.metamanager;
+
+import java.util.Collections;
+import java.util.Iterator;
+
+import org.apache.helix.metamanager.Service;
+import org.apache.helix.metamanager.impl.StaticTargetProvider;
+import org.apache.helix.metamanager.impl.local.LocalContainerProviderProcess;
+import org.apache.helix.metamanager.impl.local.LocalContainerSingleton;
+import org.apache.helix.metamanager.impl.local.LocalStatusProvider;
+import org.apache.helix.metamanager.impl.shell.ShellContainerProviderProcess;
+import org.apache.helix.metamanager.impl.shell.ShellContainerSingleton;
+import org.apache.helix.metamanager.impl.shell.ShellStatusProvider;
+import org.apache.helix.metamanager.impl.yarn.YarnContainerProviderProcess;
+import org.apache.helix.metamanager.impl.yarn.YarnStatusProvider;
+import org.apache.helix.metamanager.impl.yarn.ZookeeperYarnDataProvider;
+import org.apache.helix.metamanager.provider.ProviderRebalancer;
+import org.apache.log4j.Logger;
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.Test;
+
+/**
+ * Fault-recovery test for individual containers and whole providers. Missing
+ * containers should be replaced by the meta cluster Rebalancer using remaining
+ * active providers.
+ * 
+ * @see ProviderRebalancer
+ */
+@Test(groups = { "integration", "failure" })
+public class FailoverIT {
+
+    static final Logger  log             = Logger.getLogger(FailoverIT.class);
+
+    // Target container count handed to the StaticTargetProvider; recovery is
+    // complete when the rebalanced count returns to this value.
+    static final int     CONTAINER_COUNT = 7;
+
+    StaticTargetProvider targetProvider;
+    YarnStatusProvider   yarnStatusProvider;
+
+    // Shutdown hook ensures the test cluster and Zookeeper are stopped even if
+    // the JVM aborts mid-test (external shell/yarn processes would otherwise
+    // outlive the suite).
+    @BeforeClass(alwaysRun = true)
+    public void setupClass() {
+        log.info("installing shutdown hook");
+        Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() {
+            @Override
+            public void run() {
+                try {
+                    teardownTest();
+                } catch (Exception ignore) {
+                }
+                // NOTE(review): stray empty statement below — harmless, remove.
+                ;
+            }
+        }));
+    }
+
+    @BeforeMethod(alwaysRun = true)
+    public void setupTest() throws Exception {
+        // Defensive teardown first: clears state left by a previous aborted run.
+        teardownTest();
+        targetProvider = new StaticTargetProvider(Collections.singletonMap(TestUtils.metaResourceName, CONTAINER_COUNT));
+    }
+
+    @AfterMethod(alwaysRun = true)
+    public void teardownTest() throws Exception {
+        TestUtils.stopTestCluster();
+
+        if (yarnStatusProvider != null) {
+            yarnStatusProvider.stop();
+            yarnStatusProvider = null;
+        }
+
+        TestUtils.stopZookeeper();
+    }
+
+    @Test(timeOut = TestUtils.TEST_TIMEOUT, groups = { "local" })
+    public void testLocalContainerFailover() throws Exception {
+        log.info("testing local container failover");
+        TestUtils.configure();
+        TestUtils.startZookeeper();
+        TestUtils.startTestCluster(targetProvider, new LocalStatusProvider(), makeLocalProviders(3));
+        killLocalContainers();
+    }
+
+    @Test(timeOut = TestUtils.TEST_TIMEOUT, groups = { "local" })
+    public void testLocalProviderFailover() throws Exception {
+        log.info("testing local provider failover");
+        TestUtils.configure();
+        TestUtils.startZookeeper();
+        TestUtils.startTestCluster(targetProvider, new LocalStatusProvider(), makeLocalProviders(3));
+        killProvider();
+    }
+
+    @Test(timeOut = TestUtils.TEST_TIMEOUT, groups = { "shell" })
+    public void testShellContainerFailover() throws Exception {
+        log.info("testing shell container failover");
+        TestUtils.configure();
+        TestUtils.startZookeeper();
+        TestUtils.startTestCluster(targetProvider, new ShellStatusProvider(), makeShellProviders(3));
+        killShellContainers();
+    }
+
+    @Test(timeOut = TestUtils.TEST_TIMEOUT, groups = { "shell" })
+    public void testShellProviderFailover() throws Exception {
+        log.info("testing shell provider failover");
+        TestUtils.configure();
+        TestUtils.startZookeeper();
+        TestUtils.startTestCluster(targetProvider, new ShellStatusProvider(), makeShellProviders(3));
+        killProvider();
+    }
+
+    // Yarn tests use the distributed config and a ZK-backed status provider
+    // that must be explicitly started/stopped (see teardownTest).
+    @Test(timeOut = TestUtils.TEST_TIMEOUT, groups = { "yarn" })
+    public void testYarnContainerFailover() throws Exception {
+        log.info("testing yarn container failover");
+        TestUtils.configure("distributed.properties");
+        TestUtils.startZookeeper();
+        yarnStatusProvider = new YarnStatusProvider(TestUtils.zkAddress);
+        yarnStatusProvider.start();
+        TestUtils.startTestCluster(targetProvider, yarnStatusProvider, makeYarnProviders(3));
+        killYarnContainers();
+    }
+
+    @Test(timeOut = TestUtils.TEST_TIMEOUT, groups = { "yarn" })
+    public void testYarnProviderFailover() throws Exception {
+        log.info("testing yarn provider failover");
+        TestUtils.configure("distributed.properties");
+        TestUtils.startZookeeper();
+        yarnStatusProvider = new YarnStatusProvider(TestUtils.zkAddress);
+        yarnStatusProvider.start();
+        TestUtils.startTestCluster(targetProvider, yarnStatusProvider, makeYarnProviders(3));
+        killProvider();
+    }
+
+    // Kill every second container, then force a rebalance and wait until all
+    // CONTAINER_COUNT containers are active again.
+    // NOTE(review): the fixed 3s sleep is a grace period for the failure to be
+    // observed — timing-based; consider polling instead.
+    void killLocalContainers() throws Exception {
+        LocalContainerSingleton.killProcess("container_2");
+        LocalContainerSingleton.killProcess("container_4");
+        LocalContainerSingleton.killProcess("container_6");
+        Thread.sleep(3000);
+        TestUtils.rebalanceTestCluster();
+        TestUtils.waitUntilRebalancedCount(CONTAINER_COUNT);
+    }
+
+    void killShellContainers() throws Exception {
+        ShellContainerSingleton.killProcess("container_2");
+        ShellContainerSingleton.killProcess("container_4");
+        ShellContainerSingleton.killProcess("container_6");
+        Thread.sleep(3000);
+        TestUtils.rebalanceTestCluster();
+        TestUtils.waitUntilRebalancedCount(CONTAINER_COUNT);
+    }
+
+    // Simulates yarn container failure by deleting the containers' metadata
+    // entries from the ZK-backed data provider.
+    void killYarnContainers() throws Exception {
+        ZookeeperYarnDataProvider yarnDataService = new ZookeeperYarnDataProvider(TestUtils.zkAddress);
+        yarnDataService.start();
+        yarnDataService.delete("container_2");
+        yarnDataService.delete("container_4");
+        yarnDataService.delete("container_6");
+        yarnDataService.stop();
+        Thread.sleep(3000);
+        TestUtils.rebalanceTestCluster();
+        TestUtils.waitUntilRebalancedCount(CONTAINER_COUNT);
+    }
+
+    // Stops one provider process and removes it from the shared service list;
+    // the remaining providers must re-spawn its containers on rebalance.
+    static void killProvider() throws Exception {
+        Iterator<Service> itService = TestUtils.providerServices.iterator();
+        itService.next().stop();
+        itService.remove();
+
+        TestUtils.rebalanceTestCluster();
+        TestUtils.waitUntilRebalancedCount(CONTAINER_COUNT);
+    }
+
+    /** Creates {@code count} local provider processes named provider_0..n-1. */
+    LocalContainerProviderProcess[] makeLocalProviders(int count) throws Exception {
+        LocalContainerProviderProcess[] localProviders = new LocalContainerProviderProcess[count];
+        for (int i = 0; i < count; i++) {
+            localProviders[i] = TestUtils.makeLocalProvider("provider_" + i);
+        }
+        return localProviders;
+    }
+
+    /** Creates {@code count} shell provider processes named provider_0..n-1. */
+    ShellContainerProviderProcess[] makeShellProviders(int count) throws Exception {
+        ShellContainerProviderProcess[] shellProviders = new ShellContainerProviderProcess[count];
+        for (int i = 0; i < count; i++) {
+            shellProviders[i] = TestUtils.makeShellProvider("provider_" + i);
+        }
+        return shellProviders;
+    }
+
+    /** Creates {@code count} yarn provider processes named provider_0..n-1. */
+    YarnContainerProviderProcess[] makeYarnProviders(int count) throws Exception {
+        YarnContainerProviderProcess[] yarnProviders = new YarnContainerProviderProcess[count];
+        for (int i = 0; i < count; i++) {
+            yarnProviders[i] = TestUtils.makeYarnProvider("provider_" + i);
+        }
+        return yarnProviders;
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/LocalContainerProviderIT.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/LocalContainerProviderIT.java b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/LocalContainerProviderIT.java
new file mode 100644
index 0000000..e1faddd
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/LocalContainerProviderIT.java
@@ -0,0 +1,80 @@
+package org.apache.helix.metamanager;
+
+import java.util.Collections;
+
+import org.apache.helix.metamanager.impl.StaticTargetProvider;
+import org.apache.helix.metamanager.impl.local.LocalContainerProviderProcess;
+import org.apache.helix.metamanager.impl.local.LocalStatusProvider;
+import org.apache.log4j.Logger;
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.Test;
+
+/**
+ * Local container provider and local status provider test. Scale-up and -down
+ * only, no failures.
+ * 
+ * @see LocalContainerProvider
+ * @see LocalStatusProvider
+ */
+@Test(groups = { "integration", "local" })
+public class LocalContainerProviderIT {
+
+    static final Logger           log             = Logger.getLogger(LocalContainerProviderIT.class);
+
+    // Baseline container count; scale tests move +/-2 around this value.
+    static final int              CONTAINER_COUNT = 4;
+
+    StaticTargetProvider          clusterStatusProvider;
+    LocalContainerProviderProcess containerProvider;
+    LocalStatusProvider           containerStatusProvider;
+
+    @BeforeMethod(alwaysRun = true)
+    public void setupTest() throws Exception {
+        // Defensive teardown first: clears state left by a previous aborted run.
+        teardownTest();
+        TestUtils.configure();
+        TestUtils.startZookeeper();
+        containerProvider = TestUtils.makeLocalProvider("provider_0");
+        clusterStatusProvider = new StaticTargetProvider(Collections.singletonMap(TestUtils.metaResourceName, CONTAINER_COUNT));
+        containerStatusProvider = new LocalStatusProvider();
+        TestUtils.startTestCluster(clusterStatusProvider, containerStatusProvider, containerProvider);
+    }
+
+    @AfterMethod(alwaysRun = true)
+    public void teardownTest() throws Exception {
+        TestUtils.stopTestCluster();
+        TestUtils.stopZookeeper();
+    }
+
+    @Test(timeOut = TestUtils.TEST_TIMEOUT)
+    public void testStatic() throws Exception {
+        log.info("testing static");
+        setContainerCount(CONTAINER_COUNT);
+    }
+
+    @Test(timeOut = TestUtils.TEST_TIMEOUT)
+    public void testScaleUp() throws Exception {
+        log.info("testing scale up");
+        setContainerCount(CONTAINER_COUNT + 2);
+    }
+
+    @Test(timeOut = TestUtils.TEST_TIMEOUT)
+    public void testScaleDown() throws Exception {
+        log.info("testing scale down");
+        setContainerCount(CONTAINER_COUNT - 2);
+    }
+
+    // Exercises up, back to baseline, down, and back to baseline in sequence.
+    @Test(timeOut = TestUtils.TEST_TIMEOUT)
+    public void testScaleCycle() throws Exception {
+        log.info("testing scale cycle");
+        setContainerCount(CONTAINER_COUNT + 2);
+        setContainerCount(CONTAINER_COUNT);
+        setContainerCount(CONTAINER_COUNT - 2);
+        setContainerCount(CONTAINER_COUNT);
+    }
+
+    /** Updates the target count and triggers a rebalance to apply it. */
+    void setContainerCount(int newContainerCount) throws Exception {
+        log.debug(String.format("Setting container count to %d", newContainerCount));
+        clusterStatusProvider.setTargetContainerCount(TestUtils.metaResourceName, newContainerCount);
+        TestUtils.rebalanceTestCluster();
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/ShellContainerProviderIT.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/ShellContainerProviderIT.java b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/ShellContainerProviderIT.java
new file mode 100644
index 0000000..45b3023
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/ShellContainerProviderIT.java
@@ -0,0 +1,95 @@
+package org.apache.helix.metamanager;
+
+import java.util.Collections;
+
+import org.apache.helix.metamanager.impl.StaticTargetProvider;
+import org.apache.helix.metamanager.impl.shell.ShellContainerProviderProcess;
+import org.apache.helix.metamanager.impl.shell.ShellStatusProvider;
+import org.apache.log4j.Logger;
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.Test;
+
+/**
+ * Shell container provider and shell status provider test. Scale-up and -down
+ * only, no failures.
+ * 
+ * @see ShellContainerProvider
+ * @see ShellStatusProvider
+ */
+@Test(groups = { "integration", "shell" })
+public class ShellContainerProviderIT {
+
+    static final Logger           log               = Logger.getLogger(ShellContainerProviderIT.class);
+
+    // NOTE(review): these two local constants are unused — the @Test
+    // annotations below reference TestUtils.TEST_TIMEOUT instead. Remove them
+    // or wire them into the annotations.
+    static final long             TEST_TIMEOUT      = 20000;
+    static final long             REBALANCE_TIMEOUT = 10000;
+
+    // Baseline container count; scale tests move +/-2 around this value.
+    static final int              CONTAINER_COUNT   = 4;
+
+    StaticTargetProvider          clusterStatusProvider;
+    ShellContainerProviderProcess containerProvider;
+    ShellStatusProvider           containerStatusProvider;
+	
+	// Shutdown hook kills leftover shell processes even if the JVM aborts.
+	// NOTE(review): this block is tab-indented while the rest of the class uses
+	// spaces — normalize when the code can change.
+	@BeforeClass(alwaysRun = true)
+	public void setupClass() {
+		log.info("installing shutdown hook");
+		Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() {
+			@Override
+			public void run() {
+				try { teardownTest(); } catch(Exception ignore) {};
+			}
+		}));
+	}
+	
+    @BeforeMethod(alwaysRun = true)
+    public void setupTest() throws Exception {
+        // Defensive teardown first: clears state left by a previous aborted run.
+        teardownTest();
+        TestUtils.configure();
+        TestUtils.startZookeeper();
+        containerProvider = TestUtils.makeShellProvider("provider_0");
+        clusterStatusProvider = new StaticTargetProvider(Collections.singletonMap(TestUtils.metaResourceName, CONTAINER_COUNT));
+        containerStatusProvider = new ShellStatusProvider();
+        TestUtils.startTestCluster(clusterStatusProvider, containerStatusProvider, containerProvider);
+    }
+
+    @AfterMethod(alwaysRun = true)
+    public void teardownTest() throws Exception {
+        TestUtils.stopTestCluster();
+        TestUtils.stopZookeeper();
+    }
+
+    @Test(timeOut = TestUtils.TEST_TIMEOUT)
+    public void testStatic() throws Exception {
+        log.info("testing static");
+        setContainerCount(CONTAINER_COUNT);
+    }
+
+    @Test(timeOut = TestUtils.TEST_TIMEOUT)
+    public void testScaleUp() throws Exception {
+        log.info("testing scale up");
+        setContainerCount(CONTAINER_COUNT + 2);
+    }
+
+    @Test(timeOut = TestUtils.TEST_TIMEOUT)
+    public void testScaleDown() throws Exception {
+        log.info("testing scale down");
+        setContainerCount(CONTAINER_COUNT - 2);
+    }
+
+    // Exercises up, back to baseline, down, and back to baseline in sequence.
+    @Test(timeOut = TestUtils.TEST_TIMEOUT)
+    public void testScaleCycle() throws Exception {
+        log.info("testing scale cycle");
+        setContainerCount(CONTAINER_COUNT + 2);
+        setContainerCount(CONTAINER_COUNT);
+        setContainerCount(CONTAINER_COUNT - 2);
+        setContainerCount(CONTAINER_COUNT);
+    }
+
+    /** Updates the target count and triggers a rebalance to apply it. */
+    void setContainerCount(int newContainerCount) throws Exception {
+        log.debug(String.format("Setting container count to %d", newContainerCount));
+        clusterStatusProvider.setTargetContainerCount(TestUtils.metaResourceName, newContainerCount);
+        TestUtils.rebalanceTestCluster();
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/TestContainerProvider.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/TestContainerProvider.java b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/TestContainerProvider.java
new file mode 100644
index 0000000..d42a7b3
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/TestContainerProvider.java
@@ -0,0 +1,17 @@
+package org.apache.helix.metamanager;
+
+import org.apache.helix.metamanager.provider.local.LocalContainerProvider;
+import org.apache.helix.metamanager.provider.local.LocalContainerSingleton;
+
+public class TestContainerProvider extends LocalContainerProvider {
+
+	// Binds the provider to the test cluster's ZK address and managed cluster.
+	public TestContainerProvider(String providerName) {
+		super(TestUtils.zkAddress, TestUtils.managedClusterName, providerName);
+	}
+
+	// Destroys all containers via the parent, then resets the shared singleton
+	// so the next test starts from a clean slate.
+	// NOTE(review): overrides LocalContainerProvider#destroyAll but is missing
+	// the @Override annotation.
+	public void destroyAll() {
+		super.destroyAll();
+		LocalContainerSingleton.reset();
+	}
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/TestStatusProvider.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/TestStatusProvider.java b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/TestStatusProvider.java
new file mode 100644
index 0000000..7e7f401
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/TestStatusProvider.java
@@ -0,0 +1,20 @@
+package org.apache.helix.metamanager;
+
+public class TestStatusProvider implements ClusterStatusProvider {
+
+	// Single target count returned for every container type.
+	// NOTE(review): plain mutable int with no synchronization — fine while
+	// tests mutate it from one thread; make volatile if read across threads.
+	int targetContainerCount;
+	
+	public TestStatusProvider(int targetContainerCount) {
+		this.targetContainerCount = targetContainerCount;
+	}
+
+	// Ignores the type argument and returns the same count for all types.
+	@Override
+	public int getTargetContainerCount(String type) {
+		return targetContainerCount;
+	}
+
+	public void setTargetContainerCount(int targetContainerCount) {
+		this.targetContainerCount = targetContainerCount;
+	}
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/TestUtils.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/TestUtils.java b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/TestUtils.java
new file mode 100644
index 0000000..04587f0
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/TestUtils.java
@@ -0,0 +1,438 @@
+package org.apache.helix.metamanager;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+import java.util.Set;
+import java.util.concurrent.TimeoutException;
+
+import org.I0Itec.zkclient.IDefaultNameSpace;
+import org.I0Itec.zkclient.ZkClient;
+import org.I0Itec.zkclient.ZkServer;
+import org.apache.commons.io.FileUtils;
+import org.apache.helix.HelixAdmin;
+import org.apache.helix.HelixManager;
+import org.apache.helix.controller.HelixControllerMain;
+import org.apache.helix.manager.zk.ZKHelixAdmin;
+import org.apache.helix.metamanager.impl.local.LocalContainerProviderProcess;
+import org.apache.helix.metamanager.impl.shell.ShellContainerProviderProcess;
+import org.apache.helix.metamanager.impl.yarn.YarnContainerProviderProcess;
+import org.apache.helix.metamanager.impl.yarn.YarnContainerProviderProperties;
+import org.apache.helix.metamanager.provider.ProviderProperties;
+import org.apache.helix.metamanager.provider.ProviderRebalancer;
+import org.apache.helix.metamanager.provider.ProviderRebalancerSingleton;
+import org.apache.helix.model.ExternalView;
+import org.apache.helix.model.IdealState;
+import org.apache.helix.model.IdealState.RebalanceMode;
+import org.apache.helix.model.StateModelDefinition;
+import org.apache.helix.tools.StateModelConfigGenerator;
+import org.apache.log4j.Logger;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.Maps;
+
+/**
+ * Utility for creating a test cluster without the bootstrapping tool. Methods
+ * for verifying the number of active instances and partitions in a cluster.
+ * 
+ */
+public class TestUtils {
+
+    static final Logger                    log                      = Logger.getLogger(TestUtils.class);
+
+    public static int                      zkPort;
+    public static String                   zkAddress;
+    public static String                   resmanAddress;
+    public static String                   schedulerAddress;
+    public static String                   hdfsAddress;
+    public static String                   yarnUser;
+
+    public static final String             metaClusterName          = "meta-cluster";
+    public static final String             managedClusterName       = "managed-cluster";
+    public static final String             metaResourceName         = "container";
+    public static final String             managedResourceName      = "database";
+
+    public static final int                numManagedPartitions     = 10;
+    public static final int                numManagedReplica        = 2;
+
+    public static final long               TEST_TIMEOUT             = 120000;
+    public static final long               REBALANCE_TIMEOUT        = 60000;
+    public static final long               POLL_INTERVAL            = 1000;
+
+    public static final ProviderProperties providerProperties       = new ProviderProperties();
+
+    public static ZkServer                 server                   = null;
+    public static HelixAdmin               admin                    = null;
+    public static HelixManager             metaControllerManager    = null;
+    public static HelixManager             managedControllerManager = null;
+
+    public static Collection<Service>      providerServices         = new ArrayList<Service>();
+    public static Collection<Service>      auxServices              = new ArrayList<Service>();
+
+    public static TargetProvider           targetProvider           = null;
+    public static StatusProvider           statusProvider           = null;
+
+    static {
+        try {
+            configure();
+        } catch(Exception e) {
+            log.error("Could not setup TestUtils", e);
+            throw new RuntimeException(e);
+        }
+    }
+    
+    private TestUtils() {
+        // left blank
+    }
+    
+    public static void configure() throws IOException {
+        configure("standalone.properties");
+    }
+    
+    public static void configure(String resourcePath) throws IOException {
+        log.info(String.format("Configuring Test cluster from %s", resourcePath));
+        Properties properties = new Properties();
+        properties.load(ClassLoader.getSystemResourceAsStream(resourcePath));
+        configure(properties);
+    }
+
+    public static void configure(Properties properties) {
+        log.info(String.format("Configuring from properties '%s'", properties));
+        
+        zkPort = Integer.valueOf(properties.getProperty("zookeeper.port"));
+        zkAddress = properties.getProperty("zookeeper.address");
+        resmanAddress = properties.getProperty("yarn.resourcemanager");
+        schedulerAddress = properties.getProperty("yarn.scheduler");
+        hdfsAddress = properties.getProperty("yarn.hdfs");
+        yarnUser = properties.getProperty("yarn.user");
+        
+        Preconditions.checkNotNull(zkPort);
+        Preconditions.checkNotNull(zkAddress);
+        Preconditions.checkNotNull(resmanAddress);
+        Preconditions.checkNotNull(schedulerAddress);
+        Preconditions.checkNotNull(hdfsAddress);
+        Preconditions.checkNotNull(yarnUser);
+        
+        configureInternal();
+    }
+    
+    static void configureInternal() {
+        providerProperties.clear();
+        providerProperties.setProperty(ProviderProperties.ADDRESS, zkAddress);
+        providerProperties.setProperty(ProviderProperties.CLUSTER, managedClusterName);
+        providerProperties.setProperty(ProviderProperties.METAADDRESS, zkAddress);
+        providerProperties.setProperty(ProviderProperties.METACLUSTER, metaClusterName);
+        providerProperties.setProperty(ProviderProperties.NAME, "<unknown>");
+    
+        Properties containerProperties = new Properties();
+        containerProperties.setProperty("class", "org.apache.helix.metamanager.impl.container.DummyMasterSlaveProcess");
+    
+        providerProperties.addContainer("container", containerProperties);
+    
+        log.info(String.format("Using provider properties '%s'", providerProperties));
+    }
+
+    public static void startZookeeper() throws Exception {
+        log.info("Starting ZooKeeper");
+
+        if (server != null)
+            throw new IllegalStateException("Zookeeper already running");
+
+        server = createLocalZookeeper();
+        server.start();
+    }
+
+    public static void stopZookeeper() throws Exception {
+        log.info("Stopping ZooKeeper");
+
+        if (server != null) {
+            server.shutdown();
+            server = null;
+        }
+    }
+
+    /**
+     * Boots the full meta-managed test setup against the already-running
+     * zookeeper: creates both clusters, registers the target/status providers
+     * with the rebalancer singleton, configures the managed (MasterSlave,
+     * FULL_AUTO) and meta (OnlineOffline, USER_DEFINED) resources, starts the
+     * given container provider services and one standalone controller per
+     * cluster, then blocks until the meta resource reaches its target
+     * container count.
+     *
+     * @throws IllegalStateException if zookeeper is not running, or a test
+     *             cluster is already up
+     */
+    public static void startTestCluster(TargetProviderService targetProvider, StatusProviderService statusProvider, Service... containerProviderProcesses)
+            throws Exception {
+        log.debug(String.format("Starting test cluster"));
+
+        if (server == null)
+            throw new IllegalStateException("Zookeeper not running yet");
+
+        if (!auxServices.isEmpty() || !providerServices.isEmpty() || admin != null || metaControllerManager != null || managedControllerManager != null)
+            throw new IllegalStateException("TestCluster already running");
+
+        log.debug("Create admin");
+        admin = new ZKHelixAdmin(zkAddress);
+
+        log.debug("Create clusters");
+        admin.addCluster(metaClusterName, true);
+        admin.addCluster(managedClusterName, true);
+
+        log.debug("Setup config tool");
+        ProviderRebalancerSingleton.setTargetProvider(targetProvider);
+        ProviderRebalancerSingleton.setStatusProvider(statusProvider);
+
+        log.debug("Starting target and status provider");
+        TestUtils.targetProvider = startAuxService(targetProvider);
+        TestUtils.statusProvider = startAuxService(statusProvider);
+
+        // Managed Cluster
+        log.debug("Setup managed cluster");
+        admin.addStateModelDef(managedClusterName, "MasterSlave", new StateModelDefinition(StateModelConfigGenerator.generateConfigForMasterSlave()));
+        admin.addResource(managedClusterName, managedResourceName, numManagedPartitions, "MasterSlave", RebalanceMode.FULL_AUTO.toString());
+        IdealState managedIdealState = admin.getResourceIdealState(managedClusterName, managedResourceName);
+        // tag so only container instances spawned for the meta resource host it
+        managedIdealState.setInstanceGroupTag(metaResourceName);
+        managedIdealState.setReplicas(String.valueOf(numManagedReplica));
+        admin.setResourceIdealState(managedClusterName, managedResourceName, managedIdealState);
+
+        // Meta Cluster
+        log.debug("Setup meta cluster");
+        admin.addStateModelDef(metaClusterName, "OnlineOffline", new StateModelDefinition(StateModelConfigGenerator.generateConfigForOnlineOffline()));
+        admin.addResource(metaClusterName, metaResourceName, targetProvider.getTargetContainerCount(metaResourceName), "OnlineOffline",
+                RebalanceMode.USER_DEFINED.toString());
+
+        IdealState idealState = admin.getResourceIdealState(metaClusterName, metaResourceName);
+        idealState.setRebalancerClassName(ProviderRebalancer.class.getName());
+        idealState.setReplicas("1");
+
+        // BEGIN workaround
+        // FIXME workaround for HELIX-226
+        // pre-create empty list/map fields for 256 partitions so the custom
+        // rebalancer has records to write into
+        Map<String, List<String>> listFields = Maps.newHashMap();
+        Map<String, Map<String, String>> mapFields = Maps.newHashMap();
+        for (int i = 0; i < 256; i++) {
+            String partitionName = metaResourceName + "_" + i;
+            listFields.put(partitionName, new ArrayList<String>());
+            mapFields.put(partitionName, new HashMap<String, String>());
+        }
+        idealState.getRecord().setListFields(listFields);
+        idealState.getRecord().setMapFields(mapFields);
+        // END workaround
+
+        admin.setResourceIdealState(metaClusterName, metaResourceName, idealState);
+
+        log.debug("Starting container providers");
+        for (Service service : containerProviderProcesses) {
+            startProviderService(service);
+        }
+
+        log.debug("Starting managed cluster controller");
+        managedControllerManager = HelixControllerMain.startHelixController(zkAddress, managedClusterName, "managedController", HelixControllerMain.STANDALONE);
+
+        log.debug("Starting meta cluster controller");
+        metaControllerManager = HelixControllerMain.startHelixController(zkAddress, metaClusterName, "metaController", HelixControllerMain.STANDALONE);
+
+        log.debug("Waiting for stable state");
+        waitUntilRebalancedCount(targetProvider.getTargetContainerCount(metaResourceName));
+    }
+
+    /**
+     * Tears down everything started by {@link #startTestCluster}: both
+     * controllers, the container provider services and the auxiliary
+     * target/status providers. Leaves zookeeper running. Idempotent.
+     */
+    public static void stopTestCluster() throws Exception {
+        log.debug(String.format("Stopping test cluster"));
+        if (managedControllerManager != null) {
+            log.info("Disconnecting managed cluster controller");
+            managedControllerManager.disconnect();
+        }
+        if (metaControllerManager != null) {
+            log.info("Disconnecting meta cluster controller");
+            metaControllerManager.disconnect();
+        }
+        log.info("Stopping provider services");
+        if (providerServices != null) {
+            for (Service service : providerServices) {
+                service.stop();
+            }
+            providerServices.clear();
+        }
+        log.debug("Stopping auxillary services");
+        if (auxServices != null) {
+            for (Service service : auxServices) {
+                service.stop();
+            }
+            auxServices.clear();
+        }
+
+        // FIX: close the admin connection instead of only dropping the
+        // reference; otherwise every start/stop cycle leaks a zk connection
+        if (admin != null) {
+            admin.close();
+        }
+        admin = null;
+        metaControllerManager = null;
+        managedControllerManager = null;
+    }
+
+    /**
+     * Starts an auxiliary service and registers it for teardown via
+     * {@link #stopTestCluster()}. Registered before start() so it is stopped
+     * even if start() fails partway.
+     */
+    public static <T extends Service> T startAuxService(T service) throws Exception {
+        auxServices.add(service);
+        service.start();
+        return service;
+    }
+
+    /**
+     * Starts a container provider service and registers it for teardown via
+     * {@link #stopTestCluster()}. Registered before start() so it is stopped
+     * even if start() fails partway.
+     */
+    public static <T extends Service> T startProviderService(T service) throws Exception {
+        providerServices.add(service);
+        service.start();
+        return service;
+    }
+
+    /**
+     * Triggers the USER_DEFINED rebalancer by rewriting the meta ideal state
+     * unchanged (a "poke"), then blocks until the current target container
+     * count is reached.
+     */
+    public static void rebalanceTestCluster() throws Exception {
+        log.debug(String.format("Triggering rebalance"));
+        IdealState poke = admin.getResourceIdealState(metaClusterName, metaResourceName);
+        admin.setResourceIdealState(metaClusterName, metaResourceName, poke);
+
+        int current = targetProvider.getTargetContainerCount(TestUtils.metaResourceName);
+        waitUntilRebalancedCount(current);
+    }
+
+    /**
+     * Blocks until the meta and managed clusters reach the expected partition
+     * and instance counts, or {@code REBALANCE_TIMEOUT} elapses.
+     *
+     * @param containerCount expected number of meta partitions (containers)
+     * @throws TimeoutException if a stable state is not reached in time
+     */
+    public static void waitUntilRebalancedCount(int containerCount) throws Exception {
+        log.debug(String.format("Waiting for rebalance with %d containers at '%s'", containerCount, zkAddress));
+
+        HelixAdmin admin = new ZKHelixAdmin(zkAddress);
+
+        // single deadline shared by the checks below
+        long limit = System.currentTimeMillis() + REBALANCE_TIMEOUT;
+        // FIX: removed no-op "catch (Exception e) { throw e; }"; finally is enough
+        try {
+            waitUntilPartitionCount(admin, metaClusterName, metaResourceName, containerCount, (limit - System.currentTimeMillis()));
+            waitUntilInstanceCount(admin, metaClusterName, metaResourceName, providerServices.size(), (limit - System.currentTimeMillis()));
+            waitUntilPartitionCount(admin, managedClusterName, managedResourceName, numManagedPartitions, (limit - System.currentTimeMillis()));
+
+            // FIXME workaround for Helix FULL_AUTO rebalancer not providing guarantees for cluster expansion
+            //waitUntilInstanceCount(admin, managedClusterName, managedResourceName, containerCount, (limit - System.currentTimeMillis()));
+        } finally {
+            admin.close();
+        }
+    }
+
+    /**
+     * Blocks until the external view of the resource reports exactly
+     * {@code targetCount} assigned instances, or the timeout elapses.
+     *
+     * @throws TimeoutException if the count is not reached within {@code timeout} ms
+     */
+    public static void waitUntilInstanceCount(HelixAdmin admin, String cluster, String resource, int targetCount, long timeout) throws Exception {
+        log.debug(String.format("Waiting for instance count (cluster='%s', resource='%s', instanceCount=%d, timeout=%d)", cluster, resource, targetCount,
+                timeout));
+        waitUntilCount(admin, cluster, resource, targetCount, timeout, true);
+    }
+
+    /**
+     * Blocks until the external view of the resource reports exactly
+     * {@code targetCount} assigned partitions, or the timeout elapses.
+     *
+     * @throws TimeoutException if the count is not reached within {@code timeout} ms
+     */
+    public static void waitUntilPartitionCount(HelixAdmin admin, String cluster, String resource, int targetCount, long timeout) throws Exception {
+        log.debug(String.format("Waiting for partition count (cluster='%s', resource='%s', partitionCount=%d, timeout=%d)", cluster, resource, targetCount,
+                timeout));
+        waitUntilCount(admin, cluster, resource, targetCount, timeout, false);
+    }
+
+    /** Shared polling loop for the two waitUntil*Count methods above. */
+    private static void waitUntilCount(HelixAdmin admin, String cluster, String resource, int targetCount, long timeout, boolean countInstances)
+            throws Exception {
+        String what = countInstances ? "instance" : "partition";
+        long limit = System.currentTimeMillis() + timeout;
+        while (limit > System.currentTimeMillis()) {
+            int assignedCount = countInstances ? getAssingedInstances(admin, cluster, resource).size()
+                    : getAssingedPartitions(admin, cluster, resource).size();
+            log.debug(String.format("checking %s count for '%s:%s': target=%d, current=%d", what, cluster, resource, targetCount, assignedCount));
+
+            if (targetCount == assignedCount) {
+                return;
+            }
+            Thread.sleep(POLL_INTERVAL);
+        }
+        throw new TimeoutException();
+    }
+
+    /**
+     * Returns the instance names that currently host at least one partition of
+     * {@code resourceName} in MASTER, SLAVE or ONLINE state according to the
+     * cluster's external view. Returns an empty set when no external view
+     * exists yet.
+     * (Note: "Assinged" is a typo for "Assigned"; name kept for compatibility.)
+     */
+    public static Set<String> getAssingedInstances(HelixAdmin admin, String clusterName, String resourceName) {
+        Set<String> assignedInstances = new HashSet<String>();
+
+        ExternalView externalView = admin.getResourceExternalView(clusterName, resourceName);
+
+        if (externalView == null)
+            return assignedInstances;
+
+        for (String partitionName : externalView.getPartitionSet()) {
+            Map<String, String> stateMap = externalView.getStateMap(partitionName);
+            if (stateMap == null)
+                continue;
+
+            for (String instanceName : stateMap.keySet()) {
+                String state = stateMap.get(instanceName);
+                if ("MASTER".equals(state) || "SLAVE".equals(state) || "ONLINE".equals(state)) {
+                    assignedInstances.add(instanceName);
+                }
+            }
+        }
+
+        return assignedInstances;
+    }
+
+    /**
+     * Returns the partitions of {@code resourceName} that have at least one
+     * replica in MASTER or ONLINE (i.e. top) state according to the cluster's
+     * external view. Returns an empty set when no external view exists yet.
+     * (Note: "Assinged" is a typo for "Assigned"; name kept for compatibility.)
+     */
+    public static Set<String> getAssingedPartitions(HelixAdmin admin, String clusterName, String resourceName) {
+        Set<String> assignedPartitions = new HashSet<String>();
+
+        ExternalView externalView = admin.getResourceExternalView(clusterName, resourceName);
+
+        if (externalView == null)
+            return assignedPartitions;
+
+        for (String partitionName : externalView.getPartitionSet()) {
+            Map<String, String> stateMap = externalView.getStateMap(partitionName);
+            if (stateMap == null)
+                continue;
+
+            for (String instanceName : stateMap.keySet()) {
+                String state = stateMap.get(instanceName);
+                if ("MASTER".equals(state) || "ONLINE".equals(state)) {
+                    assignedPartitions.add(partitionName);
+                }
+            }
+        }
+
+        return assignedPartitions;
+    }
+
+    /**
+     * Creates (but does not start) a standalone zookeeper server on
+     * {@code zkPort}, wiping data and log directories left by previous runs.
+     * NOTE(review): fixed path under /tmp/metamanager -- concurrent runs on
+     * the same host would clash; confirm tests never run in parallel.
+     */
+    public static ZkServer createLocalZookeeper() throws Exception {
+        String baseDir = "/tmp/metamanager/";
+        final String dataDir = baseDir + "zk/dataDir";
+        final String logDir = baseDir + "zk/logDir";
+        FileUtils.deleteDirectory(new File(dataDir));
+        FileUtils.deleteDirectory(new File(logDir));
+
+        // no default namespace nodes required for the tests
+        IDefaultNameSpace defaultNameSpace = new IDefaultNameSpace() {
+            @Override
+            public void createDefaultNameSpace(ZkClient zkClient) {
+
+            }
+        };
+        return new ZkServer(dataDir, logDir, defaultNameSpace, zkPort);
+    }
+
+    /** Builds a configured (not yet started) VM-local container provider named {@code name}. */
+    public static LocalContainerProviderProcess makeLocalProvider(String name) throws Exception {
+        LocalContainerProviderProcess process = new LocalContainerProviderProcess();
+        process.configure(makeProviderProperties(name));
+        return process;
+    }
+
+    /** Builds a configured (not yet started) shell-based container provider named {@code name}. */
+    public static ShellContainerProviderProcess makeShellProvider(String name) throws Exception {
+        ShellContainerProviderProcess process = new ShellContainerProviderProcess();
+        process.configure(makeProviderProperties(name));
+        return process;
+    }
+
+    /**
+     * Builds a configured (not yet started) yarn-based container provider
+     * named {@code name}, combining the common provider properties with the
+     * yarn endpoints (resource manager, scheduler, hdfs, user) taken from the
+     * static test configuration.
+     */
+    public static YarnContainerProviderProcess makeYarnProvider(String name) throws Exception {
+        YarnContainerProviderProperties properties = new YarnContainerProviderProperties();
+
+        properties.putAll(makeProviderProperties(name));
+        properties.put(YarnContainerProviderProperties.YARNDATA, zkAddress);
+        properties.put(YarnContainerProviderProperties.RESOURCEMANAGER, resmanAddress);
+        properties.put(YarnContainerProviderProperties.SCHEDULER, schedulerAddress);
+        properties.put(YarnContainerProviderProperties.USER, yarnUser);
+        properties.put(YarnContainerProviderProperties.HDFS, hdfsAddress);
+
+        YarnContainerProviderProcess process = new YarnContainerProviderProcess();
+        process.configure(properties);
+
+        return process;
+    }
+
+    /** Clones the shared provider defaults and stamps the given provider name on the copy. */
+    static ProviderProperties makeProviderProperties(String name) {
+        ProviderProperties copy = new ProviderProperties();
+        copy.putAll(providerProperties);
+        copy.setProperty(ProviderProperties.NAME, name);
+        return copy;
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/TestUtilsTest.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/TestUtilsTest.java b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/TestUtilsTest.java
new file mode 100644
index 0000000..3f0bd3e
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/TestUtilsTest.java
@@ -0,0 +1,30 @@
+package org.apache.helix.metamanager;
+
+import java.util.Collections;
+import java.util.List;
+
+import org.testng.annotations.Test;
+
+public class TestUtilsTest {
+
+    /**
+     * Single start/stop cycle of the test-cluster harness.
+     * NOTE(review): calls TestUtils.startTestCluster(statusProvider, List),
+     * which does not match the (targetProvider, statusProvider, Service...)
+     * signature defined in TestUtils in this commit -- looks stale; confirm
+     * this class still compiles.
+     */
+    @Test
+    public void testStartStop() throws Exception {
+        TestUtils.startTestCluster(new TestStatusProvider(1),
+                Collections.<ClusterContainerProvider> singletonList(new TestContainerProvider("test")));
+        TestUtils.stopTestCluster();
+    }
+
+    /** Two consecutive start/stop cycles reusing the same provider objects. */
+    @Test
+    public void testStartStopRepeated() throws Exception {
+        ClusterStatusProvider statusProvider = new TestStatusProvider(1);
+        List<ClusterContainerProvider> containerProviders = Collections.<ClusterContainerProvider> singletonList(new TestContainerProvider("test"));
+
+        TestUtils.startTestCluster(statusProvider, containerProviders);
+        TestUtils.stopTestCluster();
+
+        TestUtils.startTestCluster(statusProvider, containerProviders);
+        TestUtils.stopTestCluster();
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/TestUtilsUT.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/TestUtilsUT.java b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/TestUtilsUT.java
new file mode 100644
index 0000000..50d7121
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/TestUtilsUT.java
@@ -0,0 +1,63 @@
+package org.apache.helix.metamanager;
+
+import java.util.Collections;
+
+import org.apache.helix.metamanager.Service;
+import org.apache.helix.metamanager.StatusProviderService;
+import org.apache.helix.metamanager.TargetProviderService;
+import org.apache.helix.metamanager.impl.StaticTargetProvider;
+import org.apache.helix.metamanager.impl.local.LocalStatusProvider;
+import org.apache.log4j.Logger;
+import org.testng.annotations.Test;
+
+/**
+ * Self-test of test cluster. Spawning zookeeper and cluster with single provider and single instance.
+ * 
+ * @see TestUtils
+ */
+@Test(groups = { "unit" })
+public class TestUtilsUT {
+
+    static final Logger log = Logger.getLogger(TestUtilsUT.class);
+
+    /** Zookeeper can be started and stopped on its own. */
+    @Test(timeOut = TestUtils.TEST_TIMEOUT)
+    public void testZookeeper() throws Exception {
+        log.info("testing zookeeper");
+        TestUtils.configure();
+        TestUtils.startZookeeper();
+        TestUtils.stopZookeeper();
+    }
+
+    /** Full cluster with one local provider comes up and shuts down cleanly. */
+    @Test(timeOut = TestUtils.TEST_TIMEOUT)
+    public void testCluster() throws Exception {
+        log.info("testing cluster");
+        TestUtils.configure();
+        TestUtils.startZookeeper();
+
+        TestUtils.startTestCluster(new StaticTargetProvider(Collections.singletonMap(TestUtils.metaResourceName, 1)),
+                new LocalStatusProvider(), TestUtils.makeLocalProvider("test"));
+        TestUtils.stopTestCluster();
+
+        TestUtils.stopZookeeper();
+    }
+
+    /** The same provider objects survive a cluster stop/start cycle. */
+    @Test(timeOut = TestUtils.TEST_TIMEOUT)
+    public void testClusterRepeated() throws Exception {
+        log.info("testing cluster restart");
+        TestUtils.configure();
+        TestUtils.startZookeeper();
+
+        TargetProviderService statusProvider = new StaticTargetProvider(Collections.singletonMap(TestUtils.metaResourceName, 1));
+        Service containerProvider = TestUtils.makeLocalProvider("test");
+        StatusProviderService containerStatusProvider = new LocalStatusProvider();
+
+        TestUtils.startTestCluster(statusProvider, containerStatusProvider, containerProvider);
+        TestUtils.stopTestCluster();
+
+        TestUtils.startTestCluster(statusProvider, containerStatusProvider, containerProvider);
+        TestUtils.stopTestCluster();
+
+        TestUtils.stopZookeeper();
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/YarnContainerProviderIT.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/YarnContainerProviderIT.java b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/YarnContainerProviderIT.java
new file mode 100644
index 0000000..5d319ff
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/YarnContainerProviderIT.java
@@ -0,0 +1,101 @@
+package org.apache.helix.metamanager;
+
+import java.util.Collections;
+
+import org.apache.helix.metamanager.impl.StaticTargetProvider;
+import org.apache.helix.metamanager.impl.yarn.YarnContainerProviderProcess;
+import org.apache.helix.metamanager.impl.yarn.YarnContainerProviderProperties;
+import org.apache.helix.metamanager.impl.yarn.YarnStatusProvider;
+import org.apache.log4j.Logger;
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.Test;
+
+/**
+ * Yarn container provider and yarn status provider test. Scale-up and -down
+ * only, no failures.
+ * 
+ * @see YarnContainerProvider
+ * @see YarnStatusProvider
+ */
+@Test(groups = { "integration", "yarn" })
+public class YarnContainerProviderIT {
+
+    static final Logger             log             = Logger.getLogger(YarnContainerProviderIT.class);
+
+    static final int                CONTAINER_COUNT = 4;
+
+    StaticTargetProvider            clusterStatusProvider;
+    YarnContainerProviderProcess    containerProvider;
+    YarnStatusProvider              containerStatusProvider;
+
+    YarnContainerProviderProperties properties;
+
+    /** Registers a shutdown hook so external yarn state is cleaned up on abnormal VM exit. */
+    @BeforeClass(alwaysRun = true)
+    public void setupClass() throws Exception {
+        log.info("installing shutdown hook");
+        Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() {
+            @Override
+            public void run() {
+                // best effort only; failures during VM shutdown are deliberately ignored
+                try {
+                    teardownTest();
+                } catch (Exception ignored) {
+                }
+            }
+        }));
+    }
+
+    @BeforeMethod(alwaysRun = true)
+    public void setupTest() throws Exception {
+        log.debug("setting up yarn test case");
+
+        // teardown first in case a previous test leaked state (teardown is idempotent)
+        teardownTest();
+        TestUtils.configure("distributed.properties");
+        TestUtils.startZookeeper();
+
+        containerProvider = TestUtils.makeYarnProvider("provider_0");
+        containerStatusProvider = new YarnStatusProvider(TestUtils.zkAddress);
+        clusterStatusProvider = new StaticTargetProvider(Collections.singletonMap(TestUtils.metaResourceName, CONTAINER_COUNT));
+        TestUtils.startTestCluster(clusterStatusProvider, containerStatusProvider, containerProvider);
+
+        log.debug("running yarn test case");
+    }
+
+    @AfterMethod(alwaysRun = true)
+    public void teardownTest() throws Exception {
+        log.debug("cleaning up yarn test case");
+        TestUtils.stopTestCluster();
+        TestUtils.stopZookeeper();
+    }
+
+    /** Cluster settles at the initial target size. */
+    @Test(timeOut = TestUtils.TEST_TIMEOUT)
+    public void testStatic() throws Exception {
+        log.info("testing static");
+        setContainerCount(CONTAINER_COUNT);
+    }
+
+    /** Cluster grows by two containers. */
+    @Test(timeOut = TestUtils.TEST_TIMEOUT)
+    public void testScaleUp() throws Exception {
+        log.info("testing scale up");
+        setContainerCount(CONTAINER_COUNT + 2);
+    }
+
+    /** Cluster shrinks by two containers. */
+    @Test(timeOut = TestUtils.TEST_TIMEOUT)
+    public void testScaleDown() throws Exception {
+        log.info("testing scale down");
+        setContainerCount(CONTAINER_COUNT - 2);
+    }
+
+    /** Cluster scales up, back, down, and back again. */
+    @Test(timeOut = TestUtils.TEST_TIMEOUT)
+    public void testScaleCycle() throws Exception {
+        log.info("testing scale cycle");
+        setContainerCount(CONTAINER_COUNT + 2);
+        setContainerCount(CONTAINER_COUNT);
+        setContainerCount(CONTAINER_COUNT - 2);
+        setContainerCount(CONTAINER_COUNT);
+    }
+
+    /** Adjusts the target container count and waits for the rebalance to settle. */
+    void setContainerCount(int newContainerCount) throws Exception {
+        log.debug(String.format("Setting container count to %d", newContainerCount));
+        clusterStatusProvider.setTargetContainerCount(TestUtils.metaResourceName, newContainerCount);
+        TestUtils.rebalanceTestCluster();
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/integration/BootstrapperIT.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/integration/BootstrapperIT.java b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/integration/BootstrapperIT.java
new file mode 100644
index 0000000..a7bae00
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/integration/BootstrapperIT.java
@@ -0,0 +1,127 @@
+package org.apache.helix.metamanager.integration;
+
+import java.io.IOException;
+import java.util.Collection;
+import java.util.Properties;
+
+import org.apache.helix.HelixAdmin;
+import org.apache.helix.manager.zk.ZKHelixAdmin;
+import org.apache.helix.metamanager.Service;
+import org.apache.helix.metamanager.TestUtils;
+import org.apache.helix.metamanager.bootstrapper.Boot;
+import org.apache.helix.metamanager.bootstrapper.ClusterService;
+import org.apache.helix.metamanager.bootstrapper.ControllerService;
+import org.apache.helix.metamanager.bootstrapper.MetaClusterService;
+import org.apache.helix.metamanager.bootstrapper.MetaControllerService;
+import org.apache.helix.metamanager.bootstrapper.MetaProviderService;
+import org.apache.helix.metamanager.bootstrapper.MetaResourceService;
+import org.apache.helix.metamanager.bootstrapper.ResourceService;
+import org.apache.helix.metamanager.bootstrapper.ZookeeperService;
+import org.apache.log4j.Logger;
+import org.testng.Assert;
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.Test;
+
+/**
+ * Integration tests for the Boot bootstrapper: each test loads a properties
+ * file describing a full deployment (local, shell or yarn backed), starts it,
+ * and verifies the expected instance and partition counts appear.
+ * NOTE(review): "getServcies" below is a typo in the Boot API ("getServices");
+ * kept here to match the published method name.
+ */
+public class BootstrapperIT {
+
+    static final Logger log               = Logger.getLogger(BootstrapperIT.class);
+
+    Boot        boot;
+    HelixAdmin admin;
+
+    @AfterMethod
+    public void teardown() throws Exception {
+        log.debug("tearing down bootstrap test");
+        if(admin != null) {
+            admin.close();
+            admin = null;
+        }
+        if (boot != null) {
+            boot.stop();
+            boot = null;
+        }
+    }
+
+    @Test(timeOut = TestUtils.TEST_TIMEOUT)
+    public void bootstrapLocalTest() throws Exception {
+        boot = new Boot();
+        boot.configure(getProperties("BootLocal.properties"));
+        boot.start();
+
+        // the local boot config must have instantiated one service of each kind
+        Assert.assertTrue(containsInstanceOf(boot.getServcies(), ZookeeperService.class));
+        Assert.assertTrue(containsInstanceOf(boot.getServcies(), ClusterService.class));
+        Assert.assertTrue(containsInstanceOf(boot.getServcies(), ResourceService.class));
+        Assert.assertTrue(containsInstanceOf(boot.getServcies(), ControllerService.class));
+        Assert.assertTrue(containsInstanceOf(boot.getServcies(), MetaClusterService.class));
+        Assert.assertTrue(containsInstanceOf(boot.getServcies(), MetaResourceService.class));
+        Assert.assertTrue(containsInstanceOf(boot.getServcies(), MetaProviderService.class));
+        Assert.assertTrue(containsInstanceOf(boot.getServcies(), MetaControllerService.class));
+        
+        final long limit = System.currentTimeMillis() + TestUtils.REBALANCE_TIMEOUT;
+        
+        admin = new ZKHelixAdmin("localhost:2199");
+        waitUntil(admin, "meta", "container", 1, 7, (limit - System.currentTimeMillis()));
+        waitUntil(admin, "cluster", "resource", 7, 10, (limit - System.currentTimeMillis()));
+
+    }
+    
+    @Test(timeOut = TestUtils.TEST_TIMEOUT)
+    public void bootstrap2By2LocalTest() throws Exception {
+        boot = new Boot();
+        boot.configure(getProperties("Boot2By2Local.properties"));
+        boot.start();
+
+        verify2By2Setup();
+    }
+        
+    @Test(timeOut = TestUtils.TEST_TIMEOUT)
+    public void bootstrap2By2ShellTest() throws Exception {
+        boot = new Boot();
+        boot.configure(getProperties("Boot2By2Shell.properties"));
+        boot.start();
+
+        verify2By2Setup();
+    }
+        
+    @Test(timeOut = TestUtils.TEST_TIMEOUT)
+    public void bootstrap2By2YarnTest() throws Exception {
+        boot = new Boot();
+        boot.configure(getProperties("Boot2By2Yarn.properties"));
+        boot.start();
+
+        verify2By2Setup();
+    }
+        
+    // asserts the shared 2-provider / 2-resource deployment reached its expected size
+    void verify2By2Setup() throws Exception {
+        final long limit = System.currentTimeMillis() + TestUtils.REBALANCE_TIMEOUT;
+        final String address = "localhost:2199";
+
+        log.debug(String.format("connecting to zookeeper at '%s'", address));
+
+        admin = new ZKHelixAdmin(address);
+        waitUntil(admin, "meta", "database", 2, 3, (limit - System.currentTimeMillis()));
+        waitUntil(admin, "meta", "webserver", 2, 5, (limit - System.currentTimeMillis()));
+        waitUntil(admin, "cluster", "dbprod", 3, 8, (limit - System.currentTimeMillis()));
+        waitUntil(admin, "cluster", "wsprod", 5, 15, (limit - System.currentTimeMillis()));
+    }
+
+    // waits for both instance count and partition count under one shared deadline
+    static void waitUntil(HelixAdmin admin, String cluster, String resource, int instanceCount, int partitionCount, long timeout) throws Exception {
+        final long limit = System.currentTimeMillis() + timeout;
+        TestUtils.waitUntilInstanceCount(admin, cluster, resource, instanceCount, (limit - System.currentTimeMillis()));
+        TestUtils.waitUntilPartitionCount(admin, cluster, resource, partitionCount, (limit - System.currentTimeMillis()));
+    }
+    
+    // loads a boot configuration from the test classpath
+    static Properties getProperties(String resourcePath) throws IOException {
+        Properties properties = new Properties();
+        properties.load(ClassLoader.getSystemResourceAsStream(resourcePath));
+        return properties;
+    }
+    
+    // true if any service in the collection is an instance of clazz (or a subtype)
+    static boolean containsInstanceOf(Collection<Service> services, Class<?> clazz) {
+        for(Service service : services) {
+            if(clazz.isAssignableFrom(service.getClass())) return true;
+        }
+        return false;
+    }
+
+}


[06/15] Adding Helix-task-framework and Yarn integration modules

Posted by ki...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/local/LocalContainerProcess.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/local/LocalContainerProcess.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/local/LocalContainerProcess.java
new file mode 100644
index 0000000..110fe68
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/local/LocalContainerProcess.java
@@ -0,0 +1,64 @@
+package org.apache.helix.metamanager.impl.local;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import org.apache.helix.HelixManager;
+import org.apache.helix.HelixManagerFactory;
+import org.apache.helix.InstanceType;
+import org.apache.helix.metamanager.container.ContainerStateModelFactory;
+import org.apache.log4j.Logger;
+
+/**
+ * VM-local Helix participant hosting the MasterSlave container state model.
+ * Connects to the given cluster on {@link #start()} and disconnects on
+ * {@link #stop()}.
+ */
+public class LocalContainerProcess
+{
+  static final Logger log = Logger.getLogger(LocalContainerProcess.class);
+
+  // configuration is fixed at construction time
+  private final String clusterName;
+  private final String zkAddress;
+  private final String instanceName;
+  private HelixManager participantManager;
+
+  public LocalContainerProcess(String clusterName, String zkAddress, String instanceName)
+  {
+    this.clusterName = clusterName;
+    this.zkAddress = zkAddress;
+    this.instanceName = instanceName;
+  }
+
+  /** Registers the MasterSlave state model factory and connects the participant. */
+  public void start() throws Exception
+  {
+    log.info("STARTING "+ instanceName);
+    participantManager = HelixManagerFactory.getZKHelixManager(clusterName,
+        instanceName, InstanceType.PARTICIPANT, zkAddress);
+    participantManager.getStateMachineEngine().registerStateModelFactory(
+        "MasterSlave", new ContainerStateModelFactory());
+    participantManager.connect();
+    log.info("STARTED "+ instanceName);
+  }
+
+  /** Disconnects the participant. Idempotent. */
+  public void stop()
+  {
+    if (participantManager != null)
+    {
+      participantManager.disconnect();
+      // FIX: drop the reference so a second stop() cannot disconnect twice
+      // and the manager can be garbage collected
+      participantManager = null;
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/local/LocalContainerProvider.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/local/LocalContainerProvider.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/local/LocalContainerProvider.java
new file mode 100644
index 0000000..49c9d42
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/local/LocalContainerProvider.java
@@ -0,0 +1,119 @@
+package org.apache.helix.metamanager.impl.local;
+
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Properties;
+
+import org.apache.helix.metamanager.ContainerProvider;
+import org.apache.helix.metamanager.ContainerProviderService;
+import org.apache.helix.metamanager.container.ContainerProcess;
+import org.apache.helix.metamanager.container.ContainerProcessProperties;
+import org.apache.helix.metamanager.container.ContainerUtils;
+import org.apache.helix.metamanager.impl.local.LocalContainerSingleton.LocalProcess;
+import org.apache.helix.metamanager.provider.ProviderProperties;
+import org.apache.log4j.Logger;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * {@link ContainerProvider} spawning VM-local containers. Only works in single-VM
+ * deployments as container metadata is managed via singleton.
+ * 
+ * @see LocalContainerSingleton
+ */
+class LocalContainerProvider implements ContainerProviderService {
+
+    static final Logger           log   = Logger.getLogger(LocalContainerProvider.class);
+
+    final Map<String, Properties> types = new HashMap<String, Properties>();
+
+    String                        address;
+    String                        cluster;
+    String                        name;
+
+    @Override
+    public void configure(Properties properties) throws Exception {
+        ProviderProperties providerProperties = new ProviderProperties();
+        providerProperties.putAll(properties);
+        Preconditions.checkArgument(providerProperties.isValid());
+
+        this.address = providerProperties.getProperty("address");
+        this.cluster = providerProperties.getProperty("cluster");
+        this.name = providerProperties.getProperty("name");
+
+        for (String containerType : providerProperties.getContainers()) {
+            registerType(containerType, providerProperties.getContainer(containerType));
+        }
+    }
+
+    @Override
+    public void start() throws Exception {
+        // left blank
+    }
+
+    @Override
+    public void stop() throws Exception {
+        destroyAll();
+    }
+
+    @Override
+    public void create(String id, String type) throws Exception {
+        Map<String, LocalProcess> processes = LocalContainerSingleton.getProcesses();
+
+        synchronized (processes) {
+            Preconditions.checkState(!processes.containsKey(id), "Process '%s' already exists", id);
+            Preconditions.checkState(types.containsKey(type), "Type '%s' is not registered", type);
+
+            ContainerProcessProperties properties = new ContainerProcessProperties(types.get(type));
+
+            properties.setProperty(ContainerProcessProperties.CLUSTER, cluster);
+            properties.setProperty(ContainerProcessProperties.NAME, id);
+            properties.setProperty(ContainerProcessProperties.ADDRESS, address);
+
+            log.info(String.format("Running container '%s' (properties='%s')", id, properties));
+
+            ContainerProcess process = ContainerUtils.createProcess(properties);
+            process.start();
+
+            processes.put(id, new LocalProcess(id, name, process));
+
+        }
+    }
+
+    @Override
+    public void destroy(String id) throws Exception {
+        Map<String, LocalProcess> processes = LocalContainerSingleton.getProcesses();
+
+        synchronized (processes) {
+            if (!processes.containsKey(id))
+                throw new IllegalArgumentException(String.format("Process '%s' does not exists", id));
+
+            log.info(String.format("Destroying container '%s'", id));
+
+            LocalProcess local = processes.remove(id);
+
+            local.process.stop();
+        }
+    }
+
+    @Override
+    public void destroyAll() {
+        Map<String, LocalProcess> processes = LocalContainerSingleton.getProcesses();
+
+        synchronized (processes) {
+            log.info("Destroying all processes");
+            for (LocalProcess local : new HashSet<LocalProcess>(processes.values())) {
+                if (local.owner.equals(name)) {
+                    try { destroy(local.id); } catch (Exception ignore) {}
+                }
+            }
+        }
+    }
+
+    void registerType(String name, Properties properties) {
+        log.debug(String.format("Registering container type '%s' (properties='%s')", name, properties));
+        types.put(name, properties);
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/local/LocalContainerProviderProcess.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/local/LocalContainerProviderProcess.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/local/LocalContainerProviderProcess.java
new file mode 100644
index 0000000..ed090bc
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/local/LocalContainerProviderProcess.java
@@ -0,0 +1,45 @@
+package org.apache.helix.metamanager.impl.local;
+
+import java.util.Properties;
+
+import org.apache.helix.metamanager.Service;
+import org.apache.helix.metamanager.provider.ProviderProcess;
+import org.apache.helix.metamanager.provider.ProviderProperties;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * Configurable and runnable service for {@link LocalContainerProvider}. 
+ *
+ */
+public class LocalContainerProviderProcess implements Service {
+    LocalContainerProvider provider;
+    ProviderProcess        process;
+
+    @Override
+    public void configure(Properties properties) throws Exception {
+        ProviderProperties providerProperties = new ProviderProperties();
+        providerProperties.putAll(properties);
+        
+        Preconditions.checkArgument(providerProperties.isValid(), "provider properties not valid (properties='%s')", properties);
+        
+        provider = new LocalContainerProvider();
+        provider.configure(properties);
+
+        process = new ProviderProcess();
+        process.configure(providerProperties);
+        process.setConteinerProvider(provider);
+    }
+
+    @Override
+    public void start() throws Exception {
+        provider.start();
+        process.start();
+    }
+
+    @Override
+    public void stop() throws Exception {
+        process.stop();
+        provider.stop();
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/local/LocalContainerSingleton.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/local/LocalContainerSingleton.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/local/LocalContainerSingleton.java
new file mode 100644
index 0000000..b91a848
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/local/LocalContainerSingleton.java
@@ -0,0 +1,56 @@
+package org.apache.helix.metamanager.impl.local;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.helix.metamanager.container.ContainerProcess;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * Singleton tracking metadata for VM-local containers spawned via
+ * {@link LocalContainerProvider}.
+ * 
+ */
+public class LocalContainerSingleton {
+    final static Map<String, LocalProcess> processes = new HashMap<String, LocalProcess>();
+
+    private LocalContainerSingleton() {
+        // left blank
+    }
+
+    public static Map<String, LocalProcess> getProcesses() {
+        return processes;
+    }
+
+    public static void reset() {
+        synchronized (processes) {
+            for (LocalProcess local : processes.values()) {
+                local.process.stop();
+            }
+            processes.clear();
+        }
+    }
+
+    public static void killProcess(String id) throws InterruptedException {
+        synchronized (processes) {
+            Preconditions.checkArgument(processes.containsKey(id), "Process '%s' does not exist", id);
+            ContainerProcess process = processes.get(id).process;
+            process.stop();
+            processes.remove(id);
+        }
+    }
+
+    static class LocalProcess {
+        final String           id;
+        final String           owner;
+        final ContainerProcess process;
+
+        public LocalProcess(String id, String owner, ContainerProcess process) {
+            this.id = id;
+            this.owner = owner;
+            this.process = process;
+        }
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/local/LocalContainerStatusProvider.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/local/LocalContainerStatusProvider.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/local/LocalContainerStatusProvider.java
new file mode 100644
index 0000000..604ad25
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/local/LocalContainerStatusProvider.java
@@ -0,0 +1,37 @@
+package org.apache.helix.metamanager.impl.local;
+
+import java.util.Map;
+
+import org.apache.helix.metamanager.ContainerStatusProvider;
+import org.apache.helix.metamanager.impl.local.LocalContainerSingleton.LocalProcess;
+
+public class LocalContainerStatusProvider implements ContainerStatusProvider {
+
+	@Override
+	public boolean exists(String id) {
+		Map<String, LocalProcess> processes = LocalContainerSingleton.getProcesses();
+		
+		synchronized (processes) {
+			return processes.containsKey(id);
+		}
+	}
+
+	@Override
+	public boolean isActive(String id) {
+		Map<String, LocalProcess> processes = LocalContainerSingleton.getProcesses();
+		
+		synchronized (processes) {
+			return processes.get(id).process.isActive();
+		}
+	}
+
+	@Override
+	public boolean isFailed(String id) {
+		Map<String, LocalProcess> processes = LocalContainerSingleton.getProcesses();
+		
+		synchronized (processes) {
+			return processes.get(id).process.isFailed();
+		}
+	}
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/local/LocalStatusProvider.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/local/LocalStatusProvider.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/local/LocalStatusProvider.java
new file mode 100644
index 0000000..c53a3ba
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/local/LocalStatusProvider.java
@@ -0,0 +1,53 @@
+package org.apache.helix.metamanager.impl.local;
+
+import java.util.Map;
+import java.util.Properties;
+
+import org.apache.helix.metamanager.StatusProviderService;
+import org.apache.helix.metamanager.impl.local.LocalContainerSingleton.LocalProcess;
+
+/**
+ * StatusProvider for VM-local containers spawned via
+ * {@link LocalContainerProvider}. Runnable and configurable service.
+ * 
+ */
+public class LocalStatusProvider implements StatusProviderService {
+
+    @Override
+    public boolean exists(String id) {
+        Map<String, LocalProcess> processes = LocalContainerSingleton.getProcesses();
+
+        synchronized (processes) {
+            return processes.containsKey(id);
+        }
+    }
+
+    @Override
+    public boolean isHealthy(String id) {
+        Map<String, LocalProcess> processes = LocalContainerSingleton.getProcesses();
+
+        synchronized (processes) {
+            LocalProcess local = processes.get(id);
+
+            if (local == null)
+                return false;
+
+            return local.process.isActive();
+        }
+    }
+
+    @Override
+    public void configure(Properties properties) throws Exception {
+        // left blank
+    }
+
+    @Override
+    public void start() throws Exception {
+        // left blank
+    }
+
+    @Override
+    public void stop() throws Exception {
+        // left blank
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/shell/ShellContainerProcess.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/shell/ShellContainerProcess.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/shell/ShellContainerProcess.java
new file mode 100644
index 0000000..0069110
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/shell/ShellContainerProcess.java
@@ -0,0 +1,93 @@
+package org.apache.helix.metamanager.impl.shell;
+
+import java.io.File;
+import java.util.concurrent.Executors;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.helix.metamanager.container.ContainerProcess;
+import org.apache.helix.metamanager.container.ContainerProcessProperties;
+import org.apache.helix.metamanager.container.ContainerUtils;
+import org.apache.log4j.Logger;
+
+/**
+ * Host process for Shell-based container. ContainerProcess configuration is
+ * read from path in first command-line argument. Status is maintained using
+ * temporary marker file. (Program entry point)
+ * 
+ */
+class ShellContainerProcess {
+    static final Logger             log              = Logger.getLogger(ShellContainerProcess.class);
+
+    public static final long        MONITOR_INTERVAL = 5000;
+
+    static String                   markerDir;
+    static ContainerProcess         process;
+    static ScheduledExecutorService executor         = Executors.newSingleThreadScheduledExecutor();
+
+    public static void main(String[] args) throws Exception {
+        final String propertiesPath = args[0];
+        markerDir = args[1];
+
+        ContainerProcessProperties properties = ContainerUtils.getPropertiesFromPath(propertiesPath);
+
+        process = ContainerUtils.createProcess(properties);
+
+        log.debug("Installing shutdown hooks");
+        Runtime.getRuntime().addShutdownHook(new Thread() {
+            @Override
+            public void run() {
+                log.debug("Running shutdown hook");
+                try {
+                    ShellContainerProcess.stop();
+                } catch (Exception ignore) {
+                }
+            }
+        });
+
+        log.debug("Launching shell container process");
+        process.start();
+
+        ShellUtils.createMarker(new File(markerDir));
+
+        log.debug("Launching process monitor");
+        executor.scheduleAtFixedRate(new ProcessMonitor(), 0, MONITOR_INTERVAL, TimeUnit.MILLISECONDS);
+    }
+
+    static void stop() throws InterruptedException {
+        log.debug("Shutting down shell process");
+        if (process != null) {
+            process.stop();
+            ShellUtils.destroyMarker(new File(markerDir));
+        }
+        if (executor != null) {
+            executor.shutdownNow();
+            while (!executor.isTerminated()) {
+                Thread.sleep(100);
+            }
+            executor = null;
+        }
+    }
+
+    static class ProcessMonitor implements Runnable {
+        @Override
+        public void run() {
+            if (process.isFailed()) {
+                log.warn("detected process failure");
+                try {
+                    ShellContainerProcess.stop();
+                } catch (Exception ignore) {
+                }
+                System.exit(1);
+            }
+            if (!process.isActive()) {
+                log.warn("detected process shutdown");
+                try {
+                    ShellContainerProcess.stop();
+                } catch (Exception ignore) {
+                }
+            }
+        }
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/shell/ShellContainerProvider.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/shell/ShellContainerProvider.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/shell/ShellContainerProvider.java
new file mode 100644
index 0000000..69e2553
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/shell/ShellContainerProvider.java
@@ -0,0 +1,151 @@
+package org.apache.helix.metamanager.impl.shell;
+
+import java.io.File;
+import java.io.FileWriter;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Properties;
+import java.util.concurrent.TimeoutException;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.helix.metamanager.ContainerProvider;
+import org.apache.helix.metamanager.ContainerProviderService;
+import org.apache.helix.metamanager.container.ContainerProcessProperties;
+import org.apache.helix.metamanager.impl.shell.ShellContainerSingleton.ShellProcess;
+import org.apache.helix.metamanager.provider.ProviderProperties;
+import org.apache.log4j.Logger;
+
+import com.google.common.base.Preconditions;
+import com.google.common.io.Files;
+
+/**
+ * {@link ContainerProvider} spawning shell-based containers. Only works in single-VM
+ * deployments as container metadata is managed via singleton.
+ * 
+ * @see ShellContainerSingleton
+ */
+class ShellContainerProvider implements ContainerProviderService {
+
+    static final Logger                    log               = Logger.getLogger(ShellContainerProvider.class);
+
+    static final String                    RUN_COMMAND       = "/bin/sh";
+
+    static final long                      POLL_INTERVAL     = 1000;
+    static final long                      CONTAINER_TIMEOUT = 60000;
+
+    // global view of processes required
+    static final Map<String, ShellProcess> processes         = new HashMap<String, ShellProcess>();
+
+    final Map<String, Properties>          types             = new HashMap<String, Properties>();
+
+    String                                 address;
+    String                                 cluster;
+    String                                 name;
+
+    @Override
+    public void configure(Properties properties) throws Exception {
+        Preconditions.checkNotNull(properties);
+        ProviderProperties providerProperties = new ProviderProperties();
+        providerProperties.putAll(properties);
+        Preconditions.checkArgument(providerProperties.isValid());
+
+        this.address = providerProperties.getProperty("address");
+        this.cluster = providerProperties.getProperty("cluster");
+        this.name = providerProperties.getProperty("name");
+
+        for (String containerType : providerProperties.getContainers()) {
+            registerType(containerType, providerProperties.getContainer(containerType));
+        }
+    }
+
+    @Override
+    public void start() throws Exception {
+        // left blank
+    }
+
+    @Override
+    public void stop() throws Exception {
+        destroyAll();
+    }
+
+    @Override
+    public void create(String id, String type) throws Exception {
+        Map<String, ShellProcess> processes = ShellContainerSingleton.getProcesses();
+
+        synchronized (processes) {
+            Preconditions.checkState(!processes.containsKey(id), "Process '%s' already exists", id);
+            Preconditions.checkState(types.containsKey(type), "Type '%s' is not registered", type);
+
+            ContainerProcessProperties properties = new ContainerProcessProperties(types.get(type));
+
+            properties.setProperty(ContainerProcessProperties.CLUSTER, cluster);
+            properties.setProperty(ContainerProcessProperties.NAME, id);
+            properties.setProperty(ContainerProcessProperties.ADDRESS, address);
+
+            File tmpDir = Files.createTempDir();
+            File tmpProperties = new File(tmpDir.getCanonicalPath() + File.separator + ShellUtils.SHELL_CONTAINER_PROPERTIES);
+            File tmpMarker = new File(tmpDir.getCanonicalPath());
+
+            properties.store(new FileWriter(tmpProperties), id);
+
+            log.info(String.format("Running container '%s' (properties='%s')", id, properties));
+
+            log.debug(String.format("Invoking command '%s %s %s %s'", RUN_COMMAND, ShellUtils.SHELL_CONTAINER_PATH, tmpProperties.getCanonicalPath(),
+                    tmpMarker.getCanonicalPath()));
+
+            ProcessBuilder builder = new ProcessBuilder();
+            builder.command(RUN_COMMAND, ShellUtils.SHELL_CONTAINER_PATH, tmpProperties.getCanonicalPath(), tmpMarker.getCanonicalPath());
+
+            Process process = builder.start();
+
+            processes.put(id, new ShellProcess(id, name, process, tmpDir));
+
+            long limit = System.currentTimeMillis() + CONTAINER_TIMEOUT;
+            while (!ShellUtils.hasMarker(tmpDir)) {
+                if (System.currentTimeMillis() >= limit) {
+                    throw new TimeoutException(String.format("Container '%s' failed to reach active state", id));
+                }
+                Thread.sleep(POLL_INTERVAL);
+            }
+        }
+    }
+
+    @Override
+    public void destroy(String id) throws Exception {
+        Map<String, ShellProcess> processes = ShellContainerSingleton.getProcesses();
+
+        synchronized (processes) {
+            if (!processes.containsKey(id))
+                throw new IllegalArgumentException(String.format("Process '%s' does not exists", id));
+
+            log.info(String.format("Destroying container '%s'", id));
+
+            ShellProcess shell = processes.remove(id);
+            shell.process.destroy();
+            shell.process.waitFor();
+
+            FileUtils.deleteDirectory(shell.tmpDir);
+        }
+    }
+
+    @Override
+    public void destroyAll() {
+        Map<String, ShellProcess> processes = ShellContainerSingleton.getProcesses();
+
+        synchronized (processes) {
+            log.info("Destroying all owned processes");
+            for (ShellProcess process : new HashSet<ShellProcess>(processes.values())) {
+                if (process.owner.equals(name)) {
+			        try { destroy(process.id); } catch (Exception ignore) {}
+                }
+            }
+        }
+    }
+
+    void registerType(String name, Properties properties) {
+        log.debug(String.format("Registering container type '%s' (properties='%s')", name, properties));
+        types.put(name, properties);
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/shell/ShellContainerProviderProcess.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/shell/ShellContainerProviderProcess.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/shell/ShellContainerProviderProcess.java
new file mode 100644
index 0000000..36f150d
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/shell/ShellContainerProviderProcess.java
@@ -0,0 +1,45 @@
+package org.apache.helix.metamanager.impl.shell;
+
+import java.util.Properties;
+
+import org.apache.helix.metamanager.Service;
+import org.apache.helix.metamanager.provider.ProviderProcess;
+import org.apache.helix.metamanager.provider.ProviderProperties;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * Configurable and runnable service for {@link ShellContainerProvider}.
+ * 
+ */
+public class ShellContainerProviderProcess implements Service {
+    ShellContainerProvider provider;
+    ProviderProcess        process;
+
+    @Override
+    public void configure(Properties properties) throws Exception {
+        ProviderProperties providerProperties = new ProviderProperties();
+        providerProperties.putAll(properties);
+
+        Preconditions.checkArgument(providerProperties.isValid(), "provider properties not valid (properties='%s')", properties);
+
+        provider = new ShellContainerProvider();
+        provider.configure(properties);
+
+        process = new ProviderProcess();
+        process.configure(providerProperties);
+        process.setConteinerProvider(provider);
+    }
+
+    @Override
+    public void start() throws Exception {
+        provider.start();
+        process.start();
+    }
+
+    @Override
+    public void stop() throws Exception {
+        process.stop();
+        provider.stop();
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/shell/ShellContainerSingleton.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/shell/ShellContainerSingleton.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/shell/ShellContainerSingleton.java
new file mode 100644
index 0000000..91054e1
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/shell/ShellContainerSingleton.java
@@ -0,0 +1,58 @@
+package org.apache.helix.metamanager.impl.shell;
+
+import java.io.File;
+import java.util.HashMap;
+import java.util.Map;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * Singleton tracking metadata for shell-based containers spawned via
+ * {@link ShellContainerProvider}.
+ * 
+ */
+public class ShellContainerSingleton {
+    static final Map<String, ShellProcess> processes = new HashMap<String, ShellProcess>();
+
+    private ShellContainerSingleton() {
+        // left blank
+    }
+
+    public static Map<String, ShellProcess> getProcesses() {
+        return processes;
+    }
+
+    public static void reset() {
+        synchronized (processes) {
+            for (ShellProcess shell : processes.values()) {
+                shell.process.destroy();
+				try { shell.process.waitFor(); } catch(Exception ignore) {}
+            }
+            processes.clear();
+        }
+    }
+
+    public static void killProcess(String id) throws InterruptedException {
+        synchronized (processes) {
+            Preconditions.checkArgument(processes.containsKey(id), "Process '%s' does not exist", id);
+            Process process = processes.get(id).process;
+            process.destroy();
+            process.waitFor();
+            processes.remove(id);
+        }
+    }
+
+    static class ShellProcess {
+        final String  id;
+        final String  owner;
+        final Process process;
+        final File    tmpDir;
+
+        public ShellProcess(String id, String owner, Process process, File tmpDir) {
+            this.id = id;
+            this.owner = owner;
+            this.process = process;
+            this.tmpDir = tmpDir;
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/shell/ShellContainerStatusProvider.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/shell/ShellContainerStatusProvider.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/shell/ShellContainerStatusProvider.java
new file mode 100644
index 0000000..03e55c6
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/shell/ShellContainerStatusProvider.java
@@ -0,0 +1,52 @@
+package org.apache.helix.metamanager.impl.shell;
+
+import java.util.Map;
+
+import org.apache.helix.metamanager.ContainerStatusProvider;
+import org.apache.helix.metamanager.impl.shell.ShellContainerSingleton.ShellProcess;
+
+public class ShellContainerStatusProvider implements ContainerStatusProvider {
+
+	@Override
+	public boolean exists(String id) {
+		Map<String, ShellProcess> processes = ShellContainerSingleton.getProcesses();
+
+		synchronized (processes) {
+			return processes.containsKey(id);
+		}
+	}
+
+	@Override
+	public boolean isActive(String id) {
+		Map<String, ShellProcess> processes = ShellContainerSingleton.getProcesses();
+
+		synchronized (processes) {
+			ShellProcess shell = processes.get(id);
+			
+			try {
+				shell.process.exitValue();
+				return false;
+			} catch (IllegalThreadStateException e) {
+				// still running
+				return true;
+			}
+		}
+	}
+
+	@Override
+	public boolean isFailed(String id) {
+		Map<String, ShellProcess> processes = ShellContainerSingleton.getProcesses();
+
+		synchronized (processes) {
+			ShellProcess shell = processes.get(id);
+			
+			try {
+				return (shell.process.exitValue() != 0);
+			} catch (IllegalThreadStateException e) {
+				// still running
+				return false;
+			}
+		}
+	}
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/shell/ShellStatusProvider.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/shell/ShellStatusProvider.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/shell/ShellStatusProvider.java
new file mode 100644
index 0000000..015218b
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/shell/ShellStatusProvider.java
@@ -0,0 +1,64 @@
+package org.apache.helix.metamanager.impl.shell;
+
+import java.util.Map;
+import java.util.Properties;
+
+import org.apache.helix.metamanager.StatusProviderService;
+import org.apache.helix.metamanager.impl.shell.ShellContainerSingleton.ShellProcess;
+
+/**
+ * StatusProvider for shell-based containers spawned via
+ * {@link ShellContainerProvider}. Runnable and configurable service.
+ * 
+ */
+public class ShellStatusProvider implements StatusProviderService {
+
+    @Override
+    public boolean exists(String id) {
+        Map<String, ShellProcess> processes = ShellContainerSingleton.getProcesses();
+
+        synchronized (processes) {
+            return processes.containsKey(id);
+        }
+    }
+
+    @Override
+    public boolean isHealthy(String id) {
+        Map<String, ShellProcess> processes = ShellContainerSingleton.getProcesses();
+
+        synchronized (processes) {
+            ShellProcess shell = processes.get(id);
+
+            if (shell == null)
+                return false;
+
+            if (!ShellUtils.hasMarker(shell.tmpDir))
+                return false;
+
+            try {
+                // exit value
+                shell.process.exitValue();
+                return false;
+            } catch (IllegalThreadStateException e) {
+                // expected
+            }
+
+            return true;
+        }
+    }
+
+    @Override
+    public void configure(Properties properties) throws Exception {
+        // left blank
+    }
+
+    @Override
+    public void start() throws Exception {
+        // left blank
+    }
+
+    @Override
+    public void stop() throws Exception {
+        // left blank
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/shell/ShellUtils.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/shell/ShellUtils.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/shell/ShellUtils.java
new file mode 100644
index 0000000..dcec2ae
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/shell/ShellUtils.java
@@ -0,0 +1,54 @@
+package org.apache.helix.metamanager.impl.shell;
+
+import java.io.File;
+import java.io.IOException;
+
+import org.apache.log4j.Logger;
+
+/**
+ * Utility for creating and destroying temporary marker files for shell-based
+ * containers.
+ * 
+ */
+class ShellUtils {
+
+    static final Logger log                        = Logger.getLogger(ShellUtils.class);
+
+    // relative path of the launcher script inside the packaged distribution
+    static final String SHELL_CONTAINER_PATH       = "target/metamanager-pkg/bin/shell-container-process.sh";
+    static final String SHELL_CONTAINER_PROPERTIES = "container.properties";
+    // file name of the liveness marker created in the container's process dir
+    static final String SHELL_CONTAINER_MARKER     = "active";
+
+    private ShellUtils() {
+        // utility class, not instantiable
+    }
+
+    /** Returns true if the liveness marker exists; path-resolution failures count as "no marker". */
+    public static boolean hasMarker(File processDir) {
+        try {
+            log.debug(String.format("checking for marker file '%s'", getMarkerFile(processDir)));
+            if (getMarkerFile(processDir).exists())
+                return true;
+        } catch (IOException e) {
+            // ignore: canonical path resolution failed, treat as missing marker
+        }
+        return false;
+    }
+
+    /** Creates the liveness marker in the given process dir (no-op if it already exists). */
+    public static void createMarker(File processDir) throws IOException {
+        log.debug(String.format("creating marker file '%s'", getMarkerFile(processDir)));
+        getMarkerFile(processDir).createNewFile();
+    }
+
+    /** Best-effort removal of the liveness marker. NOTE(review): delete() result is discarded. */
+    public static void destroyMarker(File processDir) {
+        try {
+            log.debug(String.format("destroying marker file '%s'", getMarkerFile(processDir)));
+            getMarkerFile(processDir).delete();
+        } catch (IOException e) {
+            // ignore: marker removal is best effort
+        }
+    }
+
+    /** Resolves the marker file path below the canonical process directory. */
+    public static File getMarkerFile(File processDir) throws IOException {
+        return new File(processDir.getCanonicalPath() + File.separatorChar + SHELL_CONTAINER_MARKER);
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/ApplicationConfig.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/ApplicationConfig.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/ApplicationConfig.java
new file mode 100644
index 0000000..9c85232
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/ApplicationConfig.java
@@ -0,0 +1,32 @@
+package org.apache.helix.metamanager.impl.yarn;
+
+/**
+ * Immutable value holder for bootstrapping the YARN application master:
+ * Helix cluster address and name, metadata (zookeeper) address, and the
+ * logical name of the container provider.
+ */
+public class ApplicationConfig {
+	final String clusterAddress;
+	final String clusterName;
+	final String metadataAddress;
+	final String providerName;
+
+	public ApplicationConfig(String clusterAddress, String clusterName,
+			String metadataAddress, String providerName) {
+		this.clusterAddress = clusterAddress;
+		this.clusterName = clusterName;
+		this.metadataAddress = metadataAddress;
+		this.providerName = providerName;
+	}
+
+	public String getClusterAddress() {
+		return clusterAddress;
+	}
+
+	public String getClusterName() {
+		return clusterName;
+	}
+
+	public String getMetadataAddress() {
+		return metadataAddress;
+	}
+
+	public String getProviderName() {
+		return providerName;
+	}
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/ContainerMetadata.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/ContainerMetadata.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/ContainerMetadata.java
new file mode 100644
index 0000000..7b25d31
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/ContainerMetadata.java
@@ -0,0 +1,80 @@
+package org.apache.helix.metamanager.impl.yarn;
+
+
+/**
+ * Mutable metadata record for a YARN-managed container, tracking its id,
+ * lifecycle state, YARN container id, owning provider and process properties.
+ * Setters return {@code this} for fluent chaining.
+ *
+ * NOTE(review): this class is a near-duplicate of {@code YarnContainerData}
+ * elsewhere in this change set — presumably one supersedes the other; verify
+ * and consolidate.
+ */
+class ContainerMetadata {
+
+	// lifecycle of a container from request (ACQUIRE) to cleanup (FINALIZE)
+	static enum ContainerState {
+		ACQUIRE,
+		CONNECTING,
+		ACTIVE,
+		TEARDOWN,
+		FAILED,
+		HALTED,
+		FINALIZE
+	}
+	
+	String id;
+	ContainerState state;
+	int yarnId;
+	String owner;
+	YarnContainerProcessProperties properties;
+
+	public ContainerMetadata() {
+		// no-arg constructor required for JSON deserialization via Gson
+	}
+	
+	public ContainerMetadata(String id, String owner, YarnContainerProcessProperties properties) {
+		this.id = id;
+		this.state = ContainerState.ACQUIRE;
+		// -1 marks "no YARN container assigned yet"
+		this.yarnId = -1;
+		this.owner = owner;
+		this.properties = properties;
+	}
+
+	public String getId() {
+		return id;
+	}
+
+	public ContainerMetadata setId(String id) {
+		this.id = id;
+		return this;
+	}
+
+	public ContainerState getState() {
+		return state;
+	}
+
+	public ContainerMetadata setState(ContainerState state) {
+		this.state = state;
+		return this;
+	}
+
+	public int getYarnId() {
+		return yarnId;
+	}
+
+	public ContainerMetadata setYarnId(int yarnId) {
+		this.yarnId = yarnId;
+		return this;
+	}
+
+	public String getOwner() {
+		return owner;
+	}
+
+	public ContainerMetadata setOwner(String owner) {
+		this.owner = owner;
+		return this;
+	}
+
+	public YarnContainerProcessProperties getProperties() {
+		return properties;
+	}
+
+	public ContainerMetadata setProperties(YarnContainerProcessProperties properties) {
+		this.properties = properties;
+		return this;
+	}
+	
+}
+

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/MetadataProvider.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/MetadataProvider.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/MetadataProvider.java
new file mode 100644
index 0000000..a35c16e
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/MetadataProvider.java
@@ -0,0 +1,42 @@
+package org.apache.helix.metamanager.impl.yarn;
+
+import java.util.Collection;
+
+/**
+ * CRUD contract for persisting {@link ContainerMetadata}.
+ *
+ * NOTE(review): this package-private interface mirrors the public
+ * {@code MetadataService} interface almost verbatim — presumably one of the
+ * two is obsolete; verify and remove the duplicate.
+ */
+interface MetadataProvider {
+
+	/** Returns true if metadata for the given container id exists. */
+	public boolean exists(String id);
+
+	/** Creates a new metadata entry; fails if one already exists. */
+	public void create(ContainerMetadata meta) throws MetadataException;
+
+	/** Reads the metadata entry for the given container id. */
+	public ContainerMetadata read(String id) throws MetadataException;
+
+	/** Reads all known metadata entries. */
+	public Collection<ContainerMetadata> readAll() throws MetadataException;
+
+	/** Updates an existing metadata entry. */
+	public void update(ContainerMetadata meta) throws MetadataException;
+
+	/** Deletes the metadata entry for the given container id. */
+	public void delete(String id) throws MetadataException;
+	
+	/** Checked exception wrapping any metadata-store failure. */
+	public static class MetadataException extends Exception {
+
+		private static final long serialVersionUID = -2846997013918977056L;
+
+		public MetadataException() {
+			super();
+		}
+
+		public MetadataException(String message, Throwable cause) {
+			super(message, cause);
+		}
+
+		public MetadataException(String message) {
+			super(message);
+		}
+
+		public MetadataException(Throwable cause) {
+			super(cause);
+		}	
+	}
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/MetadataService.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/MetadataService.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/MetadataService.java
new file mode 100644
index 0000000..35dd1ee
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/MetadataService.java
@@ -0,0 +1,42 @@
+package org.apache.helix.metamanager.impl.yarn;
+
+import java.util.Collection;
+
+/**
+ * Public CRUD contract for persisting {@link ContainerMetadata}.
+ *
+ * NOTE(review): near-identical to the package-private {@code MetadataProvider}
+ * interface in this package — verify which one is canonical.
+ */
+public interface MetadataService {
+
+	/** Returns true if metadata for the given container id exists. */
+	public boolean exists(String id);
+
+	/** Creates a new metadata entry; fails if one already exists. */
+	public void create(ContainerMetadata meta) throws MetadataServiceException;
+
+	/** Reads the metadata entry for the given container id. */
+	public ContainerMetadata read(String id) throws MetadataServiceException;
+
+	/** Reads all known metadata entries. */
+	public Collection<ContainerMetadata> readAll() throws MetadataServiceException;
+
+	/** Updates an existing metadata entry. */
+	public void update(ContainerMetadata meta) throws MetadataServiceException;
+
+	/** Deletes the metadata entry for the given container id. */
+	public void delete(String id) throws MetadataServiceException;
+	
+	/** Checked exception wrapping any metadata-store failure. */
+	public static class MetadataServiceException extends Exception {
+
+		private static final long serialVersionUID = -2846997013918977056L;
+
+		public MetadataServiceException() {
+			super();
+		}
+
+		public MetadataServiceException(String message, Throwable cause) {
+			super(message, cause);
+		}
+
+		public MetadataServiceException(String message) {
+			super(message);
+		}
+
+		public MetadataServiceException(Throwable cause) {
+			super(cause);
+		}	
+	}
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/Utils.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/Utils.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/Utils.java
new file mode 100644
index 0000000..99f9a03
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/Utils.java
@@ -0,0 +1,94 @@
+package org.apache.helix.metamanager.impl.yarn;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.yarn.api.records.LocalResource;
+import org.apache.hadoop.yarn.api.records.LocalResourceType;
+import org.apache.hadoop.yarn.api.records.LocalResourceVisibility;
+import org.apache.hadoop.yarn.util.ConverterUtils;
+import org.apache.hadoop.yarn.util.Records;
+import org.apache.helix.metamanager.impl.yarn.ContainerMetadata.ContainerState;
+import org.apache.log4j.Logger;
+
+import com.google.gson.Gson;
+import com.google.gson.GsonBuilder;
+import com.google.gson.TypeAdapter;
+import com.google.gson.stream.JsonReader;
+import com.google.gson.stream.JsonToken;
+import com.google.gson.stream.JsonWriter;
+
+/**
+ * JSON (de)serialization helpers for {@link ContainerMetadata} and a dummy
+ * YARN {@link LocalResource} used for container launch contexts.
+ */
+public class Utils {
+	
+	static final Logger log = Logger.getLogger(Utils.class);
+	
+	// shared Gson instance; ContainerState is serialized by name via a custom adapter
+	static Gson gson;
+	static {
+		GsonBuilder builder = new GsonBuilder();
+		builder.registerTypeAdapter(ContainerState.class, new ContainerStateAdapter());
+		builder.setPrettyPrinting();
+		gson = builder.create();
+	}
+	
+	// created once at class load; backed by /tmp/dummy on the local filesystem
+	static Map<String, LocalResource>  dummyResources = createDummyResources();
+	
+	/** Serializes container metadata to pretty-printed JSON. */
+	static String toJson(ContainerMetadata meta) {
+		return gson.toJson(meta);
+	}
+	
+	/** Deserializes container metadata from JSON. */
+	static ContainerMetadata fromJson(String json) {
+		return gson.fromJson(json, ContainerMetadata.class);
+	}
+	
+	static Map<String, LocalResource> getDummyResources() {
+		return dummyResources;
+	}
+
+	/**
+	 * Builds a placeholder local-resource map pointing at /tmp/dummy.
+	 * NOTE(review): calls System.exit(1) on failure from a static initializer
+	 * path, which kills the whole JVM — consider throwing instead.
+	 */
+	private static Map<String, LocalResource> createDummyResources() {
+		File dummy = new File("/tmp/dummy");
+		
+		if(!dummy.exists()) {
+	    	try {
+	    		dummy.createNewFile();
+	    	} catch(Exception e) {
+	    		log.error("could not create dummy file", e);
+	    		System.exit(1);
+	    	}
+		}
+	    
+	    Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();
+	    Path path = new Path(dummy.toURI());
+	    LocalResource localResource = Records.newRecord(LocalResource.class);
+	    localResource.setType(LocalResourceType.FILE);
+	    localResource.setVisibility(LocalResourceVisibility.APPLICATION);          
+	    localResource.setResource(ConverterUtils.getYarnUrlFromPath(path)); 
+	    // timestamp and size must match the on-disk file or YARN localization fails
+	    localResource.setTimestamp(dummy.lastModified());
+	    localResource.setSize(dummy.length());
+	    localResources.put("dummy", localResource);
+		return localResources;
+	}
+	
+	/** Gson adapter mapping ContainerState to/from its enum name, null-safe in both directions. */
+	static class ContainerStateAdapter extends TypeAdapter<ContainerState> {
+		@Override
+		public ContainerState read(JsonReader reader) throws IOException {
+			if (reader.peek() == JsonToken.NULL) {
+				reader.nextNull();
+				return null;
+			}
+			return ContainerState.valueOf(reader.nextString());
+		}
+
+		@Override
+		public void write(JsonWriter writer, ContainerState value) throws IOException {
+			if (value == null) {
+				writer.nullValue();
+				return;
+			}
+			writer.value(value.name());
+		}
+	}
+	
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnApplication.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnApplication.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnApplication.java
new file mode 100644
index 0000000..3b7dcd6
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnApplication.java
@@ -0,0 +1,171 @@
+package org.apache.helix.metamanager.impl.yarn;
+
+import java.io.File;
+import java.net.InetSocketAddress;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Properties;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.net.NetUtils;
+import org.apache.hadoop.yarn.api.ApplicationConstants;
+import org.apache.hadoop.yarn.api.ClientRMProtocol;
+import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationResponse;
+import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationRequest;
+import org.apache.hadoop.yarn.api.records.ApplicationId;
+import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
+import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
+import org.apache.hadoop.yarn.api.records.LocalResource;
+import org.apache.hadoop.yarn.api.records.LocalResourceType;
+import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.exceptions.YarnRemoteException;
+import org.apache.hadoop.yarn.ipc.YarnRPC;
+import org.apache.hadoop.yarn.util.Records;
+import org.apache.helix.metamanager.ConfigTool;
+import org.apache.helix.metamanager.Service;
+import org.apache.log4j.Logger;
+
+/**
+ * Submits and kills the meta-manager application master on a YARN cluster.
+ * start() acquires an application id, stages the master/container archives
+ * and properties to HDFS, and submits the launch context; stop() force-kills
+ * the application and cleans up staged files.
+ */
+class YarnApplication implements Service {
+
+	static final Logger log = Logger.getLogger(YarnApplication.class);
+	
+	// environment variable names passed through to the application master
+	static final String ENV_CLUSTER_ADDRESS  = "YA_CLUSTER_ADDRESS";
+	static final String ENV_CLUSTER_NAME     = "YA_CLUSTER_NAME";
+	static final String ENV_METADATA_ADDRESS = "YA_METADATA_ADDRESS";
+	static final String ENV_PROVIDER_NAME    = "YA_PROVIDER_NAME";
+
+	// format args: master script path, log dir (stdout), log dir (stderr)
+	static String YARN_MASTER_COMMAND = "/bin/sh %s 1>%s/stdout 2>%s/stderr";
+	
+	Configuration conf;
+	YarnRPC rpc;
+	ClientRMProtocol rmClient;
+	ApplicationId appId;
+	// temp file holding the serialized properties; deleted in stop()
+	File propertiesFile;
+	
+	YarnApplicationProperties properties;
+
+	public YarnApplication() {
+	    // left blank; configure() must be called before start()
+	}
+	
+    public YarnApplication(YarnApplicationProperties properties) {
+        this.properties = properties;
+        internalConf();
+    }
+    
+    @Override
+    public void configure(Properties properties) throws Exception {
+        YarnApplicationProperties yarnProps = new YarnApplicationProperties();
+        yarnProps.putAll(properties);
+        this.properties = yarnProps;
+        internalConf();
+    }
+
+    // builds the YARN configuration and RPC factory from the supplied properties
+    public void internalConf() {
+        this.conf = new YarnConfiguration();
+        this.conf.set(YarnConfiguration.RM_ADDRESS, properties.getYarnResourceManager());
+        this.conf.set(YarnConfiguration.RM_SCHEDULER_ADDRESS, properties.getYarnScheduler());
+        this.conf.set(FileSystem.FS_DEFAULT_NAME_KEY, properties.getYarnHdfs());
+
+        this.rpc = YarnRPC.create(conf);
+    }
+    
+    @Override
+    public void start() throws Exception {
+		connect();
+		
+		String command = String.format(YARN_MASTER_COMMAND, ConfigTool.YARN_MASTER_PATH,
+				ApplicationConstants.LOG_DIR_EXPANSION_VAR, ApplicationConstants.LOG_DIR_EXPANSION_VAR);
+
+		log.info(String.format("Starting application '%s' provider '%s' (masterCommand='%s')",
+				properties.getProviderMetadata(), properties.getProviderName(), command));
+
+		log.debug(String.format("Running master command \"%s\"", command));
+		
+		// acquire a new application id from the resource manager
+		GetNewApplicationRequest appRequest = Records.newRecord(GetNewApplicationRequest.class);
+		GetNewApplicationResponse appResponse = rmClient.getNewApplication(appRequest);
+
+		this.appId = appResponse.getApplicationId();
+
+		log.info(String.format("Acquired app id '%s' for '%s'", appId.toString(), properties.getProviderName()));
+		
+		// shell command launching the application master
+		ContainerLaunchContext launchContext = Records.newRecord(ContainerLaunchContext.class);
+		launchContext.setCommands(Collections.singletonList(command));
+
+		// resource limit for the master container
+		Resource resource = Records.newRecord(Resource.class);
+		resource.setMemory(256); // TODO make dynamic
+		launchContext.setResource(resource);
+		
+	    // environment (currently empty; values travel via the properties file)
+	    Map<String, String> env = new HashMap<String, String>();
+	    launchContext.setEnvironment(env);
+	    
+	    // serialize configuration to a local temp file for staging
+	    propertiesFile = YarnUtils.writePropertiesToTemp(properties);
+	    
+	    // stage archives and properties to HDFS under the app-id namespace
+	    final String namespace = appId.toString();
+	    final Path masterArchive = YarnUtils.copyToHdfs(ConfigTool.YARN_MASTER_ARCHIVE_PATH, ConfigTool.YARN_MASTER_STAGING, namespace, conf);
+	    final Path masterProperties = YarnUtils.copyToHdfs(propertiesFile.getCanonicalPath(), ConfigTool.YARN_MASTER_PROPERTIES, namespace, conf);
+	    final Path containerArchive = YarnUtils.copyToHdfs(ConfigTool.YARN_CONTAINER_ARCHIVE_PATH, ConfigTool.YARN_CONTAINER_STAGING, namespace, conf);
+	    
+	    // local resources localized into the master's working directory
+	    Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();
+	    localResources.put(ConfigTool.YARN_MASTER_DESTINATION, 
+	    		YarnUtils.createHdfsResource(masterArchive, LocalResourceType.ARCHIVE, conf));
+	    localResources.put(ConfigTool.YARN_MASTER_PROPERTIES,
+	    		YarnUtils.createHdfsResource(masterProperties, LocalResourceType.FILE, conf));
+        localResources.put(ConfigTool.YARN_CONTAINER_STAGING,
+                YarnUtils.createHdfsResource(containerArchive, LocalResourceType.FILE, conf));
+	    
+	    launchContext.setLocalResources(localResources);
+	    
+	    // user the master container runs as
+	    launchContext.setUser(properties.getYarnUser());
+	    
+	    // submit the application to the resource manager
+	    ApplicationSubmissionContext subContext = Records.newRecord(ApplicationSubmissionContext.class);
+		subContext.setApplicationId(appId);
+		subContext.setApplicationName(properties.getProviderName());
+		subContext.setAMContainerSpec(launchContext);
+
+		SubmitApplicationRequest subRequest = Records.newRecord(SubmitApplicationRequest.class);
+		subRequest.setApplicationSubmissionContext(subContext);
+		
+		log.info(String.format("Starting app id '%s'", appId.toString()));
+
+		rmClient.submitApplication(subRequest);
+		
+	}
+
+    /** Force-kills the application; HDFS namespace cleanup is best effort. */
+    @Override
+	public void stop() throws YarnRemoteException {
+		log.info(String.format("Stopping app id '%s'", appId.toString()));
+		KillApplicationRequest killRequest = Records.newRecord(KillApplicationRequest.class);
+		killRequest.setApplicationId(appId);
+
+		rmClient.forceKillApplication(killRequest);
+
+		// deliberate best-effort cleanup; failures must not abort shutdown
+		try { YarnUtils.destroyHdfsNamespace(appId.toString(), conf); } catch(Exception ignore) {}
+		
+		propertiesFile.delete();
+	}
+
+	// opens a client proxy to the resource manager named in the configuration
+	void connect() {
+		YarnConfiguration yarnConf = new YarnConfiguration(conf);
+		InetSocketAddress rmAddress = NetUtils.createSocketAddr(yarnConf.get(
+				YarnConfiguration.RM_ADDRESS,
+				YarnConfiguration.DEFAULT_RM_ADDRESS));
+		log.info("Connecting to ResourceManager at: " + rmAddress);
+		this.rmClient = ((ClientRMProtocol) rpc.getProxy(ClientRMProtocol.class, rmAddress, conf));
+	}
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnApplicationProperties.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnApplicationProperties.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnApplicationProperties.java
new file mode 100644
index 0000000..e047179
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnApplicationProperties.java
@@ -0,0 +1,91 @@
+package org.apache.helix.metamanager.impl.yarn;
+
+import java.util.Properties;
+
+import org.apache.helix.metamanager.container.ContainerProcessProperties;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * Typed accessor wrapper over {@link Properties} for the YARN application
+ * configuration. get()/getProperty() are strict: accessing a missing key
+ * throws IllegalStateException via Preconditions.checkState rather than
+ * returning null.
+ */
+public class YarnApplicationProperties extends Properties {
+	/**
+	 * 
+	 */
+	private static final long serialVersionUID = -2209509977839674160L;
+	
+	public final static String HELIX_ZOOKEEPER = ContainerProcessProperties.HELIX_ZOOKEEPER;
+	public final static String HELIX_CLUSTER = ContainerProcessProperties.HELIX_CLUSTER;
+	public final static String PROVIDER_METADATA = "provider.metadata";
+	public final static String PROVIDER_NAME = "provider.name";
+	public final static String CONTAINER_ID = "container.id";
+    // NOTE(review): key value contains a typo ("resourcemananger"); fixing it
+    // would break existing property files, so it is flagged rather than changed
+    public final static String YARN_RESOURCEMANAGER = "yarn.resourcemananger";
+    public final static String YARN_SCHEDULER = "yarn.scheduler";
+    public final static String YARN_USER = "yarn.user";
+    public final static String YARN_HDFS= "yarn.hdfs";
+
+	/** True if all keys required by the application master are present. */
+	public boolean isValidMaster() {
+		return containsKey(HELIX_ZOOKEEPER) &&
+			   containsKey(HELIX_CLUSTER) &&
+			   containsKey(PROVIDER_METADATA) &&
+			   containsKey(PROVIDER_NAME) &&
+			   containsKey(YARN_RESOURCEMANAGER) &&
+			   containsKey(YARN_SCHEDULER) &&
+			   containsKey(YARN_USER) &&
+			   containsKey(YARN_HDFS);
+	}
+	
+	/** True if all keys required by a container process are present. */
+	public boolean isValidContainer() {
+		return containsKey(HELIX_ZOOKEEPER) &&
+			   containsKey(HELIX_CLUSTER) &&
+			   containsKey(PROVIDER_METADATA) &&
+			   containsKey(CONTAINER_ID);
+	}
+	
+	public String getHelixZookeeper() {
+		return getProperty(HELIX_ZOOKEEPER);
+	}
+
+	public String getHelixCluster() {
+		return getProperty(HELIX_CLUSTER);
+	}
+
+	public String getProviderMetadata() {
+		return getProperty(PROVIDER_METADATA);
+	}
+
+	public String getProviderName() {
+		return getProperty(PROVIDER_NAME);
+	}
+	
+	public String getContainerId() {
+		return getProperty(CONTAINER_ID);
+	}
+	
+    public String getYarnResourceManager() {
+        return getProperty(YARN_RESOURCEMANAGER);
+    }
+
+    public String getYarnScheduler() {
+        return getProperty(YARN_SCHEDULER);
+    }
+
+    public String getYarnUser() {
+        return getProperty(YARN_USER);
+    }
+
+    public String getYarnHdfs() {
+        return getProperty(YARN_HDFS);
+    }
+
+    /** Strict variant: throws IllegalStateException if the key is absent. */
+    @Override
+    public Object get(Object key) {
+        Preconditions.checkState(containsKey(key));
+        return super.get(key);
+    }
+    
+    /** Strict variant: throws IllegalStateException if the key is absent. */
+    @Override
+    public String getProperty(String key) {
+        Preconditions.checkState(containsKey(key));
+        return super.getProperty(key);
+    }
+    
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnContainerData.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnContainerData.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnContainerData.java
new file mode 100644
index 0000000..d369a2d
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnContainerData.java
@@ -0,0 +1,86 @@
+package org.apache.helix.metamanager.impl.yarn;
+
+/**
+ * Container meta data for YARN-based containers. Reflect lifecycle of container
+ * from requesting, to bootstrapping, active operation and shutdown. Read and
+ * written by {@link YarnMasterProcess}, {@link YarnContainerProvider} and
+ * {@link YarnContainerService}. Also read by {@link YarnStatusProvider}.
+ * Typically stored in zookeeper
+ * 
+ */
+class YarnContainerData {
+
+	// lifecycle of a container from request (ACQUIRE) to cleanup (FINALIZE)
+	static enum ContainerState {
+		ACQUIRE,
+		CONNECTING,
+		ACTIVE,
+		TEARDOWN,
+		FAILED,
+		HALTED,
+		FINALIZE
+	}
+	
+    String                         id;
+    ContainerState                 state;
+    int                            yarnId;
+    String                         owner;
+    YarnContainerProcessProperties properties;
+
+    public YarnContainerData() {
+        // no-arg constructor required for JSON deserialization
+    }
+
+    public YarnContainerData(String id, String owner, YarnContainerProcessProperties properties) {
+        this.id = id;
+        this.state = ContainerState.ACQUIRE;
+        // -1 marks "no YARN container assigned yet"
+        this.yarnId = -1;
+        this.owner = owner;
+        this.properties = properties;
+    }
+
+    public String getId() {
+        return id;
+    }
+
+    // setters return this for fluent chaining
+    public YarnContainerData setId(String id) {
+        this.id = id;
+        return this;
+    }
+
+    public ContainerState getState() {
+        return state;
+    }
+
+    public YarnContainerData setState(ContainerState state) {
+        this.state = state;
+        return this;
+    }
+
+    public int getYarnId() {
+        return yarnId;
+    }
+
+    public YarnContainerData setYarnId(int yarnId) {
+        this.yarnId = yarnId;
+        return this;
+    }
+
+    public String getOwner() {
+        return owner;
+    }
+
+    public YarnContainerData setOwner(String owner) {
+        this.owner = owner;
+        return this;
+    }
+
+    public YarnContainerProcessProperties getProperties() {
+        return properties;
+    }
+
+    public YarnContainerData setProperties(YarnContainerProcessProperties properties) {
+        this.properties = properties;
+        return this;
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnContainerProcess.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnContainerProcess.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnContainerProcess.java
new file mode 100644
index 0000000..2cad52d
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnContainerProcess.java
@@ -0,0 +1,53 @@
+package org.apache.helix.metamanager.impl.yarn;
+
+import org.apache.helix.metamanager.container.ContainerProcess;
+import org.apache.log4j.Logger;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * Host process for {@link ContainerProcess}es spawned by
+ * {@link YarnContainerProvider}. Configured via *.properties file in working
+ * directory. Corresponds to regular container in YARN and is managed jointly by
+ * the application master and the Helix participant. (Program entry point)
+ * 
+ */
+class YarnContainerProcess {
+    static final Logger log = Logger.getLogger(YarnContainerProcess.class);
+
+    /**
+     * Entry point: loads container properties from the working directory,
+     * starts the zookeeper-backed yarndata service and the container service,
+     * then blocks on stdin until ENTER is pressed. Services are stopped via
+     * the JVM shutdown hook triggered by System.exit(0).
+     */
+    public static void main(String[] args) throws Exception {
+        log.trace("BEGIN YarnProcess.main()");
+
+        final YarnContainerProcessProperties properties = YarnUtils.createContainerProcessProperties(YarnUtils
+                .getPropertiesFromPath(YarnUtils.YARN_CONTAINER_PROPERTIES));
+        Preconditions.checkArgument(properties.isValid(), "container properties not valid: %s", properties.toString());
+
+        log.debug("Launching yarndata service");
+        final ZookeeperYarnDataProvider metaService = new ZookeeperYarnDataProvider(properties.getYarnData());
+        metaService.start();
+
+        log.debug("Launching yarn container service");
+        final YarnContainerService yarnService = new YarnContainerService();
+        yarnService.configure(properties);
+        yarnService.setYarnDataProvider(metaService);
+        yarnService.start();
+
+        log.debug("Installing shutdown hooks");
+        Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() {
+            @Override
+            public void run() {
+                log.debug("Running shutdown hook");
+                // stop in reverse start order: container service first, then yarndata
+                yarnService.stop();
+                metaService.stop();
+            }
+        }));
+
+        System.out.println("Press ENTER to stop container process");
+        System.in.read();
+
+        log.debug("Stopping container services");
+        System.exit(0);
+
+        // NOTE(review): never executed — System.exit(0) above terminates the JVM
+        log.trace("END YarnProcess.main()");
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnContainerProcessProperties.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnContainerProcessProperties.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnContainerProcessProperties.java
new file mode 100644
index 0000000..5277e2f
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnContainerProcessProperties.java
@@ -0,0 +1,40 @@
+package org.apache.helix.metamanager.impl.yarn;
+
+import org.apache.helix.metamanager.container.ContainerProcessProperties;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * Base configuration for {@link YarnContainerProcess}. 
+ *
+ */
+public class YarnContainerProcessProperties extends ContainerProcessProperties {
+	/**
+	 * 
+	 */
+	private static final long serialVersionUID = -2209509977839674160L;
+	
+	// key for the yarndata (zookeeper) metadata address
+	public final static String YARNDATA = "yarndata";
+	
+	/** Valid when the base properties are valid and the yarndata key is set. */
+	public boolean isValid() {
+		return super.isValid() &&
+		       containsKey(YARNDATA);
+	}
+	
+	public String getYarnData() {
+		return getProperty(YARNDATA);
+	}
+
+    /** Strict variant: throws IllegalStateException if the key is absent. */
+    @Override
+    public Object get(Object key) {
+        Preconditions.checkState(containsKey(key));
+        return super.get(key);
+    }
+    
+    /** Strict variant: throws IllegalStateException if the key is absent. */
+    @Override
+    public String getProperty(String key) {
+        Preconditions.checkState(containsKey(key));
+        return super.getProperty(key);
+    }
+    
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnContainerProvider.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnContainerProvider.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnContainerProvider.java
new file mode 100644
index 0000000..c229a26
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnContainerProvider.java
@@ -0,0 +1,143 @@
+package org.apache.helix.metamanager.impl.yarn;
+
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Properties;
+import java.util.concurrent.TimeoutException;
+
+import org.apache.helix.metamanager.ContainerProvider;
+import org.apache.helix.metamanager.ContainerProviderService;
+import org.apache.helix.metamanager.impl.yarn.YarnContainerData.ContainerState;
+import org.apache.log4j.Logger;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * {@link ContainerProvider} spawning YARN-based containers. Reads and writes
+ * meta data using {@link YarnDataProvider}. Works in a distributed setting, but
+ * typically requires access to zookeeper.
+ * 
+ */
+class YarnContainerProvider implements ContainerProviderService {
+	
+	static final Logger log = Logger.getLogger(YarnContainerProvider.class);
+
+	// interval (ms) between meta data polls in waitForState()
+	static final long POLL_INTERVAL = 1000;
+	// maximum time (ms) to wait for a container state transition
+	static final long CONTAINER_TIMEOUT = 60000;
+	
+	/*
+	 * CONTAINERS
+	 *   A (A, READY)
+	 *   B (B, RUNNING)
+	 */
+	
+	// NOTE(review): 'notifier' is never referenced in this class - confirm it is needed
+	final Object notifier = new Object();
+    // container type name -> properties used to configure spawned containers
+    final Map<String, Properties> types = new HashMap<String, Properties>();
+	
+	ZookeeperYarnDataProvider yarnDataService;
+	YarnContainerProviderProcess yarnApp;
+	YarnContainerProviderProperties properties;
+	
+    /** Configures the provider and registers all container types found in the properties. */
+    @Override
+    public void configure(Properties properties) throws Exception {
+        YarnContainerProviderProperties yarnProps = new YarnContainerProviderProperties();
+        yarnProps.putAll(properties);
+        configure(yarnProps);
+    }
+    
+    private void configure(YarnContainerProviderProperties properties) {
+        this.properties = properties;
+        
+        for(String containerType : properties.getContainers()) {
+            registerType(containerType, properties.getContainer(containerType));
+        }
+    }
+    
+    /** Starts the zookeeper-backed meta data service. Requires valid, configured properties. */
+    @Override
+    public void start() throws Exception {
+        Preconditions.checkNotNull(properties);
+        Preconditions.checkState(properties.isValid(), "provider properties not valid: %s", properties);
+        
+        log.debug("Starting yarn container provider service");
+        yarnDataService = new ZookeeperYarnDataProvider();
+        yarnDataService.configure(properties);
+        yarnDataService.start();
+    }
+    
+    /** Destroys all containers owned by this provider, then stops the meta data service. */
+    @Override
+    public void stop() throws Exception {
+        log.debug("Stopping yarn container provider service");
+        destroyAll();
+        
+        if(yarnDataService != null) {
+            yarnDataService.stop();
+            yarnDataService = null;
+        }
+    }
+    
+	/** Creates container meta data and blocks until the container reports ACTIVE. */
+	@Override
+	public void create(final String id, final String type) throws Exception {
+	    Preconditions.checkArgument(types.containsKey(type), "Container type '%s' is not configured", type);
+	    
+		YarnContainerProcessProperties containerProperties = YarnUtils.createContainerProcessProperties(types.get(type));
+
+        log.info(String.format("Running container '%s' (properties='%s')", id, containerProperties));
+        
+		yarnDataService.create(new YarnContainerData(id, properties.getName(), containerProperties));
+		waitForState(id, ContainerState.ACTIVE);
+	}
+
+	/**
+	 * Moves an ACTIVE or FAILED container to TEARDOWN, waits for FINALIZE, and
+	 * removes its meta data. Throws IllegalStateException in any other state.
+	 */
+	@Override
+	public void destroy(final String id) throws Exception {
+		YarnContainerData meta = yarnDataService.read(id);
+
+		if(meta.state == ContainerState.ACTIVE) {
+			// NOTE(review): String.format with no format arguments is redundant here and below
+			log.info(String.format("Destroying active container, going to teardown"));
+			yarnDataService.update(meta.setState(ContainerState.TEARDOWN));
+			
+		} else if(meta.state == ContainerState.FAILED) {
+			log.info(String.format("Destroying failed container, going to teardown"));
+			yarnDataService.update(meta.setState(ContainerState.TEARDOWN));
+			
+		} else if(meta.state == ContainerState.FINALIZE) {
+			log.info(String.format("Destroying finalized container, skipping"));
+			
+		} else {
+			throw new IllegalStateException(String.format("Container '%s' must be active, failed or finalized", id));
+		}
+		
+		waitForState(id, ContainerState.FINALIZE);
+		yarnDataService.delete(id);
+	}
+
+	/** Best-effort destruction of every container owned by this provider; errors are ignored. */
+	@Override
+	public void destroyAll() {
+		try {
+			for(YarnContainerData meta : yarnDataService.readAll()) {
+			    if(meta.owner.equals(properties.getName())) {
+			        // best effort: a failed destroy must not abort the remaining ones
+			        try { destroy(meta.id); } catch (Exception ignore) {}
+			    }
+			}
+		} catch (Exception ignore) {
+			// ignore
+		}
+	}
+
+	/**
+	 * Polls the meta data service every POLL_INTERVAL ms until the container
+	 * reaches the given state.
+	 * @throws TimeoutException after CONTAINER_TIMEOUT ms without reaching it
+	 */
+	void waitForState(String id, ContainerState state) throws Exception, InterruptedException, TimeoutException {
+		long limit = System.currentTimeMillis() + CONTAINER_TIMEOUT;
+		YarnContainerData meta = yarnDataService.read(id);
+		while(meta.state != state) {
+			if(System.currentTimeMillis() >= limit) {
+				throw new TimeoutException(String.format("Container '%s' failed to reach state '%s' (currently is '%s')", id, state, meta.state));
+			}
+			Thread.sleep(POLL_INTERVAL);
+			meta = yarnDataService.read(id);
+		}
+	}
+	
+    /** Registers a container type with the properties used to spawn instances of it. */
+    void registerType(String name, Properties properties) {
+        log.debug(String.format("Registering container type '%s' (properties='%s')", name, properties));
+        types.put(name, properties);
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnContainerProviderProcess.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnContainerProviderProcess.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnContainerProviderProcess.java
new file mode 100644
index 0000000..d883dce
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnContainerProviderProcess.java
@@ -0,0 +1,158 @@
+package org.apache.helix.metamanager.impl.yarn;
+
+import java.io.File;
+import java.net.InetSocketAddress;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Properties;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.net.NetUtils;
+import org.apache.hadoop.yarn.api.ApplicationConstants;
+import org.apache.hadoop.yarn.api.ClientRMProtocol;
+import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationResponse;
+import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationRequest;
+import org.apache.hadoop.yarn.api.records.ApplicationId;
+import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
+import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
+import org.apache.hadoop.yarn.api.records.LocalResource;
+import org.apache.hadoop.yarn.api.records.LocalResourceType;
+import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.exceptions.YarnRemoteException;
+import org.apache.hadoop.yarn.ipc.YarnRPC;
+import org.apache.hadoop.yarn.util.Records;
+import org.apache.helix.metamanager.Service;
+import org.apache.log4j.Logger;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * Configurable and runnable service for {@link YarnContainerProvider}.
+ * Stages the application master to HDFS and submits it to the YARN
+ * ResourceManager on start(); kills the application again on stop().
+ */
+public class YarnContainerProviderProcess implements Service {
+
+    static final Logger             log                 = Logger.getLogger(YarnContainerProviderProcess.class);
+
+    // shell command template for the application master; %s slots are
+    // filled with the master script path and the YARN log directory
+    // NOTE(review): effectively a constant, consider declaring it final
+    static String                   YARN_MASTER_COMMAND = "/bin/sh %s 1>%s/stdout 2>%s/stderr";
+
+    Configuration                   conf;
+    YarnRPC                         rpc;
+    ClientRMProtocol                rmClient;
+    ApplicationId                   appId;
+    // temp file holding the serialized provider properties, staged to HDFS
+    File                            propertiesFile;
+
+    YarnContainerProviderProperties properties;
+
+    @Override
+    public void configure(Properties properties) throws Exception {
+        configure(YarnUtils.createContainerProviderProperties(properties));
+    }
+
+    // builds the YARN/HDFS configuration from the provider properties
+    private void configure(YarnContainerProviderProperties properties) {
+        this.conf = new YarnConfiguration();
+        this.conf.set(YarnConfiguration.RM_ADDRESS, properties.getResourceManager());
+        this.conf.set(YarnConfiguration.RM_SCHEDULER_ADDRESS, properties.getScheduler());
+        this.conf.set(FileSystem.FS_DEFAULT_NAME_KEY, properties.getHdfs());
+
+        this.rpc = YarnRPC.create(conf);
+
+        this.properties = properties;
+    }
+
+    /**
+     * Acquires a new application id, stages the master archive, the provider
+     * properties and the container archive to HDFS under that id, and submits
+     * the application master to the ResourceManager.
+     */
+    @Override
+    public void start() throws Exception {
+        Preconditions.checkNotNull(properties);
+        Preconditions.checkState(properties.isValid());
+
+        connect();
+
+        String command = String.format(YARN_MASTER_COMMAND, YarnUtils.YARN_MASTER_PATH, ApplicationConstants.LOG_DIR_EXPANSION_VAR,
+                ApplicationConstants.LOG_DIR_EXPANSION_VAR);
+
+        log.info(String.format("Starting application '%s' provider '%s' (masterCommand='%s')", properties.getYarnData(), properties.getName(), command));
+
+        log.debug(String.format("Running master command \"%s\"", command));
+
+        // app id
+        GetNewApplicationRequest appRequest = Records.newRecord(GetNewApplicationRequest.class);
+        GetNewApplicationResponse appResponse = rmClient.getNewApplication(appRequest);
+
+        this.appId = appResponse.getApplicationId();
+
+        log.info(String.format("Acquired app id '%s' for '%s'", appId.toString(), properties.getName()));
+
+        // command
+        ContainerLaunchContext launchContext = Records.newRecord(ContainerLaunchContext.class);
+        launchContext.setCommands(Collections.singletonList(command));
+
+        // resource limit
+        Resource resource = Records.newRecord(Resource.class);
+        resource.setMemory(256); // TODO make dynamic
+        launchContext.setResource(resource);
+
+        // environment (none - the master reads its config from local resources)
+        Map<String, String> env = new HashMap<String, String>();
+        launchContext.setEnvironment(env);
+
+        // configuration
+        propertiesFile = YarnUtils.writePropertiesToTemp(properties);
+
+        // HDFS staging, namespaced by application id so parallel apps do not collide
+        final String namespace = appId.toString();
+        final Path masterArchive = YarnUtils.copyToHdfs(YarnUtils.YARN_MASTER_ARCHIVE_PATH, YarnUtils.YARN_MASTER_STAGING, namespace, conf);
+        final Path masterProperties = YarnUtils.copyToHdfs(propertiesFile.getCanonicalPath(), YarnUtils.YARN_MASTER_PROPERTIES, namespace, conf);
+        final Path containerArchive = YarnUtils.copyToHdfs(YarnUtils.YARN_CONTAINER_ARCHIVE_PATH, YarnUtils.YARN_CONTAINER_STAGING, namespace, conf);
+
+        // local resources
+        Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();
+        localResources.put(YarnUtils.YARN_MASTER_DESTINATION, YarnUtils.createHdfsResource(masterArchive, LocalResourceType.ARCHIVE, conf));
+        localResources.put(YarnUtils.YARN_MASTER_PROPERTIES, YarnUtils.createHdfsResource(masterProperties, LocalResourceType.FILE, conf));
+        localResources.put(YarnUtils.YARN_CONTAINER_STAGING, YarnUtils.createHdfsResource(containerArchive, LocalResourceType.FILE, conf));
+
+        launchContext.setLocalResources(localResources);
+
+        // user
+        launchContext.setUser(properties.getUser());
+
+        // app submission
+        ApplicationSubmissionContext subContext = Records.newRecord(ApplicationSubmissionContext.class);
+        subContext.setApplicationId(appId);
+        subContext.setApplicationName(properties.getName());
+        subContext.setAMContainerSpec(launchContext);
+
+        SubmitApplicationRequest subRequest = Records.newRecord(SubmitApplicationRequest.class);
+        subRequest.setApplicationSubmissionContext(subContext);
+
+        log.info(String.format("Starting app id '%s'", appId.toString()));
+
+        rmClient.submitApplication(subRequest);
+
+    }
+
+    /**
+     * Kills the YARN application, cleans up the HDFS staging namespace
+     * (best effort - failures are deliberately ignored) and deletes the
+     * temporary properties file.
+     */
+    @Override
+    public void stop() throws YarnRemoteException {
+        log.info(String.format("Stopping app id '%s'", appId.toString()));
+        KillApplicationRequest killRequest = Records.newRecord(KillApplicationRequest.class);
+        killRequest.setApplicationId(appId);
+
+        rmClient.forceKillApplication(killRequest);
+
+		try { YarnUtils.destroyHdfsNamespace(appId.toString(), conf); } catch(Exception ignore) {}
+		
+        propertiesFile.delete();
+    }
+
+    // creates the RPC proxy to the ResourceManager configured in conf
+    void connect() {
+        YarnConfiguration yarnConf = new YarnConfiguration(conf);
+        InetSocketAddress rmAddress = NetUtils.createSocketAddr(yarnConf.get(YarnConfiguration.RM_ADDRESS, YarnConfiguration.DEFAULT_RM_ADDRESS));
+        log.info("Connecting to ResourceManager at: " + rmAddress);
+        this.rmClient = ((ClientRMProtocol) rpc.getProxy(ClientRMProtocol.class, rmAddress, conf));
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnContainerProviderProperties.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnContainerProviderProperties.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnContainerProviderProperties.java
new file mode 100644
index 0000000..95ad0aa
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnContainerProviderProperties.java
@@ -0,0 +1,64 @@
+package org.apache.helix.metamanager.impl.yarn;
+
+import org.apache.helix.metamanager.provider.ProviderProperties;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * Base configuration for {@link YarnContainerProviderProcess}. Adds the
+ * YARN-specific settings (meta data store, resource manager, scheduler,
+ * user and HDFS addresses) to the generic provider properties. Lookups
+ * fail fast on missing keys instead of returning null.
+ */
+public class YarnContainerProviderProperties extends ProviderProperties {
+	/**
+     * 
+     */
+    private static final long serialVersionUID = -8853614843205587170L;
+    
+	public final static String YARNDATA = "yarndata";
+    // NOTE(review): the value is misspelled ("resourcemananger"); config files
+    // must use the same misspelled key, so correcting it here alone would
+    // silently break existing configurations - fix both together if at all
+    public final static String RESOURCEMANAGER = "resourcemananger";
+    public final static String SCHEDULER = "scheduler";
+    public final static String USER = "user";
+    public final static String HDFS = "hdfs";
+    
+	// valid only if the base properties and all YARN-specific keys are present
+	public boolean isValid() {
+		return super.isValid() &&
+		       containsKey(YARNDATA) &&
+			   containsKey(RESOURCEMANAGER) &&
+			   containsKey(SCHEDULER) &&
+			   containsKey(USER) &&
+			   containsKey(HDFS);
+	}
+	
+	/** Returns the zookeeper address of the YARN meta data store. */
+	public String getYarnData() {
+		return getProperty(YARNDATA);
+	}
+
+    public String getResourceManager() {
+        return getProperty(RESOURCEMANAGER);
+    }
+
+    public String getScheduler() {
+        return getProperty(SCHEDULER);
+    }
+
+    public String getUser() {
+        return getProperty(USER);
+    }
+
+    public String getHdfs() {
+        return getProperty(HDFS);
+    }
+    
+    // fail fast on absent keys rather than silently returning null
+    @Override
+    public String getProperty(String key) {
+        Preconditions.checkState(containsKey(key));
+        return super.getProperty(key);
+    }
+    
+    @Override
+    public Object get(Object key) {
+        Preconditions.checkState(containsKey(key));
+        return super.get(key);
+    }
+    
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnContainerService.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnContainerService.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnContainerService.java
new file mode 100644
index 0000000..804d6ed
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnContainerService.java
@@ -0,0 +1,156 @@
+package org.apache.helix.metamanager.impl.yarn;
+
+import java.io.File;
+import java.util.Properties;
+import java.util.concurrent.Executors;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.helix.metamanager.Service;
+import org.apache.helix.metamanager.container.ContainerProcess;
+import org.apache.helix.metamanager.container.ContainerProcessProperties;
+import org.apache.helix.metamanager.container.ContainerUtils;
+import org.apache.helix.metamanager.impl.yarn.YarnContainerData.ContainerState;
+import org.apache.log4j.Logger;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * Configurable and runnable service for YARN-based containers. Continuously
+ * checks container meta data and process state and triggers state changes and
+ * container setup and shutdown.
+ * 
+ */
+class YarnContainerService implements Service {
+    static final Logger            log                       = Logger.getLogger(YarnContainerService.class);
+
+    // interval (ms) between container status checks
+    static final long              CONTAINERSERVICE_INTERVAL = 1000;
+
+    YarnContainerProcessProperties properties;
+
+    // meta data store shared with the container provider
+    YarnDataProvider               metaService;
+    ScheduledExecutorService       executor;
+
+    // the managed container process; created lazily in the CONNECTING state
+    ContainerProcess               process;
+
+    @Override
+    public void configure(Properties properties) throws Exception {
+        Preconditions.checkNotNull(properties);
+        YarnContainerProcessProperties containerProperties = new YarnContainerProcessProperties();
+        containerProperties.putAll(properties);
+        Preconditions.checkArgument(containerProperties.isValid());
+
+        this.properties = containerProperties;
+    }
+
+    /** Injects the meta data provider; must be called before start(). */
+    public void setYarnDataProvider(YarnDataProvider metaService) {
+        this.metaService = metaService;
+    }
+
+    /** Starts the periodic status-check task. Requires meta service and valid properties. */
+    @Override
+    public void start() {
+        Preconditions.checkNotNull(metaService);
+        Preconditions.checkNotNull(properties);
+        Preconditions.checkState(properties.isValid());
+
+        log.debug("starting yarn container service");
+
+        executor = Executors.newSingleThreadScheduledExecutor();
+        executor.scheduleAtFixedRate(new ContainerStatusService(), 0, CONTAINERSERVICE_INTERVAL, TimeUnit.MILLISECONDS);
+    }
+
+    /** Shuts down the status-check task (blocking until terminated) and cleans the local namespace. */
+    @Override
+    public void stop() {
+        log.debug("stopping yarn container service");
+
+        if (executor != null) {
+            executor.shutdown();
+            // busy-wait until the in-flight status check completes
+            while (!executor.isTerminated()) {
+                try {
+                    Thread.sleep(100);
+                } catch (InterruptedException e) {
+                    // ignore
+                }
+            }
+            executor = null;
+        }
+
+        destroyLocalContainerNamespace();
+    }
+
+    /**
+     * Periodic task driving the container state machine: CONNECTING ->
+     * ACTIVE/FAILED, ACTIVE -> FAILED/HALTED, TEARDOWN -> HALTED, based on
+     * the meta data state and the local process state.
+     */
+    class ContainerStatusService implements Runnable {
+        @Override
+        public void run() {
+            log.info("updating container status");
+
+            try {
+                if (!metaService.exists(properties.getName())) {
+                    log.warn(String.format("YarnData for '%s' does not exist. Terminating yarn service.", properties.getName()));
+                    process.stop();
+                    stop();
+                    // NOTE(review): missing 'return' here - execution falls through and
+                    // the read() below will likely fail for the now-missing node
+                }
+
+                YarnContainerData meta = metaService.read(properties.getName());
+
+                if (meta.state == ContainerState.CONNECTING) {
+                    log.trace("container connecting");
+                    try {
+                        ContainerProcessProperties containerProperties = meta.getProperties();
+
+                        containerProperties.setProperty(ContainerProcessProperties.CLUSTER, properties.getCluster());
+                        containerProperties.setProperty(ContainerProcessProperties.ADDRESS, properties.getAddress());
+                        containerProperties.setProperty(ContainerProcessProperties.NAME, properties.getName());
+
+                        process = ContainerUtils.createProcess(containerProperties);
+                        process.start();
+                    } catch (Exception e) {
+                        // NOTE(review): if createProcess() throws, 'process' may still be
+                        // null and process.isActive() below throws NullPointerException
+                        log.error("Failed to start participant, going to failed", e);
+                    }
+
+                    if (process.isActive()) {
+                        log.trace("process active, activating container");
+                        metaService.update(meta.setState(ContainerState.ACTIVE));
+
+                    } else if (process.isFailed()) {
+                        log.trace("process failed, failing container");
+                        metaService.update(meta.setState(ContainerState.FAILED));
+
+                    } else {
+                        log.trace("process state unknown, failing container");
+                        metaService.update(meta.setState(ContainerState.FAILED));
+                    }
+                }
+
+                if (meta.state == ContainerState.ACTIVE) {
+                    log.trace("container active");
+                    if (process.isFailed()) {
+                        log.trace("process failed, failing container");
+                        metaService.update(meta.setState(ContainerState.FAILED));
+
+                    } else if (!process.isActive()) {
+                        log.trace("process not active, halting container");
+                        process.stop();
+                        metaService.update(meta.setState(ContainerState.HALTED));
+                    }
+                }
+
+                if (meta.state == ContainerState.TEARDOWN) {
+                    log.trace("container teardown");
+                    process.stop();
+                    metaService.update(meta.setState(ContainerState.HALTED));
+                }
+
+            } catch (Exception e) {
+                log.error(String.format("Error while updating container '%s' status", properties.getName()), e);
+            }
+        }
+    }
+
+    /** Quietly deletes the locally staged container archive and properties file. */
+    public static void destroyLocalContainerNamespace() {
+        log.info("cleaning up container directory");
+        FileUtils.deleteQuietly(new File(YarnUtils.YARN_CONTAINER_DESTINATION));
+        FileUtils.deleteQuietly(new File(YarnUtils.YARN_CONTAINER_PROPERTIES));
+    }
+
+}


[07/15] Adding Helix-task-framework and Yarn integration modules

Posted by ki...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/cluster/RedisTargetProvider.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/cluster/RedisTargetProvider.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/cluster/RedisTargetProvider.java
new file mode 100644
index 0000000..94c617d
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/cluster/RedisTargetProvider.java
@@ -0,0 +1,329 @@
+package org.apache.helix.metamanager.cluster;
+
+import java.io.BufferedReader;
+import java.io.InputStreamReader;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.Map.Entry;
+import java.util.Properties;
+import java.util.concurrent.Callable;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.TimeUnit;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.I0Itec.zkclient.ZkClient;
+import org.apache.helix.metamanager.ClusterStatusProvider;
+import org.apache.log4j.Logger;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * {@link ClusterStatusProvider} that computes a target container count for
+ * redis servers by periodically running the redis-benchmark tool against all
+ * servers registered in zookeeper and scaling the count so that measured
+ * GET/SET throughput meets the targets stored under the zookeeper root.
+ */
+public class RedisTargetProvider implements ClusterStatusProvider {
+
+    static final Logger        log               = Logger.getLogger(RedisTargetProvider.class);
+
+    // external benchmark binary; must be on the PATH
+    public static final String BENCHMARK_COMMAND = "redis-benchmark";
+    public static final String BENCHMARK_TESTS   = "GET,SET";
+
+    public static final String DEFAULT_RECORDS   = "100000";
+    public static final String DEFAULT_CLIENTS   = "20";
+    public static final String DEFAULT_REQUESTS  = "100000";
+    public static final String DEFAULT_TIMEOUT   = "8000";
+    public static final String DEFAULT_INTERVAL  = "10000";
+
+    ZkClient                   zookeeper;
+
+    // zookeeper connect string and root node holding server registrations
+    final String               address;
+    final String               root;
+
+    final int                  records;
+    final int                  clients;
+    final int                  requests;
+    // max total wait (ms) for all benchmark results per round
+    final int                  timeout;
+    // delay (ms) between benchmark rounds
+    final int                  interval;
+
+    // target transactions per second, refreshed from zookeeper each round
+    int                        targetTpsGet;
+    int                        targetTpsSet;
+    // most recently computed target container count, never below 1
+    int                        targetCount       = 1;
+
+    ScheduledExecutorService   executor;
+
+    /**
+     * Creates the provider from properties. Requires "address" (zookeeper
+     * connect string) and "root" (zookeeper root node); all other settings
+     * fall back to the DEFAULT_* values.
+     */
+    public RedisTargetProvider(Properties properties) {
+        address = properties.getProperty("address");
+        root = properties.getProperty("root");
+        targetTpsGet = Integer.valueOf(properties.getProperty("tps.get", "0"));
+        targetTpsSet = Integer.valueOf(properties.getProperty("tps.set", "0"));
+        records = Integer.valueOf(properties.getProperty("records", DEFAULT_RECORDS));
+        clients = Integer.valueOf(properties.getProperty("clients", DEFAULT_CLIENTS));
+        requests = Integer.valueOf(properties.getProperty("requests", DEFAULT_REQUESTS));
+        timeout = Integer.valueOf(properties.getProperty("timeout", DEFAULT_TIMEOUT));
+        interval = Integer.valueOf(properties.getProperty("interval", DEFAULT_INTERVAL));
+    }
+
+    /**
+     * Connects to zookeeper, seeds the target.get/target.set nodes with the
+     * configured defaults if absent, and schedules the periodic benchmark.
+     */
+    public void startService() {
+        log.debug("starting redis status service");
+        zookeeper = new ZkClient(address);
+        zookeeper.createPersistent("/" + root, true);
+
+        // TODO not concurrency-safe, should not matter though
+        if (!zookeeper.exists("/" + root + "/target.get")) {
+            try {
+                zookeeper.createPersistent("/" + root + "/target.get", String.valueOf(targetTpsGet));
+            } catch (Exception ignore) {
+            }
+        }
+        if (!zookeeper.exists("/" + root + "/target.set")) {
+            try {
+                zookeeper.createPersistent("/" + root + "/target.set", String.valueOf(targetTpsSet));
+            } catch (Exception ignore) {
+            }
+        }
+
+        executor = Executors.newSingleThreadScheduledExecutor();
+        executor.scheduleAtFixedRate(new RedisBenchmarkRunnable(), 0, interval, TimeUnit.MILLISECONDS);
+    }
+
+    /** Cancels the benchmark task (blocking until terminated) and closes the zookeeper client. */
+    public void stopService() {
+        log.debug("stopping redis status service");
+        if (executor != null) {
+            executor.shutdownNow();
+            // busy-wait until the in-flight benchmark round completes
+            while (!executor.isTerminated()) {
+                try {
+                    Thread.sleep(100);
+                } catch (InterruptedException e) {
+                    // ignore
+                }
+            }
+            executor = null;
+        }
+        if (zookeeper != null) {
+            zookeeper.close();
+            zookeeper = null;
+        }
+    }
+
+    /** Returns the most recently computed target count. The containerType argument is ignored. */
+    @Override
+    public int getTargetContainerCount(String containerType) throws Exception {
+        return targetCount;
+    }
+
+    /**
+     * One benchmark round: runs redis-benchmark against every registered
+     * server in parallel, aggregates the per-test throughput, reads the
+     * current TPS targets from zookeeper, and derives the new target count.
+     */
+    private class RedisBenchmarkRunnable implements Runnable {
+        ExecutorService executor = Executors.newCachedThreadPool();
+        RedisResult     aggregateResult;
+
+        @Override
+        public void run() {
+            log.debug("running redis benchmark");
+
+            aggregateResult = new RedisResult(0);
+            Collection<Future<RedisResult>> futures = new ArrayList<Future<RedisResult>>();
+
+            try {
+                Collection<RedisTarget> targets = getTargets();
+
+                // start benchmark
+                for (RedisTarget target : targets) {
+                    log.debug(String.format("submitting target '%s'", target));
+                    Future<RedisResult> future = executor.submit(new RedisCallable(target));
+                    futures.add(future);
+                }
+
+                // aggregate results, sharing a single deadline across all futures
+                try {
+                    log.debug("waiting for results");
+
+                    long limit = System.currentTimeMillis() + timeout;
+                    for (Future<RedisResult> future : futures) {
+                        try {
+                            RedisResult result = future.get(limit - System.currentTimeMillis(), TimeUnit.MILLISECONDS);
+                            log.debug(String.format("got result '%s'", result));
+                            aggregate(result);
+                        } catch (Exception e) {
+                            // a slow or failed server is dropped from this round's aggregate
+                            log.warn(String.format("failed to get result"));
+                            future.cancel(true);
+                        }
+                    }
+                } catch (Exception e) {
+                    log.error("Error running redis benchmark", e);
+
+                    for (Future<RedisResult> future : futures) {
+                        future.cancel(true);
+                    }
+
+                    return;
+                }
+
+                // compare to thresholds
+                log.debug(String.format("aggregate result is '%s'", aggregateResult));
+
+                // get target from zookeeper; keep the previous value on failure
+                try {
+                    targetTpsGet = Integer.valueOf(zookeeper.<String> readData("/" + root + "/target.get"));
+                } catch (Exception ignore) {
+                }
+                try {
+                    targetTpsSet = Integer.valueOf(zookeeper.<String> readData("/" + root + "/target.set"));
+                } catch (Exception ignore) {
+                }
+
+                // scale the server count by the ratio of target TPS to measured TPS
+                int targetCountGet = -1;
+                if (aggregateResult.containsKey("GET")) {
+                    double tpsTarget = targetTpsGet;
+                    double tps = aggregateResult.get("GET");
+
+                    targetCountGet = (int) Math.ceil(tpsTarget / tps * aggregateResult.serverCount);
+                    log.debug(String.format("count.get=%d, tps.get=%f, target.get=%f", targetCountGet, tps, tpsTarget));
+                }
+
+                int targetCountSet = -1;
+                if (aggregateResult.containsKey("SET")) {
+                    double tpsTarget = targetTpsSet;
+                    double tps = aggregateResult.get("SET");
+
+                    targetCountSet = (int) Math.ceil(tpsTarget / tps * aggregateResult.serverCount);
+                    log.debug(String.format("count.set=%d, tps.set=%f, target.set=%f", targetCountSet, tps, tpsTarget));
+                }
+
+                // take the larger of the two demands, but never drop below 1
+                targetCount = Math.max(targetCountGet, targetCountSet);
+                targetCount = Math.max(targetCount, 1);
+
+                log.debug(String.format("target count is %d", targetCount));
+                // NOTE(review): redundant - the unqualified assignments above already
+                // wrote the outer field, since no local 'targetCount' is declared
+                RedisTargetProvider.this.targetCount = targetCount;
+
+            } catch (Exception e) {
+                log.error("Error running redis benchmark", e);
+
+                for (Future<RedisResult> future : futures) {
+                    future.cancel(true);
+                }
+            }
+
+        }
+
+        /** Reads the registered redis servers (hostname/port child nodes) from zookeeper. */
+        Collection<RedisTarget> getTargets() {
+            log.debug("fetching redis servers from zookeeper");
+            Collection<RedisTarget> targets = new ArrayList<RedisTarget>();
+            Collection<String> servers = zookeeper.getChildren("/" + root);
+
+            // the target.* nodes live alongside the server nodes; skip them
+            servers.remove("target.get");
+            servers.remove("target.set");
+
+            for (String server : servers) {
+                String hostname = zookeeper.readData("/" + root + "/" + server + "/hostname");
+                int port = Integer.valueOf(zookeeper.<String> readData("/" + root + "/" + server + "/port"));
+
+                targets.add(new RedisTarget(hostname, port));
+            }
+
+            log.debug(String.format("found %d servers: %s", targets.size(), targets));
+            return targets;
+        }
+
+        /** Folds one server's result into the aggregate: sums per-test TPS and server counts. */
+        void aggregate(RedisResult result) {
+            RedisResult newResult = new RedisResult(aggregateResult.serverCount + result.serverCount);
+
+            for (Entry<String, Double> entry : result.entrySet()) {
+                double current = 0.0d;
+                if (aggregateResult.containsKey(entry.getKey()))
+                    current = aggregateResult.get(entry.getKey());
+
+                current += entry.getValue();
+                newResult.put(entry.getKey(), current);
+            }
+
+            aggregateResult = newResult;
+        }
+    }
+
+    /**
+     * Immutable hostname/port pair identifying a single redis server
+     * instance targeted by the benchmark.
+     */
+    private static class RedisTarget {
+        final String hostname;
+        final int    port;
+
+        public RedisTarget(String hostname, int port) {
+            this.hostname = hostname;
+            this.port = port;
+        }
+
+        @Override
+        public String toString() {
+            return String.format("%s:%d", hostname, port);
+        }
+    }
+
+    /**
+     * Benchmark result for one or more servers: maps the benchmark test name
+     * (e.g. "GET", "SET") to its measured value and records how many servers
+     * contributed to the entries.
+     */
+    private static class RedisResult extends HashMap<String, Double> {
+        /**
+         * serialization id
+         */
+        private static final long serialVersionUID = 4599748807597500952L;
+
+        // number of servers whose results are folded into this map
+        final int                 serverCount;
+
+        public RedisResult(int serverCount) {
+            this.serverCount = serverCount;
+        }
+
+        @Override
+        public String toString() {
+            return String.format("[serverCount=%d %s]", serverCount, super.toString());
+        }
+    }
+
+    /**
+     * Runs the external redis-benchmark tool against a single target server
+     * and parses its CSV output into a per-test RedisResult.
+     */
+    private class RedisCallable implements Callable<RedisResult> {
+        final RedisTarget target;
+
+        public RedisCallable(RedisTarget target) {
+            this.target = target;
+        }
+
+        /**
+         * Launches the benchmark process, waits for it to complete and
+         * extracts one value per test from the CSV output.
+         *
+         * @return per-test results for this target; empty if interrupted
+         * @throws Exception if the process cannot be started
+         */
+        @Override
+        public RedisResult call() throws Exception {
+            log.debug(String.format("executing benchmark for '%s'", target));
+
+            ProcessBuilder builder = new ProcessBuilder();
+            builder.command(BENCHMARK_COMMAND, "-h", target.hostname, "-p", String.valueOf(target.port), "-r", String.valueOf(records), "-n",
+                    String.valueOf(requests), "-c", String.valueOf(clients), "-t", BENCHMARK_TESTS, "--csv");
+            Process process = builder.start();
+
+            log.debug(String.format("running '%s'", builder.command()));
+
+            RedisResult result = new RedisResult(1);
+
+            int retVal;
+            try {
+                // NOTE(review): stdout is only consumed after waitFor(); if
+                // the benchmark writes more than the OS pipe buffer holds the
+                // child can block indefinitely - consider draining the stream
+                // concurrently.
+                retVal = process.waitFor();
+            } catch (InterruptedException e) {
+                // NOTE(review): interrupt status is swallowed here (no
+                // Thread.currentThread().interrupt()) - confirm intended.
+                process.destroy();
+                return result;
+            }
+
+            Preconditions.checkState(retVal == 0, "Benchmark process returned %s", retVal);
+
+            // matches CSV lines such as: "GET","12345.67"
+            Pattern pattern = Pattern.compile("\"([A-Z0-9_]+).*\",\"([0-9\\.]+)\"");
+
+            log.debug("parsing output");
+            // NOTE(review): reader is never closed; relies on process teardown
+            BufferedReader reader = new BufferedReader(new InputStreamReader(process.getInputStream()));
+            String line = null;
+            while ((line = reader.readLine()) != null) {
+                Matcher matcher = pattern.matcher(line);
+
+                if (!matcher.find())
+                    continue;
+
+                String key = matcher.group(1);
+                Double value = Double.valueOf(matcher.group(2));
+
+                result.put(key, value);
+            }
+
+            log.debug(String.format("benchmark for '%s' returned '%s'", target, result));
+
+            return result;
+        }
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/cluster/StaticTargetProvider.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/cluster/StaticTargetProvider.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/cluster/StaticTargetProvider.java
new file mode 100644
index 0000000..47bf725
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/cluster/StaticTargetProvider.java
@@ -0,0 +1,41 @@
+package org.apache.helix.metamanager.cluster;
+
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Properties;
+
+import org.apache.helix.metamanager.ClusterStatusProvider;
+
+
+/**
+ * ClusterStatusProvider backed by a fixed, in-memory mapping from container
+ * type to target container count. Counts can be seeded from Properties or a
+ * Map and changed at runtime via setTargetContainerCount().
+ */
+public class StaticTargetProvider implements ClusterStatusProvider {
+
+	final Map<String, Integer> targetCounts = new HashMap<String, Integer>();
+	
+	public StaticTargetProvider() {
+	    // left blank
+	}
+	
+	// each property entry is interpreted as containerType=count
+	public StaticTargetProvider(Properties properties) {
+	    for(Entry<Object, Object> entry : properties.entrySet()) {
+	        String key = (String)entry.getKey();
+	        int value = Integer.valueOf((String)entry.getValue());
+	        
+	        targetCounts.put(key, value);
+	    }
+	}
+	
+	public StaticTargetProvider(Map<String, Integer> targetCounts) {
+		this.targetCounts.putAll(targetCounts);
+	}
+	
+	@Override
+	public int getTargetContainerCount(String containerType) {
+		// NOTE(review): unboxing throws NullPointerException if the container
+		// type was never registered - confirm callers only pass known types.
+		return targetCounts.get(containerType);
+	}
+
+	public void setTargetContainerCount(String containerType, int targetCount) {
+		targetCounts.put(containerType, targetCount);
+	}
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/container/ContainerProcess.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/container/ContainerProcess.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/container/ContainerProcess.java
new file mode 100644
index 0000000..11ad86e
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/container/ContainerProcess.java
@@ -0,0 +1,133 @@
+package org.apache.helix.metamanager.container;
+
+import java.util.Properties;
+
+import org.apache.helix.HelixManager;
+import org.apache.helix.HelixManagerFactory;
+import org.apache.helix.InstanceType;
+import org.apache.helix.metamanager.Service;
+import org.apache.helix.participant.statemachine.StateModel;
+import org.apache.helix.participant.statemachine.StateModelFactory;
+import org.apache.log4j.Logger;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * Base class for spawn-able container types. Configured from Properties, it
+ * starts a container (subclass hook) and a Helix participant connected to the
+ * managed cluster.
+ */
+public abstract class ContainerProcess implements Service {
+    static final Logger                             log    = Logger.getLogger(ContainerProcess.class);
+
+    private ContainerProcessProperties              properties;
+    private HelixManager                            participantManager;
+
+    // Helix state model registered by the participant
+    private String                                  modelName;
+    private StateModelFactory<? extends StateModel> modelFactory;
+
+    private String                                  instanceName;
+    private String                                  clusterName;
+    private String                                  zookeeperAddress;
+
+    // active: container and participant started; failed: start() aborted
+    private boolean                                 active = false;
+    private boolean                                 failed = false;
+
+    public final void setModelName(String modelName) {
+        this.modelName = modelName;
+    }
+
+    public final void setModelFactory(StateModelFactory<? extends StateModel> modelFactory) {
+        this.modelFactory = modelFactory;
+    }
+
+    /**
+     * Reads instance name, cluster name and zookeeper address from the given
+     * properties.
+     *
+     * @throws IllegalArgumentException if a required property is missing
+     */
+    @Override
+    public void configure(Properties properties) throws Exception {
+        ContainerProcessProperties containerProps = new ContainerProcessProperties();
+        containerProps.putAll(properties);
+        Preconditions.checkArgument(containerProps.isValid());
+
+        this.properties = containerProps;
+        this.instanceName = containerProps.getName();
+        this.clusterName = containerProps.getCluster();
+        this.zookeeperAddress = containerProps.getAddress();
+    }
+
+    /**
+     * Starts the container, then the Helix participant. On any failure the
+     * process is marked failed (see isFailed()) instead of throwing.
+     */
+    @Override
+    public final void start() {
+        try {
+            Preconditions.checkNotNull(modelName, "state model name not set");
+            Preconditions.checkNotNull(modelFactory, "state model factory not set");
+            Preconditions.checkState(properties.isValid(), "process properties not valid: %s", properties.toString());
+
+            log.info(String.format("starting container '%s'", instanceName));
+            startContainer();
+
+            log.info(String.format("starting helix participant '%s'", instanceName));
+            startParticipant();
+
+            active = true;
+
+        } catch (Exception e) {
+            log.error(String.format("starting container '%s' failed", instanceName), e);
+            fail();
+        }
+    }
+
+    /** Container-specific startup hook; runs before the participant connects. */
+    protected abstract void startContainer() throws Exception;
+
+    private final void startParticipant() throws Exception {
+        participantManager = HelixManagerFactory.getZKHelixManager(clusterName, instanceName, InstanceType.PARTICIPANT, zookeeperAddress);
+        participantManager.getStateMachineEngine().registerStateModelFactory(modelName, modelFactory);
+        participantManager.connect();
+    }
+
+    /**
+     * Stops the participant first, then the container. Failures are logged
+     * and swallowed; in that case 'active' is left unchanged.
+     */
+    @Override
+    public final void stop() {
+        try {
+            log.info(String.format("stopping helix participant '%s'", instanceName));
+            stopParticipant();
+
+            log.info(String.format("stopping container '%s'", instanceName));
+            stopContainer();
+
+            active = false;
+
+        } catch (Exception e) {
+            log.warn(String.format("stopping container '%s' failed", instanceName), e);
+        }
+    }
+
+    /** Container-specific shutdown hook; runs after the participant disconnects. */
+    protected abstract void stopContainer() throws Exception;
+
+    private final void stopParticipant() {
+        if (participantManager != null) {
+            participantManager.disconnect();
+        }
+    }
+
+    /** Marks this process as failed; isActive() returns false afterwards. */
+    public final void fail() {
+        failed = true;
+    }
+
+    public final boolean isActive() {
+        return active && !failed;
+    }
+
+    public final boolean isFailed() {
+        return failed;
+    }
+
+    public final ContainerProcessProperties getProperties() {
+        return properties;
+    }
+
+    String getModelName() {
+        return modelName;
+    }
+
+    StateModelFactory<? extends StateModel> getModelFactory() {
+        return modelFactory;
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/container/ContainerProcessProperties.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/container/ContainerProcessProperties.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/container/ContainerProcessProperties.java
new file mode 100644
index 0000000..1a6d272
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/container/ContainerProcessProperties.java
@@ -0,0 +1,66 @@
+package org.apache.helix.metamanager.container;
+
+import java.util.Properties;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * Base configuration for ContainerProcess. A configuration is valid once the
+ * cluster name, zookeeper address, instance name and container class are set.
+ */
+public class ContainerProcessProperties extends Properties {
+    /**
+	 * serialization id
+	 */
+    private static final long  serialVersionUID = 5754863079470995536L;
+
+    // required property keys
+    public static final String CLUSTER          = "cluster";
+    public static final String ADDRESS          = "address";
+    public static final String NAME             = "name";
+    public static final String CONTAINER_CLASS  = "class";
+
+    public ContainerProcessProperties() {
+        // left blank
+    }
+
+    public ContainerProcessProperties(Properties properties) {
+        Preconditions.checkNotNull(properties);
+        putAll(properties);
+    }
+	
+	/** @return true if all required keys are present */
+	public boolean isValid() {
+		return containsKey(CLUSTER) &&
+			   containsKey(NAME) &&
+			   containsKey(ADDRESS) &&
+			   containsKey(CONTAINER_CLASS);
+	}
+	
+    public String getCluster() {
+        return getProperty(CLUSTER);
+    }
+
+    public String getAddress() {
+        return getProperty(ADDRESS);
+    }
+
+    public String getName() {
+        return getProperty(NAME);
+    }
+
+    public String getContainerClass() {
+        return getProperty(CONTAINER_CLASS);
+    }
+
+    /**
+     * Unlike java.util.Properties, throws IllegalStateException when the key
+     * is absent instead of returning null.
+     */
+    @Override
+    public synchronized Object get(Object key) {
+        Preconditions.checkState(containsKey(key));
+        return super.get(key);
+    }
+
+    /**
+     * Unlike java.util.Properties, throws IllegalStateException when the key
+     * is absent instead of returning null.
+     */
+    @Override
+    public String getProperty(String key) {
+        Preconditions.checkState(containsKey(key));
+        return super.getProperty(key);
+    }
+	
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/container/ContainerStateModel.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/container/ContainerStateModel.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/container/ContainerStateModel.java
new file mode 100644
index 0000000..9ac6b5c
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/container/ContainerStateModel.java
@@ -0,0 +1,64 @@
+package org.apache.helix.metamanager.container;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import org.apache.helix.NotificationContext;
+import org.apache.helix.model.Message;
+import org.apache.helix.participant.statemachine.StateModel;
+import org.apache.helix.participant.statemachine.StateModelInfo;
+import org.apache.helix.participant.statemachine.Transition;
+import org.apache.log4j.Logger;
+
+/**
+ * Master-slave Helix state model for containers. All transitions are no-ops
+ * that only emit a trace log entry.
+ */
+@StateModelInfo(initialState = "OFFLINE", states = { "OFFLINE", "SLAVE", "MASTER", "DROPPED" })
+public class ContainerStateModel extends StateModel {
+	
+	static final Logger log = Logger.getLogger(ContainerStateModel.class);
+	
+	@Transition(from = "OFFLINE", to = "SLAVE")
+	public void offlineToSlave(Message m, NotificationContext context) {
+		log.trace(String.format("%s transitioning from OFFLINE to SLAVE",
+				context.getManager().getInstanceName()));
+	}
+
+	@Transition(from = "SLAVE", to = "OFFLINE")
+	public void slaveToOffline(Message m, NotificationContext context) {
+		log.trace(String.format("%s transitioning from SLAVE to OFFLINE",
+				context.getManager().getInstanceName()));
+	}
+
+	@Transition(from = "SLAVE", to = "MASTER")
+	public void slaveToMaster(Message m, NotificationContext context) {
+		log.trace(String.format("%s transitioning from SLAVE to MASTER",
+				context.getManager().getInstanceName()));
+	}
+
+	@Transition(from = "MASTER", to = "SLAVE")
+	public void masterToSlave(Message m, NotificationContext context) {
+		log.trace(String.format("%s transitioning from MASTER to SLAVE",
+				context.getManager().getInstanceName()));
+	}
+
+	@Transition(from = "OFFLINE", to = "DROPPED")
+	public void offlineToDropped(Message m, NotificationContext context) {
+		log.trace(String.format("%s transitioning from OFFLINE to DROPPED",
+				context.getManager().getInstanceName()));
+	}
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/container/ContainerStateModelFactory.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/container/ContainerStateModelFactory.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/container/ContainerStateModelFactory.java
new file mode 100644
index 0000000..ab5a099
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/container/ContainerStateModelFactory.java
@@ -0,0 +1,30 @@
+package org.apache.helix.metamanager.container;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import org.apache.helix.participant.statemachine.StateModelFactory;
+
+/** Factory creating one ContainerStateModel per partition. */
+public class ContainerStateModelFactory extends StateModelFactory<ContainerStateModel> {
+
+	@Override
+	public ContainerStateModel createNewStateModel(String partitionName) {
+		return new ContainerStateModel();
+	}
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/container/ContainerUtils.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/container/ContainerUtils.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/container/ContainerUtils.java
new file mode 100644
index 0000000..3d32862
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/container/ContainerUtils.java
@@ -0,0 +1,46 @@
+package org.apache.helix.metamanager.container;
+
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.lang.reflect.Constructor;
+
+import org.apache.log4j.Logger;
+
+/**
+ * Utility for loading ContainerProcessProperties and instantiating a
+ * ContainerProcess via reflection.
+ */
+public class ContainerUtils {
+
+    static final Logger log = Logger.getLogger(ContainerUtils.class);
+
+    private ContainerUtils() {
+        // left blank
+    }
+
+    /**
+     * Instantiates the container class named in the properties, using its
+     * (ContainerProcessProperties) constructor.
+     *
+     * @throws Exception if the class or constructor cannot be found or
+     *             instantiation fails
+     */
+    public static ContainerProcess createProcess(ContainerProcessProperties properties) throws Exception {
+        String containerClassName = properties.getContainerClass();
+
+        Class<?> containerClass = Class.forName(containerClassName);
+
+        log.debug(String.format("checking for properties constructor in class '%s'", containerClassName));
+
+        Constructor<?> constructor = containerClass.getConstructor(ContainerProcessProperties.class);
+
+        return (ContainerProcess) constructor.newInstance(properties);
+    }
+
+    /** Loads container properties from a classpath resource. */
+    public static ContainerProcessProperties getPropertiesFromResource(String resourceName) throws IOException {
+        ContainerProcessProperties properties = new ContainerProcessProperties();
+        // NOTE(review): the resource stream is never closed, and load() will
+        // throw NPE if the resource does not exist - consider guarding.
+        properties.load(ClassLoader.getSystemResourceAsStream(resourceName));
+        return properties;
+    }
+
+    /** Loads container properties from a file system path. */
+    public static ContainerProcessProperties getPropertiesFromPath(String filePath) throws IOException {
+        ContainerProcessProperties properties = new ContainerProcessProperties();
+        // NOTE(review): the FileInputStream is never closed (file descriptor
+        // leak) and the reader uses the platform default charset.
+        properties.load(new InputStreamReader(new FileInputStream(filePath)));
+        return properties;
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/container/impl/DummyMasterSlaveProcess.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/container/impl/DummyMasterSlaveProcess.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/container/impl/DummyMasterSlaveProcess.java
new file mode 100644
index 0000000..d91d77c
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/container/impl/DummyMasterSlaveProcess.java
@@ -0,0 +1,76 @@
+package org.apache.helix.metamanager.container.impl;
+
+import org.apache.helix.NotificationContext;
+import org.apache.helix.metamanager.container.ContainerProcess;
+import org.apache.helix.metamanager.container.ContainerProcessProperties;
+import org.apache.helix.model.Message;
+import org.apache.helix.participant.statemachine.StateModel;
+import org.apache.helix.participant.statemachine.StateModelFactory;
+import org.apache.helix.participant.statemachine.StateModelInfo;
+import org.apache.helix.participant.statemachine.Transition;
+import org.apache.log4j.Logger;
+
+/**
+ * Dummy master-slave container for testing: container start/stop only log,
+ * and all Helix state transitions are trace-logged no-ops.
+ */
+public class DummyMasterSlaveProcess extends ContainerProcess {
+
+	static final Logger log = Logger.getLogger(DummyMasterSlaveProcess.class);
+	
+	public DummyMasterSlaveProcess(ContainerProcessProperties properties) {
+		// NOTE(review): relies on a ContainerProcess(ContainerProcessProperties)
+		// constructor - verify it exists in this version of ContainerProcess.
+		super(properties);
+		setModelName("MasterSlave");
+		setModelFactory(new DummyMasterSlaveModelFactory());
+	}
+
+	@Override
+	protected void startContainer() throws Exception {
+		log.info("starting dummy process container");
+	}
+
+	@Override
+	protected void stopContainer() throws Exception {
+		log.info("stopping dummy process container");
+	}
+
+	/** Factory creating one no-op state model per partition. */
+	public static class DummyMasterSlaveModelFactory extends StateModelFactory<DummyMasterSlaveStateModel> {
+		@Override
+		public DummyMasterSlaveStateModel createNewStateModel(String partitionName) {
+			return new DummyMasterSlaveStateModel();
+		}
+	}
+	
+	/** No-op master-slave state model; transitions only emit trace logs. */
+	@StateModelInfo(initialState = "OFFLINE", states = { "OFFLINE", "SLAVE", "MASTER", "DROPPED" })
+	public static class DummyMasterSlaveStateModel extends StateModel {
+		
+		static final Logger log = Logger.getLogger(DummyMasterSlaveStateModel.class);
+		
+		@Transition(from = "OFFLINE", to = "SLAVE")
+		public void offlineToSlave(Message m, NotificationContext context) {
+			log.trace(String.format("%s transitioning from OFFLINE to SLAVE",
+					context.getManager().getInstanceName()));
+		}
+
+		@Transition(from = "SLAVE", to = "OFFLINE")
+		public void slaveToOffline(Message m, NotificationContext context) {
+			log.trace(String.format("%s transitioning from SLAVE to OFFLINE",
+					context.getManager().getInstanceName()));
+		}
+
+		@Transition(from = "SLAVE", to = "MASTER")
+		public void slaveToMaster(Message m, NotificationContext context) {
+			log.trace(String.format("%s transitioning from SLAVE to MASTER",
+					context.getManager().getInstanceName()));
+		}
+
+		@Transition(from = "MASTER", to = "SLAVE")
+		public void masterToSlave(Message m, NotificationContext context) {
+			log.trace(String.format("%s transitioning from MASTER to SLAVE",
+					context.getManager().getInstanceName()));
+		}
+
+		@Transition(from = "OFFLINE", to = "DROPPED")
+		public void offlineToDropped(Message m, NotificationContext context) {
+			log.trace(String.format("%s transitioning from OFFLINE to DROPPED",
+					context.getManager().getInstanceName()));
+		}
+
+	}
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/container/impl/DummyOnlineOfflineProcess.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/container/impl/DummyOnlineOfflineProcess.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/container/impl/DummyOnlineOfflineProcess.java
new file mode 100644
index 0000000..d5015f4
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/container/impl/DummyOnlineOfflineProcess.java
@@ -0,0 +1,64 @@
+package org.apache.helix.metamanager.container.impl;
+
+import org.apache.helix.NotificationContext;
+import org.apache.helix.metamanager.container.ContainerProcess;
+import org.apache.helix.metamanager.container.ContainerProcessProperties;
+import org.apache.helix.model.Message;
+import org.apache.helix.participant.statemachine.StateModel;
+import org.apache.helix.participant.statemachine.StateModelFactory;
+import org.apache.helix.participant.statemachine.StateModelInfo;
+import org.apache.helix.participant.statemachine.Transition;
+import org.apache.log4j.Logger;
+
+/**
+ * Dummy online-offline container for testing: container start/stop only log,
+ * and all Helix state transitions are trace-logged no-ops.
+ */
+public class DummyOnlineOfflineProcess extends ContainerProcess {
+
+	static final Logger log = Logger.getLogger(DummyOnlineOfflineProcess.class);
+	
+	public DummyOnlineOfflineProcess(ContainerProcessProperties properties) {
+		// NOTE(review): relies on a ContainerProcess(ContainerProcessProperties)
+		// constructor - verify it exists in this version of ContainerProcess.
+		super(properties);
+		setModelName("OnlineOffline");
+		setModelFactory(new DummyOnlineOfflineModelFactory());
+	}
+
+	@Override
+	protected void startContainer() throws Exception {
+		log.info("starting dummy online-offline process container");
+	}
+
+	@Override
+	protected void stopContainer() throws Exception {
+		log.info("stopping dummy online-offline process container");
+	}
+
+	/** Factory creating one no-op state model per partition. */
+	public static class DummyOnlineOfflineModelFactory extends StateModelFactory<DummyOnlineOfflineStateModel> {
+		@Override
+		public DummyOnlineOfflineStateModel createNewStateModel(String partitionName) {
+			return new DummyOnlineOfflineStateModel();
+		}
+	}
+	
+	/** No-op online-offline state model; transitions only emit trace logs. */
+	@StateModelInfo(initialState = "OFFLINE", states = { "OFFLINE", "ONLINE", "DROPPED" })
+	public static class DummyOnlineOfflineStateModel extends StateModel {
+		
+		static final Logger log = Logger.getLogger(DummyOnlineOfflineStateModel.class);
+		
+		@Transition(from = "OFFLINE", to = "ONLINE")
+		public void offlineToOnline(Message m, NotificationContext context) {
+			log.trace(String.format("%s transitioning from OFFLINE to ONLINE",
+					context.getManager().getInstanceName()));
+		}
+
+		@Transition(from = "ONLINE", to = "OFFLINE")
+		public void onlineToOffline(Message m, NotificationContext context) {
+			log.trace(String.format("%s transitioning from ONLINE to OFFLINE",
+					context.getManager().getInstanceName()));
+		}
+
+		@Transition(from = "OFFLINE", to = "DROPPED")
+		public void offlineToDropped(Message m, NotificationContext context) {
+			log.trace(String.format("%s transitioning from OFFLINE to DROPPED",
+					context.getManager().getInstanceName()));
+		}
+
+	}
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/container/impl/DummyProcess.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/container/impl/DummyProcess.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/container/impl/DummyProcess.java
new file mode 100644
index 0000000..b4963a7
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/container/impl/DummyProcess.java
@@ -0,0 +1,76 @@
+package org.apache.helix.metamanager.container.impl;
+
+import org.apache.helix.NotificationContext;
+import org.apache.helix.metamanager.container.ContainerProcess;
+import org.apache.helix.metamanager.container.ContainerProcessProperties;
+import org.apache.helix.model.Message;
+import org.apache.helix.participant.statemachine.StateModel;
+import org.apache.helix.participant.statemachine.StateModelFactory;
+import org.apache.helix.participant.statemachine.StateModelInfo;
+import org.apache.helix.participant.statemachine.Transition;
+import org.apache.log4j.Logger;
+
+/**
+ * Dummy master-slave container for testing: container start/stop only log,
+ * and all Helix state transitions are trace-logged no-ops.
+ */
+public class DummyProcess extends ContainerProcess {
+
+	static final Logger log = Logger.getLogger(DummyProcess.class);
+	
+	public DummyProcess(ContainerProcessProperties properties) {
+		// NOTE(review): relies on a ContainerProcess(ContainerProcessProperties)
+		// constructor - verify it exists in this version of ContainerProcess.
+		super(properties);
+		setModelName("MasterSlave");
+		setModelFactory(new DummyModelFactory());
+	}
+
+	@Override
+	protected void startContainer() throws Exception {
+		log.info("starting dummy process container");
+	}
+
+	@Override
+	protected void stopContainer() throws Exception {
+		log.info("stopping dummy process container");
+	}
+
+	/** Factory creating one no-op state model per partition. */
+	public static class DummyModelFactory extends StateModelFactory<DummyStateModel> {
+		@Override
+		public DummyStateModel createNewStateModel(String partitionName) {
+			return new DummyStateModel();
+		}
+	}
+	
+	/** No-op master-slave state model; transitions only emit trace logs. */
+	@StateModelInfo(initialState = "OFFLINE", states = { "OFFLINE", "SLAVE", "MASTER", "DROPPED" })
+	public static class DummyStateModel extends StateModel {
+		
+		static final Logger log = Logger.getLogger(DummyStateModel.class);
+		
+		@Transition(from = "OFFLINE", to = "SLAVE")
+		public void offlineToSlave(Message m, NotificationContext context) {
+			log.trace(String.format("%s transitioning from OFFLINE to SLAVE",
+					context.getManager().getInstanceName()));
+		}
+
+		@Transition(from = "SLAVE", to = "OFFLINE")
+		public void slaveToOffline(Message m, NotificationContext context) {
+			log.trace(String.format("%s transitioning from SLAVE to OFFLINE",
+					context.getManager().getInstanceName()));
+		}
+
+		@Transition(from = "SLAVE", to = "MASTER")
+		public void slaveToMaster(Message m, NotificationContext context) {
+			log.trace(String.format("%s transitioning from SLAVE to MASTER",
+					context.getManager().getInstanceName()));
+		}
+
+		@Transition(from = "MASTER", to = "SLAVE")
+		public void masterToSlave(Message m, NotificationContext context) {
+			log.trace(String.format("%s transitioning from MASTER to SLAVE",
+					context.getManager().getInstanceName()));
+		}
+
+		@Transition(from = "OFFLINE", to = "DROPPED")
+		public void offlineToDropped(Message m, NotificationContext context) {
+			log.trace(String.format("%s transitioning from OFFLINE to DROPPED",
+					context.getManager().getInstanceName()));
+		}
+
+	}
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/container/impl/RedisServerProcess.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/container/impl/RedisServerProcess.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/container/impl/RedisServerProcess.java
new file mode 100644
index 0000000..d084a71
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/container/impl/RedisServerProcess.java
@@ -0,0 +1,135 @@
+package org.apache.helix.metamanager.container.impl;
+
+import java.net.InetAddress;
+import java.util.concurrent.Executors;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.TimeUnit;
+
+import org.I0Itec.zkclient.ZkClient;
+import org.apache.helix.NotificationContext;
+import org.apache.helix.metamanager.container.ContainerProcess;
+import org.apache.helix.metamanager.container.ContainerProcessProperties;
+import org.apache.helix.model.Message;
+import org.apache.helix.participant.statemachine.StateModel;
+import org.apache.helix.participant.statemachine.StateModelFactory;
+import org.apache.helix.participant.statemachine.StateModelInfo;
+import org.apache.helix.participant.statemachine.Transition;
+import org.apache.log4j.Logger;
+
+public class RedisServerProcess extends ContainerProcess {
+
+    static final Logger        log                  = Logger.getLogger(RedisServerProcess.class);
+
+    public static final String REDIS_SERVER_COMMAND = "redis-server";
+
+    public static final long   MONITOR_INTERVAL     = 5000;
+
+    ZkClient                   zookeeper;
+
+    final String               address;
+    final String               root;
+    final String               name;
+    final int                  basePort;
+
+    Process                    process;
+
+    ScheduledExecutorService   executor;
+
+    public RedisServerProcess(ContainerProcessProperties properties) {
+        super(properties);
+
+        setModelName("OnlineOffline");
+        setModelFactory(new RedisServerModelFactory());
+
+        address = properties.getProperty("address");
+        root = properties.getProperty("root");
+        basePort = Integer.valueOf(properties.getProperty("baseport"));
+        name = properties.getProperty(ContainerProcessProperties.HELIX_INSTANCE);
+    }
+
+    @Override
+    protected void startContainer() throws Exception {
+        log.info(String.format("starting redis server container for instance '%s'", name));
+
+        String hostname = InetAddress.getLocalHost().getHostName();
+        int port = basePort + Integer.valueOf(name.split("_")[1]);
+
+        log.debug(String.format("Starting redis server at '%s:%d'", hostname, port));
+
+        ProcessBuilder builder = new ProcessBuilder();
+        builder.command(REDIS_SERVER_COMMAND, "--port", String.valueOf(port));
+        process = builder.start();
+
+        log.debug("Updating zookeeper");
+        zookeeper = new ZkClient(address);
+        zookeeper.deleteRecursive("/" + root + "/" + name);
+        zookeeper.createPersistent("/" + root + "/" + name, true);
+        zookeeper.createPersistent("/" + root + "/" + name + "/hostname", hostname);
+        zookeeper.createPersistent("/" + root + "/" + name + "/port", String.valueOf(port));
+
+        log.debug("Starting process monitor");
+        executor = Executors.newSingleThreadScheduledExecutor();
+        executor.scheduleAtFixedRate(new ProcessMonitor(), 0, MONITOR_INTERVAL, TimeUnit.MILLISECONDS);
+
+    }
+
+    @Override
+    protected void stopContainer() throws Exception {
+        log.info("stopping redis server container");
+
+        log.debug("Stopping process monitor");
+        executor.shutdownNow();
+
+        log.debug("Updating zookeeper");
+        zookeeper.deleteRecursive("/" + root + "/" + name);
+        zookeeper.close();
+
+        log.debug("Stopping process");
+        process.destroy();
+        process.waitFor();
+    }
+
+    public class RedisServerModelFactory extends StateModelFactory<RedisServerModel> {
+        @Override
+        public RedisServerModel createNewStateModel(String partitionName) {
+            return new RedisServerModel();
+        }
+    }
+
+    @StateModelInfo(initialState = "OFFLINE", states = { "OFFLINE", "ONLINE", "DROPPED" })
+    public class RedisServerModel extends StateModel {
+
+        @Transition(from = "OFFLINE", to = "ONLINE")
+        public void offlineToSlave(Message m, NotificationContext context) {
+            // left blank
+            log.trace(String.format("%s transitioning from %s to %s", context.getManager().getInstanceName(), m.getFromState(), m.getToState()));
+        }
+
+        @Transition(from = "ONLINE", to = "OFFLINE")
+        public void slaveToOffline(Message m, NotificationContext context) {
+            // left blank
+            log.trace(String.format("%s transitioning from %s to %s", context.getManager().getInstanceName(), m.getFromState(), m.getToState()));
+        }
+
+        @Transition(from = "OFFLINE", to = "DROPPED")
+        public void offlineToDropped(Message m, NotificationContext context) {
+            // left blank
+            log.trace(String.format("%s transitioning from %s to %s", context.getManager().getInstanceName(), m.getFromState(), m.getToState()));
+        }
+
+    }
+
+    private class ProcessMonitor implements Runnable {
+        @Override
+        public void run() {
+            try {
+                process.exitValue();
+                log.warn("detected process failure");
+                fail();
+            } catch (Exception e) {
+                // expected
+            }
+        }
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/container/impl/ZookeeperMasterSlaveProcess.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/container/impl/ZookeeperMasterSlaveProcess.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/container/impl/ZookeeperMasterSlaveProcess.java
new file mode 100644
index 0000000..f8bbc85
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/container/impl/ZookeeperMasterSlaveProcess.java
@@ -0,0 +1,104 @@
+package org.apache.helix.metamanager.container.impl;
+
+import org.I0Itec.zkclient.ZkClient;
+import org.apache.helix.NotificationContext;
+import org.apache.helix.metamanager.container.ContainerProcess;
+import org.apache.helix.metamanager.container.ContainerProcessProperties;
+import org.apache.helix.model.Message;
+import org.apache.helix.participant.statemachine.StateModel;
+import org.apache.helix.participant.statemachine.StateModelFactory;
+import org.apache.helix.participant.statemachine.StateModelInfo;
+import org.apache.helix.participant.statemachine.Transition;
+import org.apache.log4j.Logger;
+
/**
 * Container process whose Helix participant mirrors its MasterSlave partition
 * assignments into zookeeper: the current state of each assigned partition is
 * kept in an ephemeral node at "/&lt;root&gt;/&lt;name&gt;/&lt;resource&gt;_&lt;partition&gt;".
 *
 * Expected properties: "address" (zookeeper connect string), "root"
 * (zookeeper root path), and the Helix instance name.
 */
public class ZookeeperMasterSlaveProcess extends ContainerProcess {

    static final Logger log = Logger.getLogger(ZookeeperMasterSlaveProcess.class);

    ZkClient            zookeeper;

    final String        address; // zookeeper connect string
    final String        root;    // zookeeper root path for this deployment
    final String        name;    // helix instance name

    public ZookeeperMasterSlaveProcess(ContainerProcessProperties properties) {
        super(properties);

        setModelName("MasterSlave");
        setModelFactory(new ZookeeperMasterSlaveModelFactory());

        address = properties.getProperty("address");
        root = properties.getProperty("root");
        name = properties.getProperty(ContainerProcessProperties.HELIX_INSTANCE);
    }

    /**
     * Connects to zookeeper and creates the persistent instance node under
     * which per-partition state nodes will be published.
     */
    @Override
    protected void startContainer() throws Exception {
        log.info("starting zookeeper process container");

        zookeeper = new ZkClient(address);
        zookeeper.createPersistent("/" + root + "/" + name, true);
    }

    /**
     * Closes the zookeeper session; the ephemeral per-partition state nodes
     * created by the state model disappear with the session.
     */
    @Override
    protected void stopContainer() throws Exception {
        log.info("stopping zookeeper process container");

        zookeeper.close();
    }

    /** Creates state models that publish partition state to zookeeper. */
    public class ZookeeperMasterSlaveModelFactory extends StateModelFactory<ZookeeperMasterSlaveModel> {
        @Override
        public ZookeeperMasterSlaveModel createNewStateModel(String partitionName) {
            return new ZookeeperMasterSlaveModel();
        }
    }

    /**
     * MasterSlave model; every transition rewrites the partition's ephemeral
     * state node, except OFFLINE -&gt; DROPPED which removes it entirely.
     */
    @StateModelInfo(initialState = "OFFLINE", states = { "OFFLINE", "SLAVE", "MASTER", "DROPPED" })
    public class ZookeeperMasterSlaveModel extends StateModel {

        @Transition(from = "OFFLINE", to = "SLAVE")
        public void offlineToSlave(Message m, NotificationContext context) {
            transition(m, context);
        }

        @Transition(from = "SLAVE", to = "OFFLINE")
        public void slaveToOffline(Message m, NotificationContext context) {
            transition(m, context);
        }

        @Transition(from = "SLAVE", to = "MASTER")
        public void slaveToMaster(Message m, NotificationContext context) {
            transition(m, context);
        }

        @Transition(from = "MASTER", to = "SLAVE")
        public void masterToSlave(Message m, NotificationContext context) {
            transition(m, context);
        }

        /** Dropping a partition removes its state node without recreating it. */
        @Transition(from = "OFFLINE", to = "DROPPED")
        public void offlineToDropped(Message m, NotificationContext context) {
            log.trace(String.format("%s transitioning from %s to %s", context.getManager().getInstanceName(), m.getFromState(), m.getToState()));

            String resource = m.getResourceName();
            String partition = m.getPartitionName();
            String path = "/" + root + "/" + name + "/" + resource + "_" + partition;

            zookeeper.delete(path);
        }

        /**
         * Common transition handler: replaces the partition's ephemeral node
         * with one holding the new state (delete first, since ephemeral nodes
         * cannot be overwritten in place).
         */
        public void transition(Message m, NotificationContext context) {
            log.trace(String.format("%s transitioning from %s to %s", context.getManager().getInstanceName(), m.getFromState(), m.getToState()));

            String resource = m.getResourceName();
            String partition = m.getPartitionName();
            String path = "/" + root + "/" + name + "/" + resource + "_" + partition;

            zookeeper.delete(path);
            zookeeper.createEphemeral(path, m.getToState());
        }

    }

}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/FileTargetProvider.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/FileTargetProvider.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/FileTargetProvider.java
new file mode 100644
index 0000000..6eac3e8
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/FileTargetProvider.java
@@ -0,0 +1,51 @@
+package org.apache.helix.metamanager.impl;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.Properties;
+
+import org.apache.helix.metamanager.TargetProviderService;
+
+/**
+ * File-based target model. Container count is extracted from properties file. Count may change dynamically.
+ * 
+ */
+public class FileTargetProvider implements TargetProviderService {
+
+    File file;
+
+    public FileTargetProvider() {
+        // left blank
+    }
+
+    public FileTargetProvider(String path) {
+        this.file = new File(path);
+    }
+
+    @Override
+    public int getTargetContainerCount(String containerType) throws FileNotFoundException, IOException, IllegalArgumentException {
+        Properties properties = new Properties();
+        properties.load(new FileReader(file));
+        if (!properties.contains(containerType))
+            throw new IllegalArgumentException(String.format("container type '%s' not found in '%s'", containerType, file.getCanonicalPath()));
+        return Integer.parseInt((String) properties.get(containerType));
+    }
+
+    @Override
+    public void configure(Properties properties) throws Exception {
+        this.file = new File(properties.getProperty("path"));
+    }
+
+    @Override
+    public void start() throws Exception {
+        // left blank
+    }
+
+    @Override
+    public void stop() throws Exception {
+        // left blank
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/RedisTargetProvider.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/RedisTargetProvider.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/RedisTargetProvider.java
new file mode 100644
index 0000000..1fdf96e
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/RedisTargetProvider.java
@@ -0,0 +1,356 @@
+package org.apache.helix.metamanager.impl;
+
+import java.io.BufferedReader;
+import java.io.InputStreamReader;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.Map.Entry;
+import java.util.Properties;
+import java.util.concurrent.Callable;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.TimeUnit;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.I0Itec.zkclient.ZkClient;
+import org.apache.helix.metamanager.TargetProviderService;
+import org.apache.log4j.Logger;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * Redis-specific target model based on recurring Tps benchmarking. Tps target
+ * and probed redis-server instances are configured via zookeeper. Tps target
+ * may change dynamically.
+ * 
+ */
+public class RedisTargetProvider implements TargetProviderService {
+
+    static final Logger        log               = Logger.getLogger(RedisTargetProvider.class);
+
+    public static final String BENCHMARK_COMMAND = "redis-benchmark";
+    public static final String BENCHMARK_TESTS   = "GET,SET";
+
+    public static final String DEFAULT_RECORDS   = "100000";
+    public static final String DEFAULT_CLIENTS   = "20";
+    public static final String DEFAULT_REQUESTS  = "100000";
+    public static final String DEFAULT_TIMEOUT   = "8000";
+    public static final String DEFAULT_INTERVAL  = "10000";
+    public static final String DEFAULT_ALPHA     = "0.25";
+
+    ZkClient                   zookeeper;
+
+    String                     address;
+    String                     root;
+
+    int                        records;
+    int                        clients;
+    int                        requests;
+    int                        timeout;
+    int                        interval;
+
+    int                        targetTpsGet;
+    int                        targetTpsSet;
+
+    int                        targetCountMin;
+    int                        targetCountMax;
+    int                        targetCount;
+
+    double                     alpha;
+    double                     averageTpsGet;
+    double                     averageTpsSet;
+    double                     averageCount;
+
+    ScheduledExecutorService   executor;
+
+    @Override
+    public void configure(Properties properties) {
+        address = properties.getProperty("address");
+        root = properties.getProperty("root");
+        targetTpsGet = Integer.valueOf(properties.getProperty("get", "0"));
+        targetTpsSet = Integer.valueOf(properties.getProperty("set", "0"));
+        targetCountMin = Integer.valueOf(properties.getProperty("min", "-1"));
+        targetCountMax = Integer.valueOf(properties.getProperty("max", "-1"));
+        records = Integer.valueOf(properties.getProperty("records", DEFAULT_RECORDS));
+        clients = Integer.valueOf(properties.getProperty("clients", DEFAULT_CLIENTS));
+        requests = Integer.valueOf(properties.getProperty("requests", DEFAULT_REQUESTS));
+        timeout = Integer.valueOf(properties.getProperty("timeout", DEFAULT_TIMEOUT));
+        interval = Integer.valueOf(properties.getProperty("interval", DEFAULT_INTERVAL));
+        alpha = Double.valueOf(properties.getProperty("alpha", DEFAULT_ALPHA));
+    }
+
+    @Override
+    public void start() {
+        log.debug("starting redis status service");
+        zookeeper = new ZkClient(address);
+        zookeeper.createPersistent("/" + root, true);
+
+        try { zookeeper.createPersistent("/" + root + "/target.get", String.valueOf(targetTpsGet)); } catch (Exception ignore) {}
+        try { zookeeper.createPersistent("/" + root + "/target.set", String.valueOf(targetTpsSet)); } catch (Exception ignore) {}
+        try { zookeeper.createPersistent("/" + root + "/target.min", String.valueOf(targetCountMin)); } catch (Exception ignore) {}
+        try { zookeeper.createPersistent("/" + root + "/target.max", String.valueOf(targetCountMax)); } catch (Exception ignore) {}
+ 
+        executor = Executors.newSingleThreadScheduledExecutor();
+        executor.scheduleAtFixedRate(new RedisBenchmarkRunnable(), 0, interval, TimeUnit.MILLISECONDS);
+    }
+
+    @Override
+    public void stop() {
+        log.debug("stopping redis status service");
+        if (executor != null) {
+            executor.shutdownNow();
+            while (!executor.isTerminated()) {
+                try {
+                    Thread.sleep(100);
+                } catch (InterruptedException e) {
+                    // ignore
+                }
+            }
+            executor = null;
+        }
+        if (zookeeper != null) {
+            zookeeper.close();
+            zookeeper = null;
+        }
+    }
+
+    @Override
+    public int getTargetContainerCount(String containerType) throws Exception {
+        return targetCount;
+    }
+
+    private class RedisBenchmarkRunnable implements Runnable {
+        ExecutorService executor = Executors.newCachedThreadPool();
+        RedisResult     aggregateResult;
+
+        @Override
+        public void run() {
+            log.debug("running redis benchmark");
+
+            aggregateResult = new RedisResult(0);
+            Collection<Future<RedisResult>> futures = new ArrayList<Future<RedisResult>>();
+
+            try {
+                Collection<RedisTarget> targets = getTargets();
+
+                // start benchmark
+                for (RedisTarget target : targets) {
+                    log.debug(String.format("submitting target '%s'", target));
+                    Future<RedisResult> future = executor.submit(new RedisCallable(target));
+                    futures.add(future);
+                }
+
+                // aggregate results
+                try {
+                    log.debug("waiting for results");
+
+                    long limit = System.currentTimeMillis() + timeout;
+                    for (Future<RedisResult> future : futures) {
+                        try {
+                            RedisResult result = future.get(limit - System.currentTimeMillis(), TimeUnit.MILLISECONDS);
+                            log.debug(String.format("got result '%s'", result));
+                            aggregate(result);
+                        } catch (Exception e) {
+                            log.warn(String.format("failed to get result"));
+                            future.cancel(true);
+                        }
+                    }
+                } catch (Exception e) {
+                    log.error("Error running redis benchmark", e);
+
+                    for (Future<RedisResult> future : futures) {
+                        future.cancel(true);
+                    }
+
+                    return;
+                }
+
+                // compare to thresholds
+                log.debug(String.format("aggregate result is '%s'", aggregateResult));
+
+                // get target from zookeeper
+                try { targetTpsGet = Integer.valueOf(zookeeper.<String> readData("/" + root + "/target.get")); } catch (Exception ignore) {}
+                try { targetTpsSet = Integer.valueOf(zookeeper.<String> readData("/" + root + "/target.set")); } catch (Exception ignore) {}
+                try { targetCountMin = Integer.valueOf(zookeeper.<String> readData("/" + root + "/target.min")); } catch (Exception ignore) {}
+                try { targetCountMax = Integer.valueOf(zookeeper.<String> readData("/" + root + "/target.max")); } catch (Exception ignore) {}
+
+                averageCount = alpha * aggregateResult.serverCount + (1.0 - alpha) * averageCount;
+
+                // calculate counts
+                int targetCountGet = -1;
+                if (aggregateResult.containsKey("GET")) {
+                    double tpsTarget = targetTpsGet;
+                    double tps = aggregateResult.get("GET");
+
+                    averageTpsGet = alpha * tps + (1.0 - alpha) * averageTpsGet;
+
+                    targetCountGet = (int) Math.ceil(tpsTarget / averageTpsGet * averageCount);
+                    log.debug(String.format("count.get=%d, target.get=%f, tps.get=%f, tps.avg.get=%f, count.avg=%f", targetCountGet, tpsTarget, tps,
+                            averageTpsGet, averageCount));
+                }
+
+                int targetCountSet = -1;
+                if (aggregateResult.containsKey("SET")) {
+                    double tpsTarget = targetTpsSet;
+                    double tps = aggregateResult.get("SET");
+
+                    averageTpsSet = alpha * tps + (1.0 - alpha) * averageTpsSet;
+
+                    targetCountSet = (int) Math.ceil(tpsTarget / averageTpsSet * averageCount);
+                    log.debug(String.format("count.set=%d, target.set=%f, tps.set=%f, tps.avg.set=%f, count.avg=%f", targetCountSet, tpsTarget, tps,
+                            averageTpsSet, averageCount));
+                }
+
+                targetCount = Math.max(targetCountGet, targetCountSet);
+
+                if (targetCountMin > 0)
+                    targetCount = Math.max(targetCount, targetCountMin);
+                if (targetCountMax > 0)
+                    targetCount = Math.min(targetCount, targetCountMax);
+
+                targetCount = Math.max(targetCount, 1);
+
+                log.debug(String.format("target count is %d", targetCount));
+                RedisTargetProvider.this.targetCount = targetCount;
+
+            } catch (Exception e) {
+                log.error("Error running redis benchmark", e);
+
+                for (Future<RedisResult> future : futures) {
+                    future.cancel(true);
+                }
+            }
+
+        }
+
+        Collection<RedisTarget> getTargets() {
+            log.debug("fetching redis servers from zookeeper");
+            Collection<RedisTarget> targets = new ArrayList<RedisTarget>();
+            Collection<String> servers = zookeeper.getChildren("/" + root);
+
+            servers.remove("target.get");
+            servers.remove("target.set");
+            servers.remove("target.min");
+            servers.remove("target.max");
+
+            for (String server : servers) {
+                if (!zookeeper.exists("/" + root + "/" + server + "/heartbeat"))
+                    continue;
+
+                String hostname = zookeeper.readData("/" + root + "/" + server + "/hostname");
+                int port = Integer.valueOf(zookeeper.<String> readData("/" + root + "/" + server + "/port"));
+
+                targets.add(new RedisTarget(hostname, port));
+            }
+
+            log.debug(String.format("found %d servers: %s", targets.size(), targets));
+            return targets;
+        }
+
+        void aggregate(RedisResult result) {
+            RedisResult newResult = new RedisResult(aggregateResult.serverCount + result.serverCount);
+
+            for (Entry<String, Double> entry : result.entrySet()) {
+                double current = 0.0d;
+                if (aggregateResult.containsKey(entry.getKey()))
+                    current = aggregateResult.get(entry.getKey());
+
+                current += entry.getValue();
+                newResult.put(entry.getKey(), current);
+            }
+
+            aggregateResult = newResult;
+        }
+    }
+
+    private static class RedisTarget {
+        final String hostname;
+        final int    port;
+
+        public RedisTarget(String hostname, int port) {
+            this.hostname = hostname;
+            this.port = port;
+        }
+
+        @Override
+        public String toString() {
+            return String.format("%s:%d", hostname, port);
+        }
+    }
+
+    private static class RedisResult extends HashMap<String, Double> {
+        /**
+         * 
+         */
+        private static final long serialVersionUID = 4599748807597500952L;
+
+        final int                 serverCount;
+
+        public RedisResult(int serverCount) {
+            this.serverCount = serverCount;
+        }
+
+        @Override
+        public String toString() {
+            return String.format("[serverCount=%d %s]", serverCount, super.toString());
+        }
+    }
+
+    private class RedisCallable implements Callable<RedisResult> {
+        final RedisTarget target;
+
+        public RedisCallable(RedisTarget target) {
+            this.target = target;
+        }
+
+        @Override
+        public RedisResult call() throws Exception {
+            log.debug(String.format("executing benchmark for '%s'", target));
+
+            ProcessBuilder builder = new ProcessBuilder();
+            builder.command(BENCHMARK_COMMAND, "-h", target.hostname, "-p", String.valueOf(target.port), "-r", String.valueOf(records), "-n",
+                    String.valueOf(requests), "-c", String.valueOf(clients), "-t", BENCHMARK_TESTS, "--csv");
+            Process process = builder.start();
+
+            log.debug(String.format("running '%s'", builder.command()));
+
+            RedisResult result = new RedisResult(1);
+
+            int retVal;
+            try {
+                retVal = process.waitFor();
+            } catch (InterruptedException e) {
+                process.destroy();
+                return result;
+            }
+
+            Preconditions.checkState(retVal == 0, "Benchmark process returned %s", retVal);
+
+            Pattern pattern = Pattern.compile("\"([A-Z0-9_]+).*\",\"([0-9\\.]+)\"");
+
+            log.debug("parsing output");
+            BufferedReader reader = new BufferedReader(new InputStreamReader(process.getInputStream()));
+            String line = null;
+            while ((line = reader.readLine()) != null) {
+                Matcher matcher = pattern.matcher(line);
+
+                if (!matcher.find())
+                    continue;
+
+                String key = matcher.group(1);
+                Double value = Double.valueOf(matcher.group(2));
+
+                result.put(key, value);
+            }
+
+            log.debug(String.format("benchmark for '%s' returned '%s'", target, result));
+
+            return result;
+        }
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/StaticTargetProvider.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/StaticTargetProvider.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/StaticTargetProvider.java
new file mode 100644
index 0000000..3159fbe
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/StaticTargetProvider.java
@@ -0,0 +1,62 @@
+package org.apache.helix.metamanager.impl;
+
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Properties;
+
+import org.apache.helix.metamanager.TargetProviderService;
+import org.apache.log4j.Logger;
+
+/**
+ * Target model based on manually set count. Count may change dynamically.
+ * 
+ */
+public class StaticTargetProvider implements TargetProviderService {
+    static final Logger        log          = Logger.getLogger(StaticTargetProvider.class);
+
+    final Map<String, Integer> targetCounts = new HashMap<String, Integer>();
+
+    public StaticTargetProvider() {
+        // left blank
+    }
+
+    public StaticTargetProvider(Map<String, Integer> targetCounts) {
+        this.targetCounts.putAll(targetCounts);
+    }
+
+    @Override
+    public int getTargetContainerCount(String containerType) {
+        return targetCounts.get(containerType);
+    }
+
+    public void setTargetContainerCount(String containerType, int targetCount) {
+        targetCounts.put(containerType, targetCount);
+    }
+
+    @Override
+    public void configure(Properties properties) throws Exception {
+        for (Entry<Object, Object> entry : properties.entrySet()) {
+            String key = (String) entry.getKey();
+
+            try {
+                int value = Integer.valueOf((String) entry.getValue());
+                log.debug(String.format("Inserting value '%s = %d'", key, value));
+                targetCounts.put(key, value);
+            } catch (NumberFormatException e) {
+                log.warn(String.format("Skipping '%s', not an integer (value='%s')", key, (String) entry.getValue()));
+            }
+        }
+    }
+
+    @Override
+    public void start() throws Exception {
+        // left blank
+    }
+
+    @Override
+    public void stop() throws Exception {
+        // left blank
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/container/DummyMasterSlaveProcess.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/container/DummyMasterSlaveProcess.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/container/DummyMasterSlaveProcess.java
new file mode 100644
index 0000000..2d91bdd
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/container/DummyMasterSlaveProcess.java
@@ -0,0 +1,76 @@
+package org.apache.helix.metamanager.impl.container;
+
+import org.apache.helix.NotificationContext;
+import org.apache.helix.metamanager.container.ContainerProcess;
+import org.apache.helix.metamanager.container.ContainerProcessProperties;
+import org.apache.helix.model.Message;
+import org.apache.helix.participant.statemachine.StateModel;
+import org.apache.helix.participant.statemachine.StateModelFactory;
+import org.apache.helix.participant.statemachine.StateModelInfo;
+import org.apache.helix.participant.statemachine.Transition;
+import org.apache.log4j.Logger;
+
+/**
+ * Sample implementation of container with Helix participant for MasterSlave
+ * state model. Print state transitions only.
+ * 
+ */
public class DummyMasterSlaveProcess extends ContainerProcess {

    static final Logger log = Logger.getLogger(DummyMasterSlaveProcess.class);

    public DummyMasterSlaveProcess(ContainerProcessProperties properties) throws Exception {
        // NOTE(review): this calls configure(properties) while the sibling
        // container processes call super(properties) -- presumably equivalent,
        // but verify against ContainerProcess.
        configure(properties);
        setModelName("MasterSlave");
        setModelFactory(new DummyMasterSlaveModelFactory());
    }

    /** No real work: this dummy container only logs its lifecycle. */
    @Override
    protected void startContainer() throws Exception {
        log.info("starting dummy process container");
    }

    @Override
    protected void stopContainer() throws Exception {
        log.info("stopping dummy process container");
    }

    /** Creates logging-only MasterSlave state models. */
    public static class DummyMasterSlaveModelFactory extends StateModelFactory<DummyMasterSlaveStateModel> {
        @Override
        public DummyMasterSlaveStateModel createNewStateModel(String partitionName) {
            return new DummyMasterSlaveStateModel();
        }
    }

    /** MasterSlave model whose transitions are no-ops that log at TRACE level. */
    @StateModelInfo(initialState = "OFFLINE", states = { "OFFLINE", "SLAVE", "MASTER", "DROPPED" })
    public static class DummyMasterSlaveStateModel extends StateModel {

        static final Logger log = Logger.getLogger(DummyMasterSlaveStateModel.class);

        @Transition(from = "OFFLINE", to = "SLAVE")
        public void offlineToSlave(Message m, NotificationContext context) {
            log.trace(String.format("%s transitioning from OFFLINE to SLAVE", context.getManager().getInstanceName()));
        }

        @Transition(from = "SLAVE", to = "OFFLINE")
        public void slaveToOffline(Message m, NotificationContext context) {
            log.trace(String.format("%s transitioning from SLAVE to OFFLINE", context.getManager().getInstanceName()));
        }

        @Transition(from = "SLAVE", to = "MASTER")
        public void slaveToMaster(Message m, NotificationContext context) {
            log.trace(String.format("%s transitioning from SLAVE to MASTER", context.getManager().getInstanceName()));
        }

        @Transition(from = "MASTER", to = "SLAVE")
        public void masterToSlave(Message m, NotificationContext context) {
            log.trace(String.format("%s transitioning from MASTER to SLAVE", context.getManager().getInstanceName()));
        }

        @Transition(from = "OFFLINE", to = "DROPPED")
        public void offlineToDropped(Message m, NotificationContext context) {
            log.trace(String.format("%s transitioning from OFFLINE to DROPPED", context.getManager().getInstanceName()));
        }

    }
}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/container/DummyOnlineOfflineProcess.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/container/DummyOnlineOfflineProcess.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/container/DummyOnlineOfflineProcess.java
new file mode 100644
index 0000000..62f63a8
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/container/DummyOnlineOfflineProcess.java
@@ -0,0 +1,66 @@
+package org.apache.helix.metamanager.impl.container;
+
+import org.apache.helix.NotificationContext;
+import org.apache.helix.metamanager.container.ContainerProcess;
+import org.apache.helix.metamanager.container.ContainerProcessProperties;
+import org.apache.helix.model.Message;
+import org.apache.helix.participant.statemachine.StateModel;
+import org.apache.helix.participant.statemachine.StateModelFactory;
+import org.apache.helix.participant.statemachine.StateModelInfo;
+import org.apache.helix.participant.statemachine.Transition;
+import org.apache.log4j.Logger;
+
+/**
+ * Sample implementation of container with Helix participant for OnlineOffline
+ * state model. Print state transitions only.
+ * 
+ */
+public class DummyOnlineOfflineProcess extends ContainerProcess {
+
+    static final Logger log = Logger.getLogger(DummyOnlineOfflineProcess.class);
+
+    /**
+     * Creates a dummy OnlineOffline participant configured from the given
+     * container properties.
+     */
+    public DummyOnlineOfflineProcess(ContainerProcessProperties properties) throws Exception {
+        configure(properties);
+        setModelName("OnlineOffline");
+        setModelFactory(new DummyOnlineOfflineModelFactory());
+    }
+
+    @Override
+    protected void startContainer() throws Exception {
+        // No external process to launch; this container only joins the cluster.
+        log.info("starting dummy online-offline process container");
+    }
+
+    @Override
+    protected void stopContainer() throws Exception {
+        log.info("stopping dummy online-offline process container");
+    }
+
+    public static class DummyOnlineOfflineModelFactory extends StateModelFactory<DummyOnlineOfflineStateModel> {
+        @Override
+        public DummyOnlineOfflineStateModel createNewStateModel(String partitionName) {
+            return new DummyOnlineOfflineStateModel();
+        }
+    }
+
+    /**
+     * No-op OnlineOffline state model: logs transitions at trace level only.
+     * The from/to states in the log line are read off the message itself so
+     * they cannot drift from the {@link Transition} annotations (consistent
+     * with RedisServerProcess and ZookeeperMasterSlaveProcess).
+     */
+    @StateModelInfo(initialState = "OFFLINE", states = { "OFFLINE", "ONLINE", "DROPPED" })
+    public static class DummyOnlineOfflineStateModel extends StateModel {
+
+        static final Logger log = Logger.getLogger(DummyOnlineOfflineStateModel.class);
+
+        @Transition(from = "OFFLINE", to = "ONLINE")
+        public void offlineToOnline(Message m, NotificationContext context) {
+            logTransition(m, context);
+        }
+
+        @Transition(from = "ONLINE", to = "OFFLINE")
+        public void onlineToOffline(Message m, NotificationContext context) {
+            logTransition(m, context);
+        }
+
+        @Transition(from = "OFFLINE", to = "DROPPED")
+        public void offlineToDropped(Message m, NotificationContext context) {
+            logTransition(m, context);
+        }
+
+        // Single trace helper so the message format exists in exactly one place.
+        private void logTransition(Message m, NotificationContext context) {
+            log.trace(String.format("%s transitioning from %s to %s", context.getManager().getInstanceName(), m.getFromState(), m.getToState()));
+        }
+
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/container/RedisServerProcess.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/container/RedisServerProcess.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/container/RedisServerProcess.java
new file mode 100644
index 0000000..c87f905
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/container/RedisServerProcess.java
@@ -0,0 +1,140 @@
+package org.apache.helix.metamanager.impl.container;
+
+import java.net.InetAddress;
+import java.util.concurrent.Executors;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.TimeUnit;
+
+import org.I0Itec.zkclient.ZkClient;
+import org.apache.helix.NotificationContext;
+import org.apache.helix.metamanager.container.ContainerProcess;
+import org.apache.helix.metamanager.container.ContainerProcessProperties;
+import org.apache.helix.model.Message;
+import org.apache.helix.participant.statemachine.StateModel;
+import org.apache.helix.participant.statemachine.StateModelFactory;
+import org.apache.helix.participant.statemachine.StateModelInfo;
+import org.apache.helix.participant.statemachine.Transition;
+import org.apache.log4j.Logger;
+
+/**
+ * Container implementation for redis-server. Uses OnlineOffline model, spawns
+ * Redis as Shell process and writes metadata to zookeeper.
+ * 
+ */
+public class RedisServerProcess extends ContainerProcess {
+
+    static final Logger        log                  = Logger.getLogger(RedisServerProcess.class);
+
+    public static final String REDIS_SERVER_COMMAND = "redis-server";
+
+    /** Interval in ms between liveness checks of the spawned redis-server process. */
+    public static final long   MONITOR_INTERVAL     = 5000;
+
+    ZkClient                   zookeeper;
+
+    final String               address;
+    final String               root;
+    final String               name;
+    final int                  basePort;
+
+    Process                    process;
+
+    ScheduledExecutorService   executor;
+
+    /**
+     * Creates a redis-server container. Requires 'address' (zookeeper connect
+     * string), 'root' (zookeeper metadata root), 'baseport' and the container
+     * name in the given properties.
+     */
+    public RedisServerProcess(ContainerProcessProperties properties) throws Exception {
+        configure(properties);
+        setModelName("OnlineOffline");
+        setModelFactory(new RedisServerModelFactory());
+
+        address = properties.getProperty("address");
+        root = properties.getProperty("root");
+        basePort = Integer.valueOf(properties.getProperty("baseport"));
+        name = properties.getProperty(ContainerProcessProperties.NAME);
+    }
+
+    @Override
+    protected void startContainer() throws Exception {
+        log.info(String.format("starting redis server container for instance '%s'", name));
+
+        String hostname = InetAddress.getLocalHost().getHostName();
+        // NOTE(review): assumes instance names look like '<prefix>_<index>' — TODO
+        // confirm against the provider's naming scheme.
+        int port = basePort + Integer.valueOf(name.split("_")[1]);
+
+        log.debug(String.format("Starting redis server at '%s:%d'", hostname, port));
+
+        ProcessBuilder builder = new ProcessBuilder();
+        builder.command(REDIS_SERVER_COMMAND, "--port", String.valueOf(port));
+        process = builder.start();
+
+        // Publish hostname/port under /<root>/<name>; the heartbeat node is
+        // ephemeral so it disappears when this container's session dies.
+        log.debug("Updating zookeeper");
+        zookeeper = new ZkClient(address);
+        zookeeper.deleteRecursive("/" + root + "/" + name);
+        zookeeper.createPersistent("/" + root + "/" + name, true);
+        zookeeper.createPersistent("/" + root + "/" + name + "/hostname", hostname);
+        zookeeper.createPersistent("/" + root + "/" + name + "/port", String.valueOf(port));
+        zookeeper.createEphemeral("/" + root + "/" + name + "/heartbeat");
+
+        log.debug("Starting process monitor");
+        executor = Executors.newSingleThreadScheduledExecutor();
+        executor.scheduleAtFixedRate(new ProcessMonitor(), 0, MONITOR_INTERVAL, TimeUnit.MILLISECONDS);
+
+    }
+
+    @Override
+    protected void stopContainer() throws Exception {
+        log.info("stopping redis server container");
+
+        // Guard each resource: stopContainer may run even if startContainer
+        // failed part-way through.
+        log.debug("Stopping process monitor");
+        if (executor != null) {
+            executor.shutdownNow();
+        }
+
+        log.debug("Updating zookeeper");
+        if (zookeeper != null) {
+            zookeeper.deleteRecursive("/" + root + "/" + name);
+            zookeeper.close();
+        }
+
+        log.debug("Stopping process");
+        if (process != null) {
+            process.destroy();
+            process.waitFor();
+        }
+    }
+
+    public class RedisServerModelFactory extends StateModelFactory<RedisServerModel> {
+        @Override
+        public RedisServerModel createNewStateModel(String partitionName) {
+            return new RedisServerModel();
+        }
+    }
+
+    /** Log-only OnlineOffline model; the redis process lifecycle is handled by the container. */
+    @StateModelInfo(initialState = "OFFLINE", states = { "OFFLINE", "ONLINE", "DROPPED" })
+    public class RedisServerModel extends StateModel {
+
+        @Transition(from = "OFFLINE", to = "ONLINE")
+        public void offlineToOnline(Message m, NotificationContext context) {
+            // left blank
+            log.trace(String.format("%s transitioning from %s to %s", context.getManager().getInstanceName(), m.getFromState(), m.getToState()));
+        }
+
+        @Transition(from = "ONLINE", to = "OFFLINE")
+        public void onlineToOffline(Message m, NotificationContext context) {
+            // left blank
+            log.trace(String.format("%s transitioning from %s to %s", context.getManager().getInstanceName(), m.getFromState(), m.getToState()));
+        }
+
+        @Transition(from = "OFFLINE", to = "DROPPED")
+        public void offlineToDropped(Message m, NotificationContext context) {
+            // left blank
+            log.trace(String.format("%s transitioning from %s to %s", context.getManager().getInstanceName(), m.getFromState(), m.getToState()));
+        }
+
+    }
+
+    /**
+     * Periodically polls the spawned process. Process.exitValue() throws
+     * IllegalThreadStateException while the process is still alive; any
+     * returned value means redis-server terminated.
+     */
+    private class ProcessMonitor implements Runnable {
+        @Override
+        public void run() {
+            try {
+                process.exitValue();
+            } catch (IllegalThreadStateException e) {
+                // still running — nothing to do
+                return;
+            }
+            log.warn("detected process failure");
+            fail();
+        }
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/container/ZookeeperMasterSlaveProcess.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/container/ZookeeperMasterSlaveProcess.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/container/ZookeeperMasterSlaveProcess.java
new file mode 100644
index 0000000..a493a71
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/container/ZookeeperMasterSlaveProcess.java
@@ -0,0 +1,108 @@
+package org.apache.helix.metamanager.impl.container;
+
+import org.I0Itec.zkclient.ZkClient;
+import org.apache.helix.NotificationContext;
+import org.apache.helix.metamanager.container.ContainerProcess;
+import org.apache.helix.metamanager.container.ContainerProcessProperties;
+import org.apache.helix.model.Message;
+import org.apache.helix.participant.statemachine.StateModel;
+import org.apache.helix.participant.statemachine.StateModelFactory;
+import org.apache.helix.participant.statemachine.StateModelInfo;
+import org.apache.helix.participant.statemachine.Transition;
+import org.apache.log4j.Logger;
+
+/**
+ * Sample implementation of container with Helix participant for MasterSlave
+ * state model. Writes current state to separate zookeeper domain.
+ * 
+ */
+public class ZookeeperMasterSlaveProcess extends ContainerProcess {
+
+    static final Logger log = Logger.getLogger(ZookeeperMasterSlaveProcess.class);
+
+    ZkClient            zookeeper;
+
+    final String        address;
+    final String        root;
+    final String        name;
+
+    /**
+     * Creates a MasterSlave participant that mirrors its per-partition state
+     * into a separate zookeeper subtree. Requires 'address' (zookeeper connect
+     * string), 'root' (metadata root) and the container name in the properties.
+     */
+    public ZookeeperMasterSlaveProcess(ContainerProcessProperties properties) throws Exception {
+        configure(properties);
+        setModelName("MasterSlave");
+        setModelFactory(new ZookeeperMasterSlaveModelFactory());
+
+        address = properties.getProperty("address");
+        root = properties.getProperty("root");
+        name = properties.getProperty(ContainerProcessProperties.NAME);
+    }
+
+    @Override
+    protected void startContainer() throws Exception {
+        log.info("starting zookeeper process container");
+
+        zookeeper = new ZkClient(address);
+        zookeeper.createPersistent("/" + root + "/" + name, true);
+    }
+
+    @Override
+    protected void stopContainer() throws Exception {
+        log.info("stopping zookeeper process container");
+
+        zookeeper.close();
+    }
+
+    public class ZookeeperMasterSlaveModelFactory extends StateModelFactory<ZookeeperMasterSlaveModel> {
+        @Override
+        public ZookeeperMasterSlaveModel createNewStateModel(String partitionName) {
+            return new ZookeeperMasterSlaveModel();
+        }
+    }
+
+    /**
+     * MasterSlave model that records the current state of each partition as an
+     * ephemeral node /&lt;root&gt;/&lt;name&gt;/&lt;resource&gt;_&lt;partition&gt;.
+     */
+    @StateModelInfo(initialState = "OFFLINE", states = { "OFFLINE", "SLAVE", "MASTER", "DROPPED" })
+    public class ZookeeperMasterSlaveModel extends StateModel {
+
+        @Transition(from = "OFFLINE", to = "SLAVE")
+        public void offlineToSlave(Message m, NotificationContext context) {
+            transition(m, context);
+        }
+
+        @Transition(from = "SLAVE", to = "OFFLINE")
+        public void slaveToOffline(Message m, NotificationContext context) {
+            transition(m, context);
+        }
+
+        @Transition(from = "SLAVE", to = "MASTER")
+        public void slaveToMaster(Message m, NotificationContext context) {
+            transition(m, context);
+        }
+
+        @Transition(from = "MASTER", to = "SLAVE")
+        public void masterToSlave(Message m, NotificationContext context) {
+            transition(m, context);
+        }
+
+        @Transition(from = "OFFLINE", to = "DROPPED")
+        public void offlineToDropped(Message m, NotificationContext context) {
+            log.trace(String.format("%s transitioning from %s to %s", context.getManager().getInstanceName(), m.getFromState(), m.getToState()));
+
+            // DROPPED partitions leave no marker behind: delete only, no
+            // ephemeral re-create (unlike transition()).
+            zookeeper.delete(partitionPath(m));
+        }
+
+        /** Records the new state: replace any stale marker with a fresh ephemeral node. */
+        public void transition(Message m, NotificationContext context) {
+            log.trace(String.format("%s transitioning from %s to %s", context.getManager().getInstanceName(), m.getFromState(), m.getToState()));
+
+            String path = partitionPath(m);
+
+            zookeeper.delete(path);
+            zookeeper.createEphemeral(path, m.getToState());
+        }
+
+        // Single place for the partition marker path layout.
+        private String partitionPath(Message m) {
+            return "/" + root + "/" + name + "/" + m.getResourceName() + "_" + m.getPartitionName();
+        }
+
+    }
+
+}


[08/15] Adding Helix-task-framework and Yarn integration modules

Posted by ki...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrap/BootUtils.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrap/BootUtils.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrap/BootUtils.java
new file mode 100644
index 0000000..fbbfa14
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrap/BootUtils.java
@@ -0,0 +1,127 @@
+package org.apache.helix.metamanager.bootstrap;
+
+import java.util.Collection;
+import java.util.Map;
+import java.util.Properties;
+import java.util.Set;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.commons.lang.StringUtils;
+import org.apache.helix.metamanager.container.ContainerProcessProperties;
+import org.apache.log4j.Logger;
+
+import com.google.common.collect.Lists;
+import com.google.common.collect.Sets;
+
+/**
+ * Static helpers for the bootstrap process: property-namespace extraction and
+ * reflective instantiation of configured classes.
+ */
+public class BootUtils {
+
+    public static final String CLASS_PROPERTY = "class";
+    static final Logger        log            = Logger.getLogger(BootUtils.class);
+
+    /** Returns true if any key in the properties starts with "&lt;namespace&gt;.". */
+    public static boolean hasNamespace(Properties properties, String namespace) {
+        String prefix = namespace + ".";
+        for (String key : properties.stringPropertyNames()) {
+            if (key.startsWith(prefix))
+                return true;
+        }
+        return false;
+    }
+
+    /** Returns the set of top-level namespaces, i.e. the key text before the first '.'. */
+    public static Set<String> getNamespaces(Properties properties) {
+        Pattern pattern = Pattern.compile("^([^\\.\\=]+)");
+
+        Set<String> namespaces = Sets.newHashSet();
+
+        for (Map.Entry<Object, Object> rawEntry : properties.entrySet()) {
+            String key = (String) rawEntry.getKey();
+
+            Matcher matcher = pattern.matcher(key);
+            if (matcher.find()) {
+                namespaces.add(matcher.group(1));
+            }
+        }
+
+        return namespaces;
+    }
+
+    /**
+     * Returns a new Properties holding every entry under "&lt;namespace&gt;." with
+     * the prefix stripped from the keys.
+     */
+    public static Properties getNamespace(Properties source, String namespace) {
+        Properties dest = new Properties();
+        String prefix = namespace + ".";
+
+        for (Map.Entry<Object, Object> rawEntry : source.entrySet()) {
+            String key = (String) rawEntry.getKey();
+            String value = (String) rawEntry.getValue();
+
+            if (key.startsWith(prefix)) {
+                String newKey = key.substring(prefix.length());
+                dest.put(newKey, value);
+            }
+        }
+
+        return dest;
+    }
+
+    /**
+     * Instantiates the class named by the 'class' property, preferring a
+     * ContainerProcessProperties constructor and falling back to the default
+     * constructor.
+     * NOTE(review): the argument is passed to a constructor declared with
+     * ContainerProcessProperties — this only succeeds when the caller actually
+     * hands in a ContainerProcessProperties instance; a plain Properties falls
+     * through to the default constructor. Confirm this is intended.
+     */
+    @SuppressWarnings("unchecked")
+    public static <T> T createInstance(Properties properties) throws Exception {
+        String className = properties.getProperty(CLASS_PROPERTY);
+
+        Class<?> containerClass = Class.forName(className);
+
+        try {
+            log.debug(String.format("checking for properties constructor in class '%s'", className));
+            return (T) containerClass.getConstructor(ContainerProcessProperties.class).newInstance(properties);
+        } catch (Exception e) {
+            log.debug("no properties constructor found");
+        }
+
+        try {
+            log.debug(String.format("checking for default constructor in class '%s'", className));
+            return (T) containerClass.getConstructor().newInstance();
+        } catch (Exception e) {
+            log.debug("no default constructor found");
+        }
+
+        throw new Exception(String.format("no suitable constructor for class '%s'", className));
+    }
+
+    /** Convenience: createInstance() on the sub-properties of the given namespace. */
+    public static <T> T createInstanceFromNamespace(Properties properties, String namespace) throws Exception {
+        return createInstance(getNamespace(properties, namespace));
+    }
+
+    /** Instantiates the given class via its default constructor. */
+    @SuppressWarnings("unchecked")
+    public static <T> T createInstance(Class<?> clazz) throws Exception {
+        try {
+            log.debug(String.format("checking for default constructor in class '%s'", clazz.getSimpleName()));
+            return (T) clazz.getConstructor().newInstance();
+        } catch (Exception e) {
+            log.debug("no default constructor found");
+        }
+
+        throw new Exception(String.format("no suitable constructor for class '%s'", clazz.getSimpleName()));
+    }
+
+    /** Instantiates the named class via its default constructor. */
+    public static <T> T createInstance(String className) throws Exception {
+        return createInstance(Class.forName(className));
+    }
+
+    /**
+     * Extracts one Properties object per container type listed in the
+     * comma-separated 'containers' property, taken from the
+     * "container.&lt;type&gt;." namespace.
+     */
+    public static Collection<Properties> getContainerProps(Properties properties) {
+        Collection<Properties> containerProps = Lists.newArrayList();
+
+        String containers = properties.getProperty("containers");
+        String[] containerTypes = StringUtils.split(containers, ",");
+
+        for (String containerType : containerTypes) {
+            Properties containerProp = BootUtils.getNamespace(BootUtils.getNamespace(properties, "container"), containerType);
+            log.debug(String.format("adding container type (type='%s', properties='%s')", containerType, containerProp));
+            containerProps.add(containerProp);
+        }
+
+        return containerProps;
+    }
+
+    // utility class — not instantiable
+    private BootUtils() {
+        // left blank
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrap/Bootstrapper.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrap/Bootstrapper.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrap/Bootstrapper.java
new file mode 100644
index 0000000..94de15f
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrap/Bootstrapper.java
@@ -0,0 +1,93 @@
+package org.apache.helix.metamanager.bootstrap;
+
+import java.io.InputStream;
+import java.util.Properties;
+
+import org.apache.log4j.Logger;
+
+/**
+ * Entry point that boots a complete demo setup: optional embedded zookeeper,
+ * the managed cluster, then the meta cluster, in dependency order. stop()
+ * tears the pieces down in reverse order.
+ */
+public class Bootstrapper {
+
+    static final Logger log = Logger.getLogger(Bootstrapper.class);
+
+    ManagedCluster      managed;
+    MetaCluster         meta;
+    ZookeeperWrapper    zookeeper;
+    Properties          properties;
+
+    public Bootstrapper(Properties properties) {
+        this.properties = properties;
+    }
+
+    public void start() throws Exception {
+        log.info("bootstrapping cluster");
+        // zookeeper is optional: only started when a 'zookeeper.*' namespace exists
+        if (BootUtils.hasNamespace(properties, "zookeeper")) {
+            log.info("starting zookeeper");
+            zookeeper = new ZookeeperWrapper(BootUtils.getNamespace(properties, "zookeeper"));
+            zookeeper.startService();
+        }
+
+        log.info("starting managed cluster");
+        managed = new ManagedCluster();
+        managed.setProperties(BootUtils.getNamespace(properties, "managed"));
+        managed.start();
+
+        log.info("starting meta cluster");
+        meta = new MetaCluster();
+        meta.setProperties(BootUtils.getNamespace(properties, "meta"));
+        meta.start();
+    }
+
+    public void stop() throws Exception {
+        log.info("tearing down cluster");
+        // reverse order of start(); fields are nulled so stop() is idempotent
+        if (meta != null) {
+            log.info("stopping meta cluster");
+            meta.stop();
+            meta = null;
+        }
+        if (managed != null) {
+            log.info("stopping managed cluster");
+            managed.stop();
+            managed = null;
+        }
+        if (zookeeper != null) {
+            log.info("stopping zookeeper");
+            zookeeper.stopService();
+            zookeeper = null;
+        }
+
+    }
+
+    public ManagedCluster getManaged() {
+        return managed;
+    }
+
+    public MetaCluster getMeta() {
+        return meta;
+    }
+
+    public ZookeeperWrapper getZookeeper() {
+        return zookeeper;
+    }
+
+    public Properties getProperties() {
+        return properties;
+    }
+
+    /**
+     * Usage: Bootstrapper &lt;classpath resource of cluster properties&gt;.
+     * Registers a shutdown hook for best-effort teardown.
+     */
+    public static void main(String[] args) throws Exception {
+        if (args.length < 1) {
+            throw new IllegalArgumentException("usage: Bootstrapper <cluster properties resource path>");
+        }
+        String resourcePath = args[0];
+
+        log.info(String.format("reading cluster definition from '%s'", resourcePath));
+        Properties properties = new Properties();
+        InputStream input = ClassLoader.getSystemResourceAsStream(resourcePath);
+        if (input == null) {
+            throw new IllegalArgumentException(String.format("resource '%s' not found on classpath", resourcePath));
+        }
+        try {
+            properties.load(input);
+        } finally {
+            input.close();
+        }
+
+        final Bootstrapper boot = new Bootstrapper(properties);
+        boot.start();
+
+        Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() {
+            @Override
+            public void run() {
+                // best-effort teardown; errors during JVM shutdown are ignored
+                try { boot.stop(); } catch (Exception ignored) {}
+            }
+        }));
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrap/ManagedCluster.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrap/ManagedCluster.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrap/ManagedCluster.java
new file mode 100644
index 0000000..b792c9f
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrap/ManagedCluster.java
@@ -0,0 +1,87 @@
+package org.apache.helix.metamanager.bootstrap;
+
+import java.util.Properties;
+
+import org.apache.commons.lang.StringUtils;
+import org.apache.helix.HelixAdmin;
+import org.apache.helix.HelixManager;
+import org.apache.helix.controller.HelixControllerMain;
+import org.apache.helix.manager.zk.ZKHelixAdmin;
+import org.apache.helix.model.IdealState;
+import org.apache.helix.model.IdealState.RebalanceMode;
+import org.apache.helix.model.StateModelDefinition;
+import org.apache.helix.tools.StateModelConfigGenerator;
+import org.apache.log4j.Logger;
+
+/**
+ * Sets up the managed Helix cluster: state model definitions, one tagged
+ * FULL_AUTO resource per configured resource name, and a standalone controller.
+ */
+public class ManagedCluster {
+
+    static final Logger log = Logger.getLogger(ManagedCluster.class);
+
+    public static final String DEFAULT_CLUSTER = "managed";
+
+    Properties          properties;
+
+    HelixAdmin          admin;
+    HelixManager        controllerManager;
+
+    public void start() {
+        String cluster = properties.getProperty("cluster", DEFAULT_CLUSTER);
+        String address = properties.getProperty("address");
+
+        log.info(String.format("starting managed cluster service (cluster='%s', address='%s')", cluster, address));
+
+        log.debug("setting up cluster admin");
+        admin = new ZKHelixAdmin(address);
+        admin.addCluster(cluster, false);
+        admin.addStateModelDef(cluster, "OnlineOffline", new StateModelDefinition(StateModelConfigGenerator.generateConfigForOnlineOffline()));
+        admin.addStateModelDef(cluster, "MasterSlave", new StateModelDefinition(StateModelConfigGenerator.generateConfigForMasterSlave()));
+
+        log.debug("setting up resources");
+        String resources = properties.getProperty("resources");
+        String[] resourceNames = StringUtils.split(resources, ",");
+
+        for (String resourceName : resourceNames) {
+            // named to avoid shadowing the configuration field 'properties'
+            Properties resourceProps = BootUtils.getNamespace(BootUtils.getNamespace(this.properties, "resource"), resourceName);
+
+            log.debug(String.format("parsing resource '%s' (properties='%s')", resourceName, resourceProps));
+
+            String container = resourceProps.getProperty("container");
+            String model = resourceProps.getProperty("model");
+            int partitions = Integer.parseInt(resourceProps.getProperty("partitions"));
+            int replica = Integer.parseInt(resourceProps.getProperty("replica"));
+
+            log.debug(String.format("setting up resource '%s' (container='%s', model='%s', partitions=%d, replica=%d)", resourceName, container, model,
+                    partitions, replica));
+
+            admin.addResource(cluster, resourceName, partitions, model, RebalanceMode.FULL_AUTO.toString());
+            IdealState idealState = admin.getResourceIdealState(cluster, resourceName);
+            // tag ties partitions of this resource to instances of the container type
+            idealState.setInstanceGroupTag(container);
+            idealState.setReplicas(String.valueOf(replica));
+            admin.setResourceIdealState(cluster, resourceName, idealState);
+        }
+
+        log.debug("setting up controller");
+        controllerManager = HelixControllerMain.startHelixController(address, cluster, "managedController", HelixControllerMain.STANDALONE);
+    }
+
+    public void stop() {
+        log.info("stopping managed cluster service");
+        if (controllerManager != null) {
+            controllerManager.disconnect();
+            controllerManager = null;
+        }
+        if (admin != null) {
+            admin.close();
+            admin = null;
+        }
+    }
+
+    public Properties getProperties() {
+        return properties;
+    }
+
+    public void setProperties(Properties properties) {
+        this.properties = properties;
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrap/MetaCluster.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrap/MetaCluster.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrap/MetaCluster.java
new file mode 100644
index 0000000..51700a8
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrap/MetaCluster.java
@@ -0,0 +1,201 @@
+package org.apache.helix.metamanager.bootstrap;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+import java.util.concurrent.Executors;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.helix.HelixAdmin;
+import org.apache.helix.HelixManager;
+import org.apache.helix.controller.HelixControllerMain;
+import org.apache.helix.manager.zk.ZKHelixAdmin;
+import org.apache.helix.metamanager.ConfigTool;
+import org.apache.helix.metamanager.provider.ProviderProcess;
+import org.apache.helix.metamanager.provider.ProviderRebalancer;
+import org.apache.helix.model.HelixConfigScope;
+import org.apache.helix.model.HelixConfigScope.ConfigScopeProperty;
+import org.apache.helix.model.IdealState;
+import org.apache.helix.model.IdealState.RebalanceMode;
+import org.apache.helix.model.InstanceConfig;
+import org.apache.helix.model.StateModelDefinition;
+import org.apache.helix.model.builder.HelixConfigScopeBuilder;
+import org.apache.helix.tools.StateModelConfigGenerator;
+import org.apache.log4j.Logger;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.Maps;
+
+public class MetaCluster {
+
+    static final Logger         log              = Logger.getLogger(MetaCluster.class);
+
+    private static final String DEFAULT_CLUSTER = "meta";
+    private static final String DEFAULT_MANAGED = "managed";
+    private static final String DEFAULT_INTERVAL = "10000";
+
+    Properties                  properties;
+
+    TargetWrapper               target;
+    StatusWrapper               status;
+    ProviderWrapper             provider;
+
+    HelixAdmin                  admin;
+    HelixManager                controllerManager;
+    ProviderProcess             providerProcess;
+
+    String                      cluster;
+    String                      address;
+    String                      managed;
+    int                         interval;
+
+    ScheduledExecutorService    executor;
+
+    /** Returns the raw bootstrap configuration for the meta cluster. */
+    public Properties getProperties() {
+        return this.properties;
+    }
+
+    /** Injects the bootstrap configuration; must be called before start(). */
+    public void setProperties(Properties properties) {
+        this.properties = properties;
+    }
+
+    /**
+     * Boots the meta cluster: validates configuration, creates the cluster and
+     * its OnlineOffline state model, starts the target/status/provider
+     * services, creates one USER_DEFINED resource per container type, starts a
+     * standalone controller and the periodic state-refresh task.
+     */
+    public void start() throws Exception {
+        Preconditions.checkArgument(BootUtils.hasNamespace(properties, "target"), "No 'target' property specified");
+        Preconditions.checkArgument(BootUtils.hasNamespace(properties, "status"), "No 'status' property specified");
+        Preconditions.checkArgument(BootUtils.hasNamespace(properties, "provider"), "No 'provider' property specified");
+
+        cluster = properties.getProperty("cluster", DEFAULT_CLUSTER);
+        address = properties.getProperty("address");
+        managed = properties.getProperty("managed", DEFAULT_MANAGED);
+        interval = Integer.valueOf(properties.getProperty("interval", DEFAULT_INTERVAL));
+
+        log.info(String.format("starting meta cluster service (cluster='%s', address='%s', managed='%s')", cluster, address, managed));
+
+        log.debug("setting up cluster admin");
+        admin = new ZKHelixAdmin(address);
+        admin.addCluster(cluster, false);
+        admin.addStateModelDef(cluster, "OnlineOffline", new StateModelDefinition(StateModelConfigGenerator.generateConfigForOnlineOffline()));
+
+        log.debug("setting up target service");
+        target = new TargetWrapper(BootUtils.getNamespace(properties, "target"));
+        target.startService();
+
+        log.debug("setting up container status service");
+        status = new StatusWrapper(BootUtils.getNamespace(properties, "status"));
+        status.startService();
+
+        log.debug("setting up container provider service");
+        provider = new ProviderWrapper(BootUtils.getNamespace(properties, "provider"));
+        // register the provider instance exactly once, before it connects as a
+        // participant (the original code registered it a second time further
+        // down, which fails for an already-existing instance)
+        admin.addInstance(cluster, new InstanceConfig(provider.getProviderName()));
+
+        provider.startService();
+
+        log.debug("setting up config tool");
+        ConfigTool.setTargetProvider(target.getTarget());
+        ConfigTool.setStatusProvider(status.getStatus());
+
+        log.debug("setting up provider resources");
+        for (String containerType : provider.getContainerTypes()) {
+            log.debug(String.format("setting up container type '%s'", containerType));
+
+            admin.addResource(cluster, containerType, target.getTarget().getTargetContainerCount(containerType), "OnlineOffline",
+                    RebalanceMode.USER_DEFINED.toString());
+
+            IdealState idealState = admin.getResourceIdealState(cluster, containerType);
+            idealState.setRebalancerClassName(ProviderRebalancer.class.getName());
+            idealState.setReplicas("1");
+
+            // BEGIN workaround
+            // FIXME workaround for HELIX-226
+            // pre-create empty list/map fields for up to 256 partitions so the
+            // user-defined rebalancer sees them
+            Map<String, List<String>> listFields = Maps.newHashMap();
+            Map<String, Map<String, String>> mapFields = Maps.newHashMap();
+            for (int i = 0; i < 256; i++) {
+                String partitionName = containerType + "_" + i;
+                listFields.put(partitionName, new ArrayList<String>());
+                mapFields.put(partitionName, new HashMap<String, String>());
+            }
+            idealState.getRecord().setListFields(listFields);
+            idealState.getRecord().setMapFields(mapFields);
+            // END workaround
+
+            admin.setResourceIdealState(cluster, containerType, idealState);
+        }
+
+        log.debug("starting controller");
+        controllerManager = HelixControllerMain.startHelixController(address, cluster, "metaController", HelixControllerMain.STANDALONE);
+
+        log.debug("starting state refresh service");
+        executor = Executors.newSingleThreadScheduledExecutor();
+        executor.scheduleAtFixedRate(new MetaRefreshRunnable(), interval, interval, TimeUnit.MILLISECONDS);
+
+        // NOTE(review): dummy cluster-scoped config entry ("key" -> "value");
+        // purpose unclear — possibly a leftover probe. Confirm before removing.
+        HelixConfigScope scope = new HelixConfigScopeBuilder(ConfigScopeProperty.CLUSTER, cluster).build();
+        admin.setConfig(scope, Collections.singletonMap("key", "value"));
+
+    }
+
+    /**
+     * Stops the meta cluster service. Components are shut down in reverse
+     * order of startup: the refresh executor first, then the Helix
+     * controller, the provider process and wrapper, the status and target
+     * wrappers, and finally the cluster admin connection. Each field is
+     * nulled after its shutdown, so calling this method twice is safe.
+     */
+    public void stop() throws Exception {
+        log.info("stopping meta cluster service");
+        if (executor != null) {
+            // interrupt the periodic ideal-state refresh task
+            executor.shutdownNow();
+            executor = null;
+        }
+        if (controllerManager != null) {
+            controllerManager.disconnect();
+            controllerManager = null;
+        }
+        // NOTE(review): both a raw provider process and the provider wrapper
+        // are tracked; only one of the two appears to be started in the
+        // visible start() path — confirm both fields are really needed.
+        if (providerProcess != null) {
+            providerProcess.stop();
+            providerProcess = null;
+        }
+        if (provider != null) {
+            provider.stopService();
+            provider = null;
+        }
+        if (status != null) {
+            status.stopService();
+            status = null;
+        }
+        if (target != null) {
+            target.stopService();
+            target = null;
+        }
+        if (admin != null) {
+            admin.close();
+            admin = null;
+        }
+    }
+
+    /** @return the target service wrapper, or null if the service is stopped */
+    public TargetWrapper getTarget() {
+        return target;
+    }
+
+    /** @return the container status service wrapper, or null if the service is stopped */
+    public StatusWrapper getStatus() {
+        return status;
+    }
+
+    /** @return the container provider service wrapper, or null if the service is stopped */
+    public ProviderWrapper getProvider() {
+        return provider;
+    }
+
+    /**
+     * Periodic task that reads each container type's ideal state and writes
+     * it back unchanged. The write "pokes" the USER_DEFINED rebalancer into
+     * running again so it can react to external state changes.
+     */
+    private class MetaRefreshRunnable implements Runnable {
+        @Override
+        public void run() {
+            log.debug("running status refresh");
+            for (String containerType : provider.getContainerTypes()) {
+                log.debug(String.format("refreshing container type '%s'", containerType));
+
+                IdealState poke = admin.getResourceIdealState(cluster, containerType);
+                admin.setResourceIdealState(cluster, containerType, poke);
+            }
+        }
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrap/ProviderWrapper.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrap/ProviderWrapper.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrap/ProviderWrapper.java
new file mode 100644
index 0000000..b8e35bb
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrap/ProviderWrapper.java
@@ -0,0 +1,162 @@
+package org.apache.helix.metamanager.bootstrap;
+
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Properties;
+import java.util.Set;
+
+import org.apache.commons.lang.StringUtils;
+import org.apache.helix.metamanager.impl.local.LocalContainerProviderProcess;
+import org.apache.helix.metamanager.impl.shell.ShellContainerProviderProcess;
+import org.apache.helix.metamanager.impl.yarn.YarnContainerProviderProcess;
+import org.apache.helix.metamanager.impl.yarn.YarnContainerProviderProperties;
+import org.apache.log4j.Logger;
+
+/**
+ * Bootstrap wrapper around the container provider process implementations.
+ * The concrete provider ("local", "shell" or "yarn") is selected via the
+ * "type" property; the remaining properties configure the chosen process.
+ */
+public class ProviderWrapper {
+
+    static final Logger      log = Logger.getLogger(ProviderWrapper.class);
+
+    WrapperImpl              impl;
+    Properties               properties;
+
+    public ProviderWrapper(Properties properties) {
+        this.properties = properties;
+    }
+
+    /**
+     * Instantiates and starts the provider process selected by the "type"
+     * property.
+     *
+     * @throws IllegalArgumentException if the type is not supported
+     */
+    public void startService() throws Exception {
+        String type = (String) properties.get("type");
+
+        log.info(String.format("starting container provider service (type='%s')", type));
+
+        if ("local".equals(type)) {
+            impl = new LocalWrapperImpl();
+
+        } else if ("shell".equals(type)) {
+            impl = new ShellWrapperImpl();
+
+        } else if ("yarn".equals(type)) {
+            impl = new YarnWrapperImpl();
+
+        } else {
+            throw new IllegalArgumentException(String.format("type '%s' not supported", type));
+        }
+
+        impl.startService();
+    }
+
+    /** Stops the underlying provider process started by {@link #startService()}. */
+    public void stopService() throws Exception {
+        impl.stopService();
+    }
+
+    /** @return the provider's instance name from the "name" property */
+    public String getProviderName() {
+        return properties.getProperty("name");
+    }
+
+    /** @return the container types from the comma-separated "containers" property */
+    public Set<String> getContainerTypes() {
+        String containers = properties.getProperty("containers");
+        String containerTypes[] = StringUtils.split(containers, ",");
+        return new HashSet<String>(Arrays.asList(containerTypes));
+    }
+
+    /** Common lifecycle of the type-specific provider process wrappers. */
+    static interface WrapperImpl {
+        void startService() throws Exception;
+
+        void stopService() throws Exception;
+    }
+
+    /** Runs containers in-process (for testing and local deployments). */
+    class LocalWrapperImpl implements WrapperImpl {
+        LocalContainerProviderProcess process;
+
+        @Override
+        public void startService() throws Exception {
+            // fetched only for the log statement; the process is configured
+            // with the full property set below
+            String name = properties.getProperty("name");
+            String address = properties.getProperty("address");
+            String cluster = properties.getProperty("cluster");
+            String containers = properties.getProperty("containers");
+
+            log.debug(String.format("creating local container provider (name='%s', address='%s', cluster='%s', containers='%s')", name, address, cluster,
+                    containers));
+
+            process = new LocalContainerProviderProcess();
+            process.configure(properties);
+            process.start();
+        }
+
+        @Override
+        public void stopService() throws Exception {
+            process.stop();
+            process = null;
+        }
+
+    }
+
+    /** Runs containers as separate shell processes on the local host. */
+    class ShellWrapperImpl implements WrapperImpl {
+
+        ShellContainerProviderProcess process;
+
+        @Override
+        public void startService() throws Exception {
+            // fetched only for the log statement; the process is configured
+            // with the full property set below
+            String name = properties.getProperty("name");
+            String address = properties.getProperty("address");
+            String cluster = properties.getProperty("cluster");
+            String containers = properties.getProperty("containers");
+
+            log.debug(String.format("creating shell container provider (name='%s', address='%s', cluster='%s', containers='%s')", name, address, cluster,
+                    containers));
+
+            process = new ShellContainerProviderProcess();
+            process.configure(properties);
+            process.start();
+        }
+
+        @Override
+        public void stopService() throws Exception {
+            process.stop();
+            process = null;
+        }
+
+    }
+
+    /** Runs containers on a YARN cluster via the resource manager. */
+    class YarnWrapperImpl implements WrapperImpl {
+
+        YarnContainerProviderProcess process;
+
+        @Override
+        public void startService() throws Exception {
+            String name = properties.getProperty("name");
+            String address = properties.getProperty("address");
+            String cluster = properties.getProperty("cluster");
+            String containers = properties.getProperty("containers");
+            String metadata = properties.getProperty("metadata");
+            // NOTE(review): the key 'resourcemananger' looks misspelled
+            // (extra 'an'); confirm against the *.properties files before
+            // changing — a config key rename would break existing configs.
+            String resourcemanager = properties.getProperty("resourcemananger");
+            String scheduler = properties.getProperty("scheduler");
+            String user = properties.getProperty("user");
+            String hdfs = properties.getProperty("hdfs");
+
+            // repackage into the strongly-keyed YARN provider properties
+            YarnContainerProviderProperties yarnProperties = new YarnContainerProviderProperties();
+            yarnProperties.setProperty(YarnContainerProviderProperties.CLUSTER, cluster);
+            yarnProperties.setProperty(YarnContainerProviderProperties.ADDRESS, address);
+            yarnProperties.setProperty(YarnContainerProviderProperties.NAME, name);
+            yarnProperties.setProperty(YarnContainerProviderProperties.METADATA, metadata);
+            yarnProperties.setProperty(YarnContainerProviderProperties.RESOURCEMANAGER, resourcemanager);
+            yarnProperties.setProperty(YarnContainerProviderProperties.SCHEDULER, scheduler);
+            yarnProperties.setProperty(YarnContainerProviderProperties.USER, user);
+            yarnProperties.setProperty(YarnContainerProviderProperties.HDFS, hdfs);
+
+            log.debug(String.format("creating yarn container provider (name='%s', address='%s', cluster='%s', metadata='%s', resourcemananger='%s', " +
+            		"scheduler='%s', user='%s', hdfs='%s', containers='%s')", name, address, cluster, metadata, resourcemanager, scheduler, user, hdfs, containers));
+
+            process = new YarnContainerProviderProcess();
+            process.configure(yarnProperties);
+            process.start();
+        }
+
+        @Override
+        public void stopService() throws Exception {
+            process.stop();
+            process = null;
+        }
+
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrap/StatusWrapper.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrap/StatusWrapper.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrap/StatusWrapper.java
new file mode 100644
index 0000000..20fa0db
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrap/StatusWrapper.java
@@ -0,0 +1,122 @@
+package org.apache.helix.metamanager.bootstrap;
+
+import java.util.Properties;
+
+import org.apache.helix.metamanager.StatusProvider;
+import org.apache.helix.metamanager.StatusProviderService;
+import org.apache.helix.metamanager.impl.local.LocalStatusProvider;
+import org.apache.helix.metamanager.impl.shell.ShellStatusProvider;
+import org.apache.helix.metamanager.impl.yarn.YarnStatusProvider;
+import org.apache.log4j.Logger;
+
+/**
+ * Bootstrap wrapper around the container status provider implementations.
+ * The concrete provider ("local", "shell" or "yarn") is selected via the
+ * "type" property. Each inner implementation assigns the provider it creates
+ * to the outer {@link #status} field, which {@link #getStatus()} exposes.
+ */
+public class StatusWrapper {
+
+    static final Logger   log = Logger.getLogger(StatusWrapper.class);
+
+    WrapperImpl           impl;
+    // set by the active WrapperImpl when its provider is started
+    StatusProviderService status;
+    Properties            properties;
+
+    public StatusWrapper(Properties properties) {
+        this.properties = properties;
+    }
+
+    /**
+     * Instantiates and starts the status provider selected by the "type"
+     * property.
+     *
+     * @throws IllegalArgumentException if the type is not supported
+     */
+    public void startService() throws Exception {
+        String type = (String) properties.get("type");
+
+        log.info(String.format("starting container status service (type='%s')", type));
+
+        if ("local".equals(type)) {
+            impl = new LocalWrapperImpl();
+
+        } else if ("shell".equals(type)) {
+            impl = new ShellWrapperImpl();
+
+        } else if ("yarn".equals(type)) {
+            impl = new YarnWrapperImpl();
+
+        } else {
+            throw new IllegalArgumentException(String.format("type '%s' not supported", type));
+        }
+
+        impl.startService();
+    }
+
+    /** Stops the underlying provider and clears the exposed reference. */
+    public void stopService() throws Exception {
+        log.debug("stopping container status provider");
+        impl.stopService();
+        status = null;
+    }
+
+    /** @return the running status provider, or null if the service is stopped */
+    public StatusProvider getStatus() {
+        return status;
+    }
+
+    /** Common lifecycle of the type-specific status provider wrappers. */
+    static interface WrapperImpl {
+        void startService() throws Exception;
+
+        void stopService() throws Exception;
+    }
+
+    class LocalWrapperImpl implements WrapperImpl {
+
+        // shadows the outer field intentionally; published below on start
+        LocalStatusProvider status;
+
+        @Override
+        public void startService() throws Exception {
+            log.debug("creating local container status provider");
+            status = new LocalStatusProvider();
+            status.configure(properties);
+            status.start();
+
+            StatusWrapper.this.status = status;
+        }
+
+        @Override
+        public void stopService() throws Exception {
+            status.stop();
+        }
+    }
+
+    class ShellWrapperImpl implements WrapperImpl {
+
+        // shadows the outer field intentionally; published below on start
+        ShellStatusProvider status;
+
+        @Override
+        public void startService() throws Exception {
+            log.debug("creating shell container status provider");
+            status = new ShellStatusProvider();
+            status.configure(properties);
+            status.start();
+            StatusWrapper.this.status = status;
+        }
+
+        @Override
+        public void stopService() throws Exception {
+            status.stop();
+        }
+    }
+
+    class YarnWrapperImpl implements WrapperImpl {
+
+        // shadows the outer field intentionally; published below on start
+        YarnStatusProvider status;
+
+        @Override
+        public void startService() throws Exception {
+            String metadata = properties.getProperty("metadata");
+
+            log.debug(String.format("creating yarn container status provider (metadata='%s')", metadata));
+            status = new YarnStatusProvider();
+            status.configure(properties);
+            status.start();
+
+            StatusWrapper.this.status = status;
+        }
+
+        @Override
+        public void stopService() throws Exception {
+            status.stop();
+        }
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrap/TargetWrapper.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrap/TargetWrapper.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrap/TargetWrapper.java
new file mode 100644
index 0000000..5920fc4
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrap/TargetWrapper.java
@@ -0,0 +1,117 @@
+package org.apache.helix.metamanager.bootstrap;
+
+import java.util.Properties;
+
+import org.apache.helix.metamanager.TargetProvider;
+import org.apache.helix.metamanager.TargetProviderService;
+import org.apache.helix.metamanager.impl.FileTargetProvider;
+import org.apache.helix.metamanager.impl.RedisTargetProvider;
+import org.apache.helix.metamanager.impl.StaticTargetProvider;
+import org.apache.log4j.Logger;
+
+/**
+ * Bootstrap wrapper around the target provider implementations. The concrete
+ * provider ("static", "file" or "redis") is selected via the "type" property;
+ * every other property is handed through to the selected provider.
+ */
+public class TargetWrapper {
+
+    static final Logger   log = Logger.getLogger(TargetWrapper.class);
+
+    WrapperImpl           impl;
+    Properties            properties;
+    TargetProviderService target;
+
+    public TargetWrapper(Properties properties) {
+        this.properties = properties;
+    }
+
+    /**
+     * Instantiates and starts the target provider selected by the "type"
+     * property.
+     *
+     * @throws IllegalArgumentException if the type is not supported
+     */
+    public void startService() throws Exception {
+        String type = (String) properties.get("type");
+
+        log.info(String.format("starting target service (type='%s')", type));
+
+        impl = newImplFor(type);
+        impl.startService();
+    }
+
+    /** Stops the underlying provider and clears the exposed reference. */
+    public void stopService() throws Exception {
+        log.info("stopping target service");
+        impl.stopService();
+        target = null;
+    }
+
+    /** @return the running target provider, or null if the service is stopped */
+    public TargetProvider getTarget() {
+        return target;
+    }
+
+    /** Maps the configured type string to its wrapper implementation. */
+    private WrapperImpl newImplFor(String type) {
+        if ("static".equals(type)) {
+            return new StaticWrapperImpl();
+        }
+        if ("file".equals(type)) {
+            return new FileWrapperImpl();
+        }
+        if ("redis".equals(type)) {
+            return new RedisWrapperImpl();
+        }
+        throw new IllegalArgumentException(String.format("type '%s' not supported", type));
+    }
+
+    /** @return a copy of the bootstrap properties without the "type" selector */
+    private Properties providerProperties() {
+        Properties copy = new Properties();
+        copy.putAll(properties);
+        copy.remove("type");
+        return copy;
+    }
+
+    /** Common lifecycle of the type-specific target provider wrappers. */
+    static interface WrapperImpl {
+        void startService() throws Exception;
+
+        void stopService() throws Exception;
+    }
+
+    private class StaticWrapperImpl implements WrapperImpl {
+        @Override
+        public void startService() throws Exception {
+            log.debug("creating static target provider");
+            target = new StaticTargetProvider();
+            target.configure(providerProperties());
+            target.start();
+        }
+
+        @Override
+        public void stopService() throws Exception {
+            target.stop();
+        }
+    }
+
+    private class FileWrapperImpl implements WrapperImpl {
+        @Override
+        public void startService() throws Exception {
+            log.debug("creating file target provider");
+            target = new FileTargetProvider();
+            target.configure(providerProperties());
+            target.start();
+        }
+
+        @Override
+        public void stopService() throws Exception {
+            target.stop();
+        }
+    }
+
+    private class RedisWrapperImpl implements WrapperImpl {
+        @Override
+        public void startService() throws Exception {
+            log.debug("creating redis target provider");
+            target = new RedisTargetProvider();
+            target.configure(providerProperties());
+            target.start();
+        }
+
+        @Override
+        public void stopService() throws Exception {
+            target.stop();
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrap/ZookeeperWrapper.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrap/ZookeeperWrapper.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrap/ZookeeperWrapper.java
new file mode 100644
index 0000000..eca7fab
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrap/ZookeeperWrapper.java
@@ -0,0 +1,57 @@
+package org.apache.helix.metamanager.bootstrap;
+
+import java.io.File;
+import java.util.Properties;
+
+import org.I0Itec.zkclient.IDefaultNameSpace;
+import org.I0Itec.zkclient.ZkClient;
+import org.I0Itec.zkclient.ZkServer;
+import org.apache.commons.io.FileUtils;
+import org.apache.log4j.Logger;
+
+/**
+ * Bootstrap wrapper around an embedded zookeeper server. Wipes the
+ * configured data and log directories on every start, so state does not
+ * survive across runs.
+ */
+public class ZookeeperWrapper {
+
+    static final Logger log = Logger.getLogger(ZookeeperWrapper.class);
+
+    ZkServer            server;
+    Properties          properties;
+
+    public ZookeeperWrapper(Properties properties) {
+        this.properties = properties;
+    }
+
+    /** Starts the embedded server on the configured port with fresh directories. */
+    public void startService() {
+        final String snapshotDir = properties.getProperty("datadir");
+        final String txnLogDir = properties.getProperty("logdir");
+        final int clientPort = Integer.parseInt(properties.getProperty("port"));
+
+        log.info(String.format("starting zookeeper service (dataDir='%s', logDir='%s', port=%d)", snapshotDir, txnLogDir, clientPort));
+
+        // discard any state left over from a previous run
+        purge(snapshotDir);
+        purge(txnLogDir);
+
+        server = new ZkServer(snapshotDir, txnLogDir, new IDefaultNameSpace() {
+            @Override
+            public void createDefaultNameSpace(ZkClient zkClient) {
+                // no default namespace required
+            }
+        }, clientPort);
+        server.start();
+    }
+
+    /** Best-effort removal of a directory; missing paths are ignored. */
+    private void purge(String path) {
+        FileUtils.deleteQuietly(new File(path));
+    }
+
+    /** Shuts the embedded server down; safe to call when already stopped. */
+    public void stopService() {
+        log.info("stopping zookeeper service");
+
+        if (server != null) {
+            server.shutdown();
+            server = null;
+        }
+    }
+
+    /** @return the running embedded server, or null if not started */
+    public ZkServer getZookeeper() {
+        return server;
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrapper/Boot.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrapper/Boot.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrapper/Boot.java
new file mode 100644
index 0000000..004573d
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrapper/Boot.java
@@ -0,0 +1,132 @@
+package org.apache.helix.metamanager.bootstrapper;
+
+import java.io.InputStream;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+
+import org.apache.helix.metamanager.Service;
+import org.apache.log4j.Logger;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.Lists;
+
+/**
+ * Bootstrapper for elastic cluster deployment using *.properties configuration
+ * files. (Program entry point)
+ * 
+ */
+public class Boot implements Service {
+
+    static final Logger       log          = Logger.getLogger(Boot.class);
+
+    static final Map<String, Class<? extends Service>> classes      = new HashMap<String, Class<? extends Service>>();
+    static {
+        classes.put("zookeeper", ZookeeperService.class);
+        classes.put("cluster", ClusterService.class);
+        classes.put("resource", ResourceService.class);
+        classes.put("controller", ControllerService.class);
+        classes.put("metacluster", MetaClusterService.class);
+        classes.put("metaresource", MetaResourceService.class);
+        classes.put("metaprovider", MetaProviderService.class);
+        classes.put("metacontroller", MetaControllerService.class);
+    }
+
+    static final List<String> serviceOrder = Arrays.asList("zookeeper", "cluster", "resource", "metacluster", "metaresource",
+                                                                            "metaprovider", "controller", "metacontroller");
+
+    Properties                properties;
+    List<Service>             services     = Lists.newArrayList();
+
+    @Override
+    public void configure(Properties properties) throws Exception {
+        Preconditions.checkNotNull(properties);
+        this.properties = properties;
+    }
+
+    @Override
+    public void start() throws Exception {
+        log.info(String.format("bootstraping started"));
+
+        for (String key : serviceOrder) {
+            if (BootUtils.hasNamespace(properties, key + ".0")) {
+                processIndexedNamespace(key);
+            } else if (BootUtils.hasNamespace(properties, key)) {
+                processNamespace(key);
+            }
+        }
+
+        log.info(String.format("bootstraping completed"));
+    }
+
+    private void processIndexedNamespace(String key) throws Exception {
+        int i = 0;
+        String indexedKey = key + "." + i;
+
+        while (BootUtils.hasNamespace(properties, indexedKey)) {
+            log.info(String.format("processing namespace '%s'", indexedKey));
+            Service service = BootUtils.createInstance(classes.get(key));
+            service.configure(BootUtils.getNamespace(properties, indexedKey));
+            service.start();
+
+            services.add(service);
+
+            i++;
+            indexedKey = key + "." + i;
+        }
+    }
+
+    private void processNamespace(String key) throws Exception {
+        log.info(String.format("processing namespace '%s'", key));
+        Service service = BootUtils.createInstance(classes.get(key));
+        service.configure(BootUtils.getNamespace(properties, key));
+        service.start();
+
+        services.add(service);
+    }
+
+    @Override
+    public void stop() throws Exception {
+        log.info(String.format("shutdown started"));
+
+        Collections.reverse(services);
+        for (Service service : services) {
+            service.stop();
+        }
+
+        log.info(String.format("shutdown completed"));
+    }
+
+    public Collection<Service> getServcies() {
+        return services;
+    }
+
+    public static void main(String[] args) throws Exception {
+        if (args.length < 1) {
+            log.error(String.format("Usage: Boot properties_path"));
+            return;
+        }
+
+        String resourcePath = args[0];
+
+        log.info(String.format("reading definition from '%s'", resourcePath));
+        Properties properties = new Properties();
+        properties.load(ClassLoader.getSystemResourceAsStream(resourcePath));
+
+        final Boot boot = new Boot();
+        boot.configure(properties);
+        boot.start();
+
+        Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() {
+            @Override
+            public void run() {
+                log.debug("Running shutdown hook");
+                try { boot.stop(); } catch (Exception ignore) {}
+            }
+        }));
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrapper/BootUtils.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrapper/BootUtils.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrapper/BootUtils.java
new file mode 100644
index 0000000..2fb9ff6
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrapper/BootUtils.java
@@ -0,0 +1,104 @@
+package org.apache.helix.metamanager.bootstrapper;
+
+import java.util.Collection;
+import java.util.Map;
+import java.util.Properties;
+import java.util.Set;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.commons.lang.StringUtils;
+import org.apache.log4j.Logger;
+
+import com.google.common.collect.Lists;
+import com.google.common.collect.Sets;
+
+/**
+ * Utility for instantiating bootstrapping services and parsing hierarchical
+ * properties files.
+ * 
+ */
+public class BootUtils {
+
+    public static final String CLASS_PROPERTY = "class";
+    static final Logger        log            = Logger.getLogger(BootUtils.class);
+
+    public static boolean hasNamespace(Properties properties, String namespace) {
+        String prefix = namespace + ".";
+        for (String key : properties.stringPropertyNames()) {
+            if (key.startsWith(prefix))
+                return true;
+        }
+        return false;
+    }
+
+    public static Set<String> getNamespaces(Properties properties) {
+        Pattern pattern = Pattern.compile("^([^\\.\\=]+)");
+
+        Set<String> namespaces = Sets.newHashSet();
+
+        for (Map.Entry<Object, Object> rawEntry : properties.entrySet()) {
+            String key = (String) rawEntry.getKey();
+
+            Matcher matcher = pattern.matcher(key);
+            if (matcher.find()) {
+                namespaces.add(matcher.group(1));
+            }
+        }
+
+        return namespaces;
+    }
+
+    public static Properties getNamespace(Properties source, String namespace) {
+        Properties dest = new Properties();
+        String prefix = namespace + ".";
+
+        for (Map.Entry<Object, Object> rawEntry : source.entrySet()) {
+            String key = (String) rawEntry.getKey();
+            String value = (String) rawEntry.getValue();
+
+            if (key.startsWith(prefix)) {
+                String newKey = key.substring(prefix.length());
+                dest.put(newKey, value);
+            }
+        }
+
+        return dest;
+    }
+
+    public static Collection<Properties> getContainerProps(Properties properties) {
+        Collection<Properties> containerProps = Lists.newArrayList();
+
+        String containers = properties.getProperty("containers");
+        String containerTypes[] = StringUtils.split(containers, ",");
+
+        for (String containerType : containerTypes) {
+            Properties containerProp = BootUtils.getNamespace(BootUtils.getNamespace(properties, "container"), containerType);
+            log.debug(String.format("adding container type (type='%s', properties='%s')", containerType, containerProp));
+            containerProps.add(containerProp);
+        }
+
+        return containerProps;
+    }
+
+    @SuppressWarnings("unchecked")
+    public static <T> T createInstance(Class<?> clazz) throws Exception {
+        try {
+            log.debug(String.format("checking for default constructor in class '%s'", clazz.getSimpleName()));
+            return (T) clazz.getConstructor().newInstance();
+        } catch (Exception e) {
+            log.debug("no default constructor found");
+        }
+
+        throw new Exception(String.format("no suitable constructor for class '%s'", clazz.getSimpleName()));
+    }
+
+    public static <T> T createInstance(String className) throws Exception {
+        return createInstance(Class.forName(className));
+    }
+
+    private BootUtils() {
+        // left blank
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrapper/ClusterService.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrapper/ClusterService.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrapper/ClusterService.java
new file mode 100644
index 0000000..5b3ec7e
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrapper/ClusterService.java
@@ -0,0 +1,46 @@
+package org.apache.helix.metamanager.bootstrapper;
+
+import java.util.Properties;
+
+import org.apache.helix.HelixAdmin;
+import org.apache.helix.manager.zk.ZKHelixAdmin;
+import org.apache.helix.metamanager.Service;
+import org.apache.helix.model.StateModelDefinition;
+import org.apache.helix.tools.StateModelConfigGenerator;
+import org.apache.log4j.Logger;
+
+/**
+ * Cluster bootstrapping. Create Helix data structures in zookeeper for the
+ * managed cluster.
+ * 
+ */
+public class ClusterService implements Service {
+
+    static final Logger log = Logger.getLogger(ClusterService.class);
+
+    String              name;
+    String              address;
+
+    @Override
+    public void configure(Properties properties) throws Exception {
+        name = properties.getProperty("name", "cluster");
+        address = properties.getProperty("address", "localhost:2199");
+    }
+
+    @Override
+    public void start() throws Exception {
+        log.info(String.format("setting up '%s/%s'", address, name));
+        HelixAdmin admin = new ZKHelixAdmin(address);
+        admin.addCluster(name, false);
+        admin.addStateModelDef(name, "OnlineOffline", new StateModelDefinition(StateModelConfigGenerator.generateConfigForOnlineOffline()));
+        admin.addStateModelDef(name, "MasterSlave", new StateModelDefinition(StateModelConfigGenerator.generateConfigForMasterSlave()));
+        admin.close();
+        log.info("setup complete");
+    }
+
+    @Override
+    public void stop() throws Exception {
+        // left blank
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrapper/ControllerService.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrapper/ControllerService.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrapper/ControllerService.java
new file mode 100644
index 0000000..2a95ecf
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrapper/ControllerService.java
@@ -0,0 +1,50 @@
+package org.apache.helix.metamanager.bootstrapper;
+
+import java.util.Properties;
+import java.util.concurrent.ScheduledExecutorService;
+
+import org.apache.helix.HelixManager;
+import org.apache.helix.controller.HelixControllerMain;
+import org.apache.helix.metamanager.Service;
+import org.apache.log4j.Logger;
+
+/**
+ * Helix controller bootstrapping and management. Create standalone controller
+ * for managed Helix cluster.
+ * 
+ */
+public class ControllerService implements Service {
+
+    static final Logger      log = Logger.getLogger(ControllerService.class);
+
+    // controller instance name registered with Helix
+    String                   name;
+    // name of the managed cluster this controller drives
+    String                   cluster;
+    // zookeeper connect string for the managed cluster
+    String                   address;
+
+    // live connection to Helix; non-null only between start() and stop()
+    HelixManager             manager;
+
+    // NOTE(review): never assigned or used in this class — presumably a
+    // leftover from a copy of MetaControllerService; confirm and remove
+    ScheduledExecutorService executor;
+
+    @Override
+    public void configure(Properties properties) throws Exception {
+        name = properties.getProperty("name", "controller");
+        cluster = properties.getProperty("cluster", "cluster");
+        address = properties.getProperty("address", "localhost:2199");
+    }
+
+    @Override
+    public void start() throws Exception {
+        log.info(String.format("starting controller '%s' at '%s/%s'", name, address, cluster));
+        // STANDALONE: single controller process, no leader election across a controller cluster
+        manager = HelixControllerMain.startHelixController(address, cluster, name, HelixControllerMain.STANDALONE);
+    }
+
+    @Override
+    public void stop() throws Exception {
+        // idempotent: safe to call stop() twice or before start()
+        if (manager != null) {
+            log.info(String.format("stopping controller '%s' at '%s/%s'", name, address, cluster));
+            manager.disconnect();
+            manager = null;
+        }
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrapper/MetaClusterService.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrapper/MetaClusterService.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrapper/MetaClusterService.java
new file mode 100644
index 0000000..340c961
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrapper/MetaClusterService.java
@@ -0,0 +1,61 @@
+package org.apache.helix.metamanager.bootstrapper;
+
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Properties;
+
+import org.apache.helix.HelixAdmin;
+import org.apache.helix.manager.zk.ZKHelixAdmin;
+import org.apache.helix.metamanager.Service;
+import org.apache.helix.model.HelixConfigScope;
+import org.apache.helix.model.HelixConfigScope.ConfigScopeProperty;
+import org.apache.helix.model.StateModelDefinition;
+import org.apache.helix.model.builder.HelixConfigScopeBuilder;
+import org.apache.helix.tools.StateModelConfigGenerator;
+import org.apache.log4j.Logger;
+
+/**
+ * Meta cluster bootstrapping. Create Helix data structures in zookeeper for
+ * the meta cluster.
+ * 
+ */
+public class MetaClusterService implements Service {
+
+    static final Logger log = Logger.getLogger(MetaClusterService.class);
+
+    String              name;           // meta cluster name
+    String              address;        // zookeeper address of the meta cluster
+    String              managedCluster; // name of the cluster being managed
+    String              managedAddress; // zookeeper address of the managed cluster
+
+    @Override
+    public void configure(Properties properties) throws Exception {
+        name = properties.getProperty("name", "metacluster");
+        address = properties.getProperty("address", "localhost:2199");
+        managedCluster = properties.getProperty("managedcluster", "cluster");
+        managedAddress = properties.getProperty("managedaddress", "localhost:2199");
+    }
+
+    /**
+     * Create the meta cluster in zookeeper, register the OnlineOffline state
+     * model, and store the managed cluster's coordinates as cluster-scoped
+     * config. The admin connection is closed even on failure (the original
+     * leaked it on exception).
+     */
+    @Override
+    public void start() throws Exception {
+        log.info(String.format("setting up '%s/%s'", address, name));
+        HelixAdmin admin = new ZKHelixAdmin(address);
+        try {
+            // "false": do not overwrite an existing cluster record
+            admin.addCluster(name, false);
+            admin.addStateModelDef(name, "OnlineOffline", new StateModelDefinition(StateModelConfigGenerator.generateConfigForOnlineOffline()));
+
+            // expose managed cluster coordinates so participants of the meta
+            // cluster can locate the cluster they are supposed to manage
+            HelixConfigScope scope = new HelixConfigScopeBuilder(ConfigScopeProperty.CLUSTER, name).build();
+            Map<String, String> properties = new HashMap<String, String>();
+            properties.put("cluster", managedCluster);
+            properties.put("address", managedAddress);
+            admin.setConfig(scope, properties);
+        } finally {
+            // always release the underlying zookeeper connection
+            admin.close();
+        }
+        log.info("setup complete");
+    }
+
+    @Override
+    public void stop() throws Exception {
+        // left blank
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrapper/MetaControllerService.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrapper/MetaControllerService.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrapper/MetaControllerService.java
new file mode 100644
index 0000000..a12753c
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrapper/MetaControllerService.java
@@ -0,0 +1,114 @@
+package org.apache.helix.metamanager.bootstrapper;
+
+import java.util.Properties;
+import java.util.concurrent.Executors;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.helix.HelixAdmin;
+import org.apache.helix.HelixManager;
+import org.apache.helix.controller.HelixControllerMain;
+import org.apache.helix.metamanager.Service;
+import org.apache.helix.metamanager.StatusProviderService;
+import org.apache.helix.metamanager.TargetProviderService;
+import org.apache.helix.metamanager.provider.ProviderRebalancerSingleton;
+import org.apache.helix.model.IdealState;
+import org.apache.log4j.Logger;
+
+/**
+ * Meta cluster controller bootstrapping and management. Create standalone
+ * controller for Helix meta cluster. Spawn StatusProvider and TargetProvider
+ * and trigger periodic status refresh in meta cluster.
+ * 
+ */
+public class MetaControllerService implements Service {
+
+    static final Logger      log = Logger.getLogger(MetaControllerService.class);
+
+    // controller instance name registered with the meta cluster
+    String                   name;
+    // meta cluster name and its zookeeper connect string
+    String                   metacluster;
+    String                   metaaddress;
+    // refresh period in milliseconds; 0 (default) disables periodic refresh
+    long                     autorefresh;
+
+    // live Helix connection; non-null only between start() and stop()
+    HelixManager             manager;
+    StatusProviderService    statusService;
+    TargetProviderService    targetService;
+    // single-threaded timer driving RefreshRunnable when autorefresh > 0
+    ScheduledExecutorService executor;
+
+    @Override
+    public void configure(Properties properties) throws Exception {
+        name = properties.getProperty("name", "controller");
+        metacluster = properties.getProperty("metacluster", "metacluster");
+        metaaddress = properties.getProperty("metaaddress", "localhost:2199");
+        autorefresh = Long.valueOf(properties.getProperty("autorefresh", "0"));
+
+        // instantiate the status provider from the "status.*" sub-namespace
+        // and hand it to the rebalancer via a process-wide singleton
+        Properties statusProperties = BootUtils.getNamespace(properties, "status");
+        statusService = BootUtils.createInstance(Class.forName(statusProperties.getProperty("class")));
+        statusService.configure(statusProperties);
+        ProviderRebalancerSingleton.setStatusProvider(statusService);
+
+        // same pattern for the target provider ("target.*" sub-namespace)
+        Properties targetProperties = BootUtils.getNamespace(properties, "target");
+        targetService = BootUtils.createInstance(Class.forName(targetProperties.getProperty("class")));
+        targetService.configure(targetProperties);
+        ProviderRebalancerSingleton.setTargetProvider(targetService);
+    }
+
+    @Override
+    public void start() throws Exception {
+        // providers must be running before the controller starts rebalancing,
+        // since the rebalancer consults them through the singleton
+        log.debug("Starting status service");
+        statusService.start();
+
+        log.debug("Starting target service");
+        targetService.start();
+
+        log.info(String.format("starting controller '%s' at '%s/%s'", name, metaaddress, metacluster));
+        manager = HelixControllerMain.startHelixController(metaaddress, metacluster, name, HelixControllerMain.STANDALONE);
+
+        if (autorefresh > 0) {
+            log.debug(String.format("installing autorefresh with interval %d ms", autorefresh));
+            executor = Executors.newSingleThreadScheduledExecutor();
+            executor.scheduleAtFixedRate(new RefreshRunnable(), autorefresh, autorefresh, TimeUnit.MILLISECONDS);
+        }
+    }
+
+    @Override
+    public void stop() throws Exception {
+        // teardown is the reverse of start(): the refresh timer is stopped
+        // first so RefreshRunnable cannot touch a disconnected manager
+        if (executor != null) {
+            executor.shutdownNow();
+            // busy-wait until the in-flight refresh (if any) has finished
+            while (!executor.isTerminated()) {
+                Thread.sleep(100);
+            }
+            executor = null;
+        }
+        if (manager != null) {
+            log.info(String.format("Stopping controller '%s' at '%s/%s'", name, metaaddress, metacluster));
+            manager.disconnect();
+            manager = null;
+        }
+        if (targetService != null) {
+            log.debug("Stopping target service");
+            targetService.stop();
+            targetService = null;
+        }
+        if (statusService != null) {
+            log.debug("Stopping status service");
+            statusService.stop();
+            statusService = null;
+        }
+    }
+
+    // Periodically rewrites each meta resource's ideal state unchanged;
+    // the write itself triggers the user-defined rebalancer to re-run.
+    private class RefreshRunnable implements Runnable {
+        @Override
+        public void run() {
+            log.debug("running status refresh");
+            HelixAdmin admin = manager.getClusterManagmentTool();
+
+            for (String metaResource : admin.getResourcesInCluster(metacluster)) {
+                log.debug(String.format("refreshing meta resource '%s'", metaResource));
+
+                // read-modify-write with no modification: a deliberate "poke"
+                IdealState poke = admin.getResourceIdealState(metacluster, metaResource);
+                admin.setResourceIdealState(metacluster, metaResource, poke);
+            }
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrapper/MetaProviderService.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrapper/MetaProviderService.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrapper/MetaProviderService.java
new file mode 100644
index 0000000..0b68580
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrapper/MetaProviderService.java
@@ -0,0 +1,81 @@
+package org.apache.helix.metamanager.bootstrapper;
+
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+
+import org.apache.helix.HelixAdmin;
+import org.apache.helix.manager.zk.ZKHelixAdmin;
+import org.apache.helix.metamanager.Service;
+import org.apache.helix.metamanager.provider.ProviderProperties;
+import org.apache.helix.model.HelixConfigScope;
+import org.apache.helix.model.HelixConfigScope.ConfigScopeProperty;
+import org.apache.helix.model.builder.HelixConfigScopeBuilder;
+import org.apache.log4j.Logger;
+
+import com.google.common.collect.Lists;
+
+/**
+ * ContainerProvider bootstrapping and management. Create container provider
+ * participant, configure with container properties from meta resources and
+ * connect to meta cluster.
+ * 
+ */
+public class MetaProviderService implements Service {
+
+    static final Logger log = Logger.getLogger(MetaProviderService.class);
+
+    // wrapped container provider instance; non-null between start() and stop()
+    Service             service;
+
+    String              clazz;       // fully qualified provider class name
+    String              metaAddress; // zookeeper address of the meta cluster
+    String              metaCluster; // meta cluster name
+
+    ProviderProperties  config;
+
+    @Override
+    public void configure(Properties properties) throws Exception {
+        clazz = properties.getProperty("class");
+        metaAddress = properties.getProperty("metaaddress", "localhost:2199");
+        metaCluster = properties.getProperty("metacluster", "metacluster");
+
+        config = new ProviderProperties();
+        config.putAll(properties);
+    }
+
+    /**
+     * Gather managed-cluster coordinates and per-resource container configs
+     * from the meta cluster, then instantiate, configure and start the
+     * provider. The admin connection is now closed (the original never
+     * closed it, leaking a zookeeper connection on every start).
+     */
+    @Override
+    public void start() throws Exception {
+        log.info(String.format("starting service '%s' (config=%s)", clazz, config));
+
+        HelixAdmin admin = new ZKHelixAdmin(metaAddress);
+        try {
+            // managed cluster coordinates live in cluster-scoped config
+            HelixConfigScope managedScope = new HelixConfigScopeBuilder(ConfigScopeProperty.CLUSTER, metaCluster).build();
+            Map<String, String> managedProps = admin.getConfig(managedScope, Lists.newArrayList("cluster", "address"));
+            config.putAll(managedProps);
+
+            // each meta resource carries the configuration of one container type
+            for (String resource : admin.getResourcesInCluster(metaCluster)) {
+                HelixConfigScope resScope = new HelixConfigScopeBuilder(ConfigScopeProperty.RESOURCE, metaCluster, resource).build();
+                List<String> resKeys = admin.getConfigKeys(resScope);
+                Map<String, String> resProps = admin.getConfig(resScope, resKeys);
+
+                Properties containerProps = new Properties();
+                containerProps.putAll(resProps);
+
+                config.addContainer(resource, containerProps);
+            }
+        } finally {
+            // release the zookeeper connection once config is assembled
+            admin.close();
+        }
+
+        service = BootUtils.createInstance(clazz);
+        service.configure(config);
+        service.start();
+    }
+
+    @Override
+    public void stop() throws Exception {
+        log.info(String.format("stopping service '%s' (config=%s)", clazz, config));
+        if (service != null) {
+            service.stop();
+            service = null;
+        }
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrapper/MetaResourceService.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrapper/MetaResourceService.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrapper/MetaResourceService.java
new file mode 100644
index 0000000..c8f0664
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrapper/MetaResourceService.java
@@ -0,0 +1,87 @@
+package org.apache.helix.metamanager.bootstrapper;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+
+import org.apache.helix.HelixAdmin;
+import org.apache.helix.manager.zk.ZKHelixAdmin;
+import org.apache.helix.metamanager.Service;
+import org.apache.helix.metamanager.provider.ProviderRebalancer;
+import org.apache.helix.model.HelixConfigScope;
+import org.apache.helix.model.IdealState;
+import org.apache.helix.model.HelixConfigScope.ConfigScopeProperty;
+import org.apache.helix.model.IdealState.RebalanceMode;
+import org.apache.helix.model.builder.HelixConfigScopeBuilder;
+import org.apache.log4j.Logger;
+
+import com.google.common.collect.Maps;
+
+/**
+ * Bootstrapping meta resource. Create container type configuration in Helix
+ * zookeeper namespace.
+ * 
+ */
+public class MetaResourceService implements Service {
+
+    static final Logger log = Logger.getLogger(MetaResourceService.class);
+
+    String              metaCluster; // meta cluster name
+    String              metaAddress; // zookeeper address of the meta cluster
+    String              name;        // meta resource (container type) name
+    Map<String, String> config;      // raw container configuration, stored as resource config
+
+    @Override
+    public void configure(Properties properties) throws Exception {
+        metaCluster = properties.getProperty("metacluster", "metacluster");
+        metaAddress = properties.getProperty("metaaddress", "localhost:2199");
+        name = properties.getProperty("name", "container");
+
+        // copy ALL properties (including the ones read above) into the
+        // resource config so the provider can later reconstruct them
+        this.config = new HashMap<String, String>();
+        for (Map.Entry<Object, Object> entry : properties.entrySet()) {
+            this.config.put((String) entry.getKey(), (String) entry.getValue());
+        }
+    }
+
+    /**
+     * Create the meta resource with a user-defined rebalancer and attach the
+     * container configuration. The admin connection is closed even if setup
+     * fails part-way (the original leaked it on exception).
+     */
+    @Override
+    public void start() throws Exception {
+        log.info(String.format("setting up meta resource '%s' at '%s/%s'", name, metaAddress, metaCluster));
+        HelixAdmin admin = new ZKHelixAdmin(metaAddress);
+        try {
+            log.info(String.format("setting up container '%s' (config='%s')", name, config));
+
+            admin.addResource(metaCluster, name, 1, "OnlineOffline", RebalanceMode.USER_DEFINED.toString());
+            IdealState idealState = admin.getResourceIdealState(metaCluster, name);
+            idealState.setRebalancerClassName(ProviderRebalancer.class.getName());
+            idealState.setReplicas("1");
+
+            // BEGIN workaround
+            // FIXME workaround for HELIX-226
+            // pre-populate empty list/map fields for a fixed partition budget
+            Map<String, List<String>> listFields = Maps.newHashMap();
+            Map<String, Map<String, String>> mapFields = Maps.newHashMap();
+            for (int i = 0; i < 256; i++) {
+                String partitionName = name + "_" + i;
+                listFields.put(partitionName, new ArrayList<String>());
+                mapFields.put(partitionName, new HashMap<String, String>());
+            }
+            idealState.getRecord().setListFields(listFields);
+            idealState.getRecord().setMapFields(mapFields);
+            // END workaround
+
+            admin.setResourceIdealState(metaCluster, name, idealState);
+
+            // store container configuration as resource-scoped config
+            HelixConfigScope scope = new HelixConfigScopeBuilder(ConfigScopeProperty.RESOURCE, metaCluster, name).build();
+            admin.setConfig(scope, this.config);
+        } finally {
+            // always release the underlying zookeeper connection
+            admin.close();
+        }
+        log.info("setup complete");
+    }
+
+    @Override
+    public void stop() throws Exception {
+        // left blank
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrapper/MetaService.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrapper/MetaService.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrapper/MetaService.java
new file mode 100644
index 0000000..2e5e686
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrapper/MetaService.java
@@ -0,0 +1,80 @@
+package org.apache.helix.metamanager.bootstrapper;
+
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+
+import org.apache.commons.lang.StringUtils;
+import org.apache.helix.HelixAdmin;
+import org.apache.helix.manager.zk.ZKHelixAdmin;
+import org.apache.helix.metamanager.Service;
+import org.apache.helix.metamanager.bootstrap.BootUtils;
+import org.apache.helix.model.HelixConfigScope;
+import org.apache.helix.model.HelixConfigScope.ConfigScopeProperty;
+import org.apache.helix.model.builder.HelixConfigScopeBuilder;
+import org.apache.log4j.Logger;
+
+import com.google.common.collect.Lists;
+
+/**
+ * Generic service wrapper. Assembles a flat configuration from the meta
+ * cluster (managed cluster coordinates, container list, per-resource
+ * properties prefixed with the resource name) and delegates to a
+ * dynamically instantiated service.
+ */
+public class MetaService implements Service {
+
+    static final Logger log = Logger.getLogger(MetaService.class);
+
+    // wrapped service instance; non-null between start() and stop()
+    Service             service;
+
+    String              clazz;       // fully qualified service class name
+    String              metaAddress; // zookeeper address of the meta cluster
+
+    String              metaCluster; // meta cluster name
+
+    Properties          config;
+
+    @Override
+    public void configure(Properties properties) throws Exception {
+        clazz = properties.getProperty("class");
+        metaAddress = properties.getProperty("metaaddress", "localhost:2199");
+        metaCluster = properties.getProperty("metacluster", "metacluster");
+
+        this.config = new Properties();
+        this.config.putAll(properties);
+    }
+
+    @Override
+    public void start() throws Exception {
+        log.info(String.format("starting service '%s' (config=%s)", clazz, config));
+
+        HelixAdmin admin = new ZKHelixAdmin(metaAddress);
+        try {
+            // managed cluster coordinates live in cluster-scoped config
+            HelixConfigScope managedScope = new HelixConfigScopeBuilder(ConfigScopeProperty.CLUSTER, metaCluster).build();
+            Map<String, String> managedProps = admin.getConfig(managedScope, Lists.newArrayList("cluster", "address"));
+            config.putAll(managedProps);
+
+            Collection<String> resources = admin.getResourcesInCluster(metaCluster);
+            config.put("containers", StringUtils.join(resources, ","));
+
+            // flatten per-resource config into "<resource>.<key>" entries;
+            // reuse 'resources' instead of a second zookeeper round trip
+            for (String resource : resources) {
+                HelixConfigScope resScope = new HelixConfigScopeBuilder(ConfigScopeProperty.RESOURCE, metaCluster, resource).build();
+                List<String> resKeys = admin.getConfigKeys(resScope);
+                Map<String, String> resProps = admin.getConfig(resScope, resKeys);
+
+                for (Map.Entry<String, String> entry : resProps.entrySet()) {
+                    config.put(resource + "." + entry.getKey(), entry.getValue());
+                }
+            }
+        } finally {
+            // original never closed the admin client, leaking the connection
+            admin.close();
+        }
+
+        service = BootUtils.createInstance(clazz);
+        service.configure(config);
+        service.start();
+    }
+
+    @Override
+    public void stop() throws Exception {
+        log.info(String.format("stopping service '%s' (config=%s)", clazz, config));
+        if (service != null) {
+            service.stop();
+            service = null;
+        }
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrapper/ResourceService.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrapper/ResourceService.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrapper/ResourceService.java
new file mode 100644
index 0000000..35bed91
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrapper/ResourceService.java
@@ -0,0 +1,61 @@
+package org.apache.helix.metamanager.bootstrapper;
+
+import java.util.Properties;
+
+import org.apache.helix.HelixAdmin;
+import org.apache.helix.manager.zk.ZKHelixAdmin;
+import org.apache.helix.metamanager.Service;
+import org.apache.helix.model.IdealState;
+import org.apache.helix.model.IdealState.RebalanceMode;
+import org.apache.log4j.Logger;
+
+/**
+ * Bootstrapping Helix resource. Create resource in Helix and configure
+ * properties.
+ * 
+ */
+public class ResourceService implements Service {
+
+    static final Logger log = Logger.getLogger(ResourceService.class);
+
+    String              cluster;    // managed cluster to add the resource to
+    String              address;    // zookeeper connect string
+    String              container;  // instance group tag binding resource to container type
+    String              name;       // resource name
+    String              model;      // state model definition name
+    int                 partitions; // partition count
+    int                 replica;    // replicas per partition
+
+    @Override
+    public void configure(Properties properties) throws Exception {
+        cluster = properties.getProperty("cluster", "cluster");
+        address = properties.getProperty("address", "localhost:2199");
+        name = properties.getProperty("name", "resource");
+        container = properties.getProperty("container", "container");
+        model = properties.getProperty("model", "OnlineOffline");
+        partitions = Integer.parseInt(properties.getProperty("partitions", "1"));
+        replica = Integer.parseInt(properties.getProperty("replica", "1"));
+    }
+
+    /**
+     * Create the resource in the managed cluster, tagged to its container
+     * type. The admin connection is closed even if setup fails part-way
+     * (the original leaked it on exception).
+     */
+    @Override
+    public void start() throws Exception {
+        log.info(String.format("setting up resource '%s' at '%s/%s'", name, address, cluster));
+        HelixAdmin admin = new ZKHelixAdmin(address);
+        try {
+            log.info(String.format("setting up resource '%s' (container='%s', model='%s', partitions=%d, replica=%d)", name, container, model, partitions, replica));
+
+            admin.addResource(cluster, name, partitions, model, RebalanceMode.FULL_AUTO.toString());
+            IdealState idealState = admin.getResourceIdealState(cluster, name);
+            // group tag limits placement to instances of the matching container type
+            idealState.setInstanceGroupTag(container);
+            idealState.setReplicas(String.valueOf(replica));
+            admin.setResourceIdealState(cluster, name, idealState);
+        } finally {
+            // always release the underlying zookeeper connection
+            admin.close();
+        }
+        log.info("setup complete");
+    }
+
+    @Override
+    public void stop() throws Exception {
+        // left blank
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrapper/ZookeeperService.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrapper/ZookeeperService.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrapper/ZookeeperService.java
new file mode 100644
index 0000000..b220dc8
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrapper/ZookeeperService.java
@@ -0,0 +1,64 @@
+package org.apache.helix.metamanager.bootstrapper;
+
+import java.io.File;
+import java.util.Properties;
+
+import org.I0Itec.zkclient.IDefaultNameSpace;
+import org.I0Itec.zkclient.ZkClient;
+import org.I0Itec.zkclient.ZkServer;
+import org.apache.commons.io.FileUtils;
+import org.apache.helix.metamanager.Service;
+import org.apache.log4j.Logger;
+
+/**
+ * Bootstrapping zookeeper. Convenience tool for creating standalone zookeeper
+ * instance for test deployments. For production use a separate zookeeper
+ * cluster is strongly recommended.
+ * 
+ */
+public class ZookeeperService implements Service {
+
+    static final Logger log = Logger.getLogger(ZookeeperService.class);
+
+    // zookeeper data and transaction-log directories; WIPED on every start
+    String              dataDir;
+    String              logDir;
+    // client port the embedded server listens on
+    int                 port;
+
+    // embedded server; non-null only between start() and stop()
+    ZkServer            server;
+
+    @Override
+    public void configure(Properties properties) throws Exception {
+        dataDir = properties.getProperty("datadir", "/tmp/zk/data");
+        logDir = properties.getProperty("logdir", "/tmp/zk/log");
+        port = Integer.parseInt(properties.getProperty("port", "2199"));
+    }
+
+    @Override
+    public void start() {
+        log.info(String.format("starting zookeeper service (dataDir='%s', logDir='%s', port=%d)", dataDir, logDir, port));
+
+        // DESTRUCTIVE: every start wipes previous state; acceptable only for
+        // the test deployments this class is documented to target
+        FileUtils.deleteQuietly(new File(dataDir));
+        FileUtils.deleteQuietly(new File(logDir));
+
+        // no default namespace nodes are created for new clients
+        IDefaultNameSpace defaultNameSpace = new IDefaultNameSpace() {
+            @Override
+            public void createDefaultNameSpace(ZkClient zkClient) {
+                // left blank
+            }
+        };
+
+        server = new ZkServer(dataDir, logDir, defaultNameSpace, port);
+        server.start();
+    }
+
+    @Override
+    public void stop() {
+        log.info("stopping zookeeper service");
+
+        // idempotent: safe to call stop() twice or before start()
+        if (server != null) {
+            server.shutdown();
+            server = null;
+        }
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/cluster/FileTargetProvider.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/cluster/FileTargetProvider.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/cluster/FileTargetProvider.java
new file mode 100644
index 0000000..b70d9ba
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/cluster/FileTargetProvider.java
@@ -0,0 +1,29 @@
+package org.apache.helix.metamanager.cluster;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.Properties;
+
+import org.apache.helix.metamanager.ClusterStatusProvider;
+
+
+public class FileTargetProvider implements ClusterStatusProvider {
+
+	final File file;
+	
+	public FileTargetProvider(Properties properties) {
+	    this.file = new File(properties.getProperty("path"));
+	}
+
+	@Override
+	public int getTargetContainerCount(String containerType) throws FileNotFoundException, IOException, IllegalArgumentException {
+		Properties properties = new Properties();
+		properties.load(new FileReader(file));
+		if(!properties.contains(containerType))
+			throw new IllegalArgumentException(String.format("container type '%s' not found in '%s'", containerType, file.getCanonicalPath()));
+		return Integer.parseInt((String)properties.get(containerType));
+	}
+
+}


[15/15] git commit: Adding Helix-task-framework and Yarn integration modules

Posted by ki...@apache.org.
Adding Helix-task-framework and Yarn integration modules


Project: http://git-wip-us.apache.org/repos/asf/incubator-helix/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-helix/commit/e38aa54b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-helix/tree/e38aa54b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-helix/diff/e38aa54b

Branch: refs/heads/helix-yarn
Commit: e38aa54b07453d3dd1690317cb5e39efe5a4b79c
Parents: 84fb26b
Author: Kishore Gopalakrishna <g....@gmail.com>
Authored: Fri Sep 20 11:29:43 2013 -0700
Committer: Kishore Gopalakrishna <g....@gmail.com>
Committed: Fri Sep 20 11:29:43 2013 -0700

----------------------------------------------------------------------
 .../main/java/org/apache/helix/ZNRecord.java    |  21 +-
 .../controller/GenericHelixController.java      |   1 -
 .../stages/CurrentStateComputationStage.java    |  15 +-
 .../controller/stages/CurrentStateOutput.java   |  62 +-
 .../handling/HelixStateTransitionHandler.java   |  83 ++-
 .../messaging/handling/HelixTaskResult.java     |   9 +
 .../org/apache/helix/model/CurrentState.java    |  36 +
 .../apache/helix/model/ResourceAssignment.java  |  14 +
 .../java/org/apache/helix/task/TargetState.java |  25 +
 .../main/java/org/apache/helix/task/Task.java   |  25 +
 .../java/org/apache/helix/task/TaskConfig.java  | 333 +++++++++
 .../org/apache/helix/task/TaskConstants.java    |  31 +
 .../java/org/apache/helix/task/TaskContext.java | 147 ++++
 .../java/org/apache/helix/task/TaskDag.java     | 157 ++++
 .../java/org/apache/helix/task/TaskDriver.java  | 382 ++++++++++
 .../java/org/apache/helix/task/TaskFactory.java |  23 +
 .../apache/helix/task/TaskPartitionState.java   |  31 +
 .../org/apache/helix/task/TaskRebalancer.java   | 736 +++++++++++++++++++
 .../java/org/apache/helix/task/TaskResult.java  |  63 ++
 .../java/org/apache/helix/task/TaskRunner.java  | 190 +++++
 .../java/org/apache/helix/task/TaskState.java   |  31 +
 .../org/apache/helix/task/TaskStateModel.java   | 266 +++++++
 .../helix/task/TaskStateModelFactory.java       |  34 +
 .../java/org/apache/helix/task/TaskUtil.java    | 161 ++++
 .../java/org/apache/helix/task/Workflow.java    | 261 +++++++
 .../org/apache/helix/task/WorkflowConfig.java   | 116 +++
 .../org/apache/helix/task/WorkflowContext.java  | 110 +++
 .../org/apache/helix/task/beans/TaskBean.java   |  30 +
 .../apache/helix/task/beans/WorkflowBean.java   |  21 +
 .../org/apache/helix/tools/ClusterSetup.java    |   2 +
 .../helix/tools/StateModelConfigGenerator.java  |  96 ++-
 .../org/apache/helix/DummyProcessThread.java    |  12 +-
 .../integration/ZkIntegrationTestBase.java      |   3 +-
 .../integration/task/TestTaskRebalancer.java    | 330 +++++++++
 .../task/TestTaskRebalancerStopResume.java      | 231 ++++++
 .../apache/helix/integration/task/TestUtil.java | 128 ++++
 .../integration/task/WorkflowGenerator.java     |  76 ++
 recipes/auto-scale/README.md                    |  82 +++
 recipes/auto-scale/pom.xml                      | 210 ++++++
 .../auto-scale/src/main/assembly/assembly.xml   |  32 +
 .../auto-scale/src/main/config/log4j.properties |  30 +
 .../apache/helix/autoscale/ClusterAdmin.java    |  30 +
 .../helix/autoscale/ContainerProvider.java      |  40 +
 .../autoscale/ContainerProviderService.java     |   9 +
 .../helix/autoscale/HelixClusterAdmin.java      |  43 ++
 .../org/apache/helix/autoscale/Service.java     |  38 +
 .../apache/helix/autoscale/StatusProvider.java  |  35 +
 .../helix/autoscale/StatusProviderService.java  |   9 +
 .../apache/helix/autoscale/TargetProvider.java  |  25 +
 .../helix/autoscale/TargetProviderService.java  |   9 +
 .../apache/helix/autoscale/ZookeeperSetter.java |  30 +
 .../helix/autoscale/bootstrapper/Boot.java      | 132 ++++
 .../helix/autoscale/bootstrapper/BootUtils.java | 104 +++
 .../autoscale/bootstrapper/ClusterService.java  |  46 ++
 .../bootstrapper/ControllerService.java         |  50 ++
 .../bootstrapper/MetaClusterService.java        |  61 ++
 .../bootstrapper/MetaControllerService.java     | 114 +++
 .../bootstrapper/MetaProviderService.java       |  81 ++
 .../bootstrapper/MetaResourceService.java       |  87 +++
 .../autoscale/bootstrapper/ResourceService.java |  61 ++
 .../bootstrapper/ZookeeperService.java          |  64 ++
 .../autoscale/container/ContainerProcess.java   | 133 ++++
 .../container/ContainerProcessProperties.java   |  66 ++
 .../autoscale/container/ContainerUtils.java     |  46 ++
 .../autoscale/impl/FileTargetProvider.java      |  51 ++
 .../autoscale/impl/RedisTargetProvider.java     | 356 +++++++++
 .../autoscale/impl/StaticTargetProvider.java    |  62 ++
 .../impl/container/DummyMasterSlaveProcess.java |  76 ++
 .../container/DummyOnlineOfflineProcess.java    |  66 ++
 .../impl/container/RedisServerProcess.java      | 140 ++++
 .../container/ZookeeperMasterSlaveProcess.java  | 108 +++
 .../impl/local/LocalContainerProvider.java      | 119 +++
 .../local/LocalContainerProviderProcess.java    |  45 ++
 .../impl/local/LocalContainerSingleton.java     |  56 ++
 .../impl/local/LocalStatusProvider.java         |  53 ++
 .../impl/shell/ShellContainerProcess.java       |  93 +++
 .../impl/shell/ShellContainerProvider.java      | 151 ++++
 .../shell/ShellContainerProviderProcess.java    |  45 ++
 .../impl/shell/ShellContainerSingleton.java     |  58 ++
 .../impl/shell/ShellStatusProvider.java         |  64 ++
 .../helix/autoscale/impl/shell/ShellUtils.java  |  54 ++
 .../autoscale/impl/yarn/YarnContainerData.java  |  86 +++
 .../impl/yarn/YarnContainerProcess.java         |  53 ++
 .../yarn/YarnContainerProcessProperties.java    |  40 +
 .../impl/yarn/YarnContainerProvider.java        | 143 ++++
 .../impl/yarn/YarnContainerProviderProcess.java | 158 ++++
 .../yarn/YarnContainerProviderProperties.java   |  64 ++
 .../impl/yarn/YarnContainerService.java         | 156 ++++
 .../autoscale/impl/yarn/YarnDataProvider.java   |  73 ++
 .../autoscale/impl/yarn/YarnMasterProcess.java  | 144 ++++
 .../impl/yarn/YarnMasterProperties.java         |  13 +
 .../autoscale/impl/yarn/YarnMasterService.java  | 414 +++++++++++
 .../autoscale/impl/yarn/YarnStatusProvider.java |  67 ++
 .../helix/autoscale/impl/yarn/YarnUtils.java    | 174 +++++
 .../impl/yarn/ZookeeperYarnDataProvider.java    | 100 +++
 .../autoscale/provider/ProviderProcess.java     |  82 +++
 .../autoscale/provider/ProviderProperties.java  |  97 +++
 .../autoscale/provider/ProviderRebalancer.java  | 352 +++++++++
 .../provider/ProviderRebalancerSingleton.java   |  38 +
 .../autoscale/provider/ProviderStateModel.java  | 114 +++
 .../provider/ProviderStateModelFactory.java     |  27 +
 .../src/main/resources/Boot2By2Local.properties |  87 +++
 .../src/main/resources/Boot2By2Shell.properties |  87 +++
 .../src/main/resources/Boot2By2Yarn.properties  |  98 +++
 .../src/main/resources/BootLocal.properties     |  68 ++
 .../main/resources/RedisYarnSample.properties   |  89 +++
 .../src/main/resources/log4j.properties         |  30 +
 recipes/auto-scale/src/test/config/testng.xml   |  27 +
 .../apache/helix/autoscale/BootstrapperIT.java  | 134 ++++
 .../org/apache/helix/autoscale/FailoverIT.java  | 195 +++++
 .../autoscale/LocalContainerProviderIT.java     |  80 ++
 .../autoscale/ShellContainerProviderIT.java     |  95 +++
 .../org/apache/helix/autoscale/TestUtils.java   | 443 +++++++++++
 .../org/apache/helix/autoscale/TestUtilsUT.java |  63 ++
 .../autoscale/YarnContainerProviderIT.java      | 101 +++
 .../src/test/resources/distributed.properties   |  13 +
 .../src/test/resources/log4j.properties         |  30 +
 .../src/test/resources/standalone.properties    |  13 +
 recipes/meta-cluster-manager/README.md          |  82 +++
 recipes/meta-cluster-manager/pom.xml            | 210 ++++++
 .../src/main/assembly/assembly.xml              |  32 +
 .../src/main/config/log4j.properties            |  30 +
 .../apache/helix/metamanager/ClusterAdmin.java  |  30 +
 .../metamanager/ClusterContainerProvider.java   |  32 +
 .../ClusterContainerStatusProvider.java         |   7 +
 .../metamanager/ClusterInstanceInjector.java    |   6 +
 .../metamanager/ClusterStatusProvider.java      |   5 +
 .../apache/helix/metamanager/ConfigTool.java    |  47 ++
 .../helix/metamanager/ContainerProvider.java    |  40 +
 .../metamanager/ContainerProviderService.java   |   9 +
 .../metamanager/ContainerStatusProvider.java    |   7 +
 .../helix/metamanager/FileStatusProvider.java   |  27 +
 .../helix/metamanager/HelixClusterAdmin.java    |  43 ++
 .../org/apache/helix/metamanager/Manager.java   | 129 ++++
 .../apache/helix/metamanager/ManagerDemo.java   | 463 ++++++++++++
 .../helix/metamanager/ManagerFactory.java       |  39 +
 .../helix/metamanager/ManagerProcess.java       |  67 ++
 .../helix/metamanager/ManagerRebalancer.java    | 167 +++++
 .../helix/metamanager/MetaManagerDemo.java      | 457 ++++++++++++
 .../org/apache/helix/metamanager/Service.java   |  38 +
 .../helix/metamanager/StaticStatusProvider.java |  28 +
 .../helix/metamanager/StatusProvider.java       |  35 +
 .../metamanager/StatusProviderService.java      |   9 +
 .../helix/metamanager/TargetProvider.java       |  25 +
 .../metamanager/TargetProviderService.java      |   9 +
 .../helix/metamanager/ZookeeperSetter.java      |  30 +
 .../helix/metamanager/bootstrap/BootUtil.java   |  58 ++
 .../helix/metamanager/bootstrap/BootUtils.java  | 127 ++++
 .../metamanager/bootstrap/Bootstrapper.java     |  93 +++
 .../metamanager/bootstrap/ManagedCluster.java   |  87 +++
 .../metamanager/bootstrap/MetaCluster.java      | 201 +++++
 .../metamanager/bootstrap/ProviderWrapper.java  | 162 ++++
 .../metamanager/bootstrap/StatusWrapper.java    | 122 +++
 .../metamanager/bootstrap/TargetWrapper.java    | 117 +++
 .../metamanager/bootstrap/ZookeeperWrapper.java |  57 ++
 .../helix/metamanager/bootstrapper/Boot.java    | 132 ++++
 .../metamanager/bootstrapper/BootUtils.java     | 104 +++
 .../bootstrapper/ClusterService.java            |  46 ++
 .../bootstrapper/ControllerService.java         |  50 ++
 .../bootstrapper/MetaClusterService.java        |  61 ++
 .../bootstrapper/MetaControllerService.java     | 114 +++
 .../bootstrapper/MetaProviderService.java       |  81 ++
 .../bootstrapper/MetaResourceService.java       |  87 +++
 .../metamanager/bootstrapper/MetaService.java   |  80 ++
 .../bootstrapper/ResourceService.java           |  61 ++
 .../bootstrapper/ZookeeperService.java          |  64 ++
 .../metamanager/cluster/FileTargetProvider.java |  29 +
 .../cluster/RedisTargetProvider.java            | 329 +++++++++
 .../cluster/StaticTargetProvider.java           |  41 ++
 .../metamanager/container/ContainerProcess.java | 133 ++++
 .../container/ContainerProcessProperties.java   |  66 ++
 .../container/ContainerStateModel.java          |  64 ++
 .../container/ContainerStateModelFactory.java   |  30 +
 .../metamanager/container/ContainerUtils.java   |  46 ++
 .../container/impl/DummyMasterSlaveProcess.java |  76 ++
 .../impl/DummyOnlineOfflineProcess.java         |  64 ++
 .../container/impl/DummyProcess.java            |  76 ++
 .../container/impl/RedisServerProcess.java      | 135 ++++
 .../impl/ZookeeperMasterSlaveProcess.java       | 104 +++
 .../metamanager/impl/FileTargetProvider.java    |  51 ++
 .../metamanager/impl/RedisTargetProvider.java   | 356 +++++++++
 .../metamanager/impl/StaticTargetProvider.java  |  62 ++
 .../impl/container/DummyMasterSlaveProcess.java |  76 ++
 .../container/DummyOnlineOfflineProcess.java    |  66 ++
 .../impl/container/RedisServerProcess.java      | 140 ++++
 .../container/ZookeeperMasterSlaveProcess.java  | 108 +++
 .../impl/local/LocalContainerProcess.java       |  64 ++
 .../impl/local/LocalContainerProvider.java      | 119 +++
 .../local/LocalContainerProviderProcess.java    |  45 ++
 .../impl/local/LocalContainerSingleton.java     |  56 ++
 .../local/LocalContainerStatusProvider.java     |  37 +
 .../impl/local/LocalStatusProvider.java         |  53 ++
 .../impl/shell/ShellContainerProcess.java       |  93 +++
 .../impl/shell/ShellContainerProvider.java      | 151 ++++
 .../shell/ShellContainerProviderProcess.java    |  45 ++
 .../impl/shell/ShellContainerSingleton.java     |  58 ++
 .../shell/ShellContainerStatusProvider.java     |  52 ++
 .../impl/shell/ShellStatusProvider.java         |  64 ++
 .../metamanager/impl/shell/ShellUtils.java      |  54 ++
 .../impl/yarn/ApplicationConfig.java            |  32 +
 .../impl/yarn/ContainerMetadata.java            |  80 ++
 .../metamanager/impl/yarn/MetadataProvider.java |  42 ++
 .../metamanager/impl/yarn/MetadataService.java  |  42 ++
 .../helix/metamanager/impl/yarn/Utils.java      |  94 +++
 .../metamanager/impl/yarn/YarnApplication.java  | 171 +++++
 .../impl/yarn/YarnApplicationProperties.java    |  91 +++
 .../impl/yarn/YarnContainerData.java            |  86 +++
 .../impl/yarn/YarnContainerProcess.java         |  53 ++
 .../yarn/YarnContainerProcessProperties.java    |  40 +
 .../impl/yarn/YarnContainerProvider.java        | 143 ++++
 .../impl/yarn/YarnContainerProviderProcess.java | 158 ++++
 .../yarn/YarnContainerProviderProperties.java   |  64 ++
 .../impl/yarn/YarnContainerService.java         | 156 ++++
 .../impl/yarn/YarnContainerStatusProvider.java  |  52 ++
 .../metamanager/impl/yarn/YarnDataProvider.java |  73 ++
 .../impl/yarn/YarnMasterProcess.java            | 144 ++++
 .../impl/yarn/YarnMasterProperties.java         |  13 +
 .../impl/yarn/YarnMasterService.java            | 414 +++++++++++
 .../impl/yarn/YarnStatusProvider.java           |  67 ++
 .../helix/metamanager/impl/yarn/YarnUtils.java  | 174 +++++
 .../impl/yarn/ZookeeperMetadataProvider.java    | 116 +++
 .../impl/yarn/ZookeeperMetadataService.java     | 102 +++
 .../impl/yarn/ZookeeperYarnDataProvider.java    | 100 +++
 .../metamanager/managed/ContainerProcess.java   |  85 +++
 .../metamanager/managed/HelixClusterAdmin.java  |  42 ++
 .../managed/LocalClusterManager.java            |  42 ++
 .../managed/LocalContainerProvider.java         |  87 +++
 .../managed/LocalProcessProvider.java           | 100 +++
 .../managed/LocalStatusProvider.java            |  22 +
 .../helix/metamanager/managed/Managed.java      |  64 ++
 .../metamanager/managed/ManagedFactory.java     |  30 +
 .../metamanager/managed/ManagedProcess.java     |  85 +++
 .../managed/ShellContainerProvider.java         |  85 +++
 .../managed/ShellProcessProvider.java           | 148 ++++
 .../managed/YarnContainerProvider.java          |  37 +
 .../metamanager/provider/ProviderProcess.java   |  82 +++
 .../provider/ProviderProperties.java            |  97 +++
 .../provider/ProviderRebalancer.java            | 352 +++++++++
 .../provider/ProviderRebalancerSingleton.java   |  38 +
 .../provider/ProviderStateModel.java            | 114 +++
 .../provider/ProviderStateModelFactory.java     |  27 +
 .../provider/local/LocalContainerProvider.java  |  75 ++
 .../provider/local/LocalContainerSingleton.java |  40 +
 .../local/LocalContainerStatusProvider.java     |  37 +
 .../provider/shell/ShellContainerProvider.java  |  81 ++
 .../provider/shell/ShellContainerSingleton.java |  38 +
 .../shell/ShellContainerStatusProvider.java     |  52 ++
 .../provider/yarn/ApplicationConfig.java        |  32 +
 .../provider/yarn/ContainerMetadata.java        |  50 ++
 .../provider/yarn/MetadataService.java          |  42 ++
 .../helix/metamanager/provider/yarn/Utils.java  |  94 +++
 .../provider/yarn/YarnApplication.java          | 125 ++++
 .../provider/yarn/YarnContainerProcess.java     |  60 ++
 .../provider/yarn/YarnContainerProvider.java    | 108 +++
 .../provider/yarn/YarnContainerService.java     | 129 ++++
 .../yarn/YarnContainerStatusProvider.java       |  52 ++
 .../metamanager/provider/yarn/YarnMaster.java   | 134 ++++
 .../provider/yarn/YarnMasterProcess.java        | 119 +++
 .../provider/yarn/YarnMasterService.java        | 361 +++++++++
 .../metamanager/provider/yarn/YarnProcess.java  | 171 +++++
 .../provider/yarn/ZookeeperMetadataService.java | 102 +++
 .../metamanager/yarn/ApplicationConfig.java     |  32 +
 .../metamanager/yarn/ContainerMetadata.java     |  50 ++
 .../helix/metamanager/yarn/ContainerNode.java   |  61 ++
 .../helix/metamanager/yarn/MessageNode.java     |  20 +
 .../helix/metamanager/yarn/MetadataService.java | 146 ++++
 .../apache/helix/metamanager/yarn/Utils.java    |  93 +++
 .../helix/metamanager/yarn/YarnApplication.java | 126 ++++
 .../helix/metamanager/yarn/YarnClient.java      |   5 +
 .../helix/metamanager/yarn/YarnContainer.java   |  14 +
 .../metamanager/yarn/YarnContainerProvider.java |  90 +++
 .../metamanager/yarn/YarnContainerService.java  | 370 ++++++++++
 .../helix/metamanager/yarn/YarnHelper.java      |   5 +
 .../helix/metamanager/yarn/YarnMaster.java      | 134 ++++
 .../helix/metamanager/yarn/YarnProcess.java     | 171 +++++
 .../src/main/resources/2by2local.properties     |  52 ++
 .../resources/2by2localMixedModels.properties   |  52 ++
 .../src/main/resources/2by2shell.properties     |  52 ++
 .../src/main/resources/2by2yarn.properties      |  58 ++
 .../main/resources/2by2yarnZookeeper.properties |  58 ++
 .../src/main/resources/2meta2managed.properties |  52 ++
 .../src/main/resources/Boot2By2Local.properties |  87 +++
 .../src/main/resources/Boot2By2Shell.properties |  87 +++
 .../src/main/resources/Boot2By2Yarn.properties  |  98 +++
 .../src/main/resources/BootLocal.properties     |  68 ++
 .../src/main/resources/boot/cluster.properties  |   2 +
 .../main/resources/boot/controller.properties   |   4 +
 .../main/resources/boot/metacluster.properties  |   4 +
 .../resources/boot/metacontroller.properties    |   4 +
 .../src/main/resources/boot/resdb.properties    |   4 +
 .../src/main/resources/boot/resws.properties    |   4 +
 .../main/resources/boot/zookeeper.properties    |   4 +
 .../src/main/resources/container.properties     |   1 +
 .../src/main/resources/log4j.properties         |  30 +
 .../src/main/resources/redisLocal.properties    |  50 ++
 .../src/main/resources/redisYarn.properties     |  52 ++
 .../src/test/conf/testng-integration.xml        |  27 +
 .../src/test/conf/testng-unit.xml               |  27 +
 .../src/test/conf/testng.xml                    |  27 +
 .../src/test/config/testng-integration.xml      |  27 +
 .../src/test/config/testng-unit.xml             |  27 +
 .../src/test/config/testng.xml                  |  27 +
 .../helix/metamanager/BootstrapperIT.java       | 134 ++++
 .../apache/helix/metamanager/FailoverIT.java    | 195 +++++
 .../metamanager/LocalContainerProviderIT.java   |  80 ++
 .../metamanager/ShellContainerProviderIT.java   |  95 +++
 .../metamanager/TestContainerProvider.java      |  17 +
 .../helix/metamanager/TestStatusProvider.java   |  20 +
 .../org/apache/helix/metamanager/TestUtils.java | 438 +++++++++++
 .../apache/helix/metamanager/TestUtilsTest.java |  30 +
 .../apache/helix/metamanager/TestUtilsUT.java   |  63 ++
 .../metamanager/YarnContainerProviderIT.java    | 101 +++
 .../metamanager/integration/BootstrapperIT.java | 127 ++++
 .../metamanager/integration/FailoverIT.java     | 172 +++++
 .../integration/LocalContainerProviderIT.java   |  72 ++
 .../integration/MultipleProviderFailoverIT.java | 148 ++++
 .../integration/ShellContainerProviderIT.java   |  87 +++
 .../integration/YarnContainerProviderIT.java    |  93 +++
 .../helix/metamanager/unit/TestUtilsTestUT.java |  62 ++
 .../helix/metamanager/unit/TestUtilsUT.java     |  55 ++
 .../src/test/resources/distributed.properties   |  13 +
 .../src/test/resources/log4j.properties         |  30 +
 .../src/test/resources/standalone.properties    |  13 +
 recipes/pom.xml                                 |   1 +
 324 files changed, 28806 insertions(+), 42 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/helix-core/src/main/java/org/apache/helix/ZNRecord.java
----------------------------------------------------------------------
diff --git a/helix-core/src/main/java/org/apache/helix/ZNRecord.java b/helix-core/src/main/java/org/apache/helix/ZNRecord.java
index 56a6cf2..3ac9485 100644
--- a/helix-core/src/main/java/org/apache/helix/ZNRecord.java
+++ b/helix-core/src/main/java/org/apache/helix/ZNRecord.java
@@ -570,20 +570,27 @@ public class ZNRecord {
    */
   public void subtract(ZNRecord value) {
     for (String key : value.getSimpleFields().keySet()) {
-      if (simpleFields.containsKey(key)) {
-        simpleFields.remove(key);
-      }
+      simpleFields.remove(key);
     }
 
     for (String key : value.getListFields().keySet()) {
-      if (listFields.containsKey(key)) {
-        listFields.remove(key);
-      }
+      listFields.remove(key);
     }
 
     for (String key : value.getMapFields().keySet()) {
-      if (mapFields.containsKey(key)) {
+      Map<String, String> map = value.getMapField(key);
+      if (map == null) {
         mapFields.remove(key);
+      } else {
+        Map<String, String> nestedMap = mapFields.get(key);
+        if (nestedMap != null) {
+          for (String mapKey : map.keySet()) {
+            nestedMap.remove(mapKey);
+          }
+          if (nestedMap.size() == 0) {
+            mapFields.remove(key);
+          }
+        }
       }
     }
   }

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/helix-core/src/main/java/org/apache/helix/controller/GenericHelixController.java
----------------------------------------------------------------------
diff --git a/helix-core/src/main/java/org/apache/helix/controller/GenericHelixController.java b/helix-core/src/main/java/org/apache/helix/controller/GenericHelixController.java
index 8e4e1ea..03e5489 100644
--- a/helix-core/src/main/java/org/apache/helix/controller/GenericHelixController.java
+++ b/helix-core/src/main/java/org/apache/helix/controller/GenericHelixController.java
@@ -181,7 +181,6 @@ public class GenericHelixController implements ConfigChangeListener, IdealStateC
       Pipeline rebalancePipeline = new Pipeline();
       rebalancePipeline.addStage(new ResourceComputationStage());
       rebalancePipeline.addStage(new CurrentStateComputationStage());
-      rebalancePipeline.addStage(new RebalanceIdealStateStage());
       rebalancePipeline.addStage(new BestPossibleStateCalcStage());
       rebalancePipeline.addStage(new MessageGenerationPhase());
       rebalancePipeline.addStage(new MessageSelectionStage());

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/helix-core/src/main/java/org/apache/helix/controller/stages/CurrentStateComputationStage.java
----------------------------------------------------------------------
diff --git a/helix-core/src/main/java/org/apache/helix/controller/stages/CurrentStateComputationStage.java b/helix-core/src/main/java/org/apache/helix/controller/stages/CurrentStateComputationStage.java
index 6097432..6a30a9d 100644
--- a/helix-core/src/main/java/org/apache/helix/controller/stages/CurrentStateComputationStage.java
+++ b/helix-core/src/main/java/org/apache/helix/controller/stages/CurrentStateComputationStage.java
@@ -118,9 +118,18 @@ public class CurrentStateComputationStage extends AbstractBaseStage {
         for (String partitionName : partitionStateMap.keySet()) {
           Partition partition = resource.getPartition(partitionName);
           if (partition != null) {
-            currentStateOutput.setCurrentState(resourceName, partition, instanceName,
-                currentState.getState(partitionName));
-
+            currentStateOutput.setCurrentState(resourceName,
+                                               partition,
+                                               instanceName,
+                                               currentState.getState(partitionName));
+            currentStateOutput.setRequestedState(resourceName,
+                                                 partition,
+                                                 instanceName,
+                                                 currentState.getRequestedState(partitionName));
+            currentStateOutput.setInfo(resourceName,
+                                       partition,
+                                       instanceName,
+                                       currentState.getInfo(partitionName));
           } else {
             // log
           }

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/helix-core/src/main/java/org/apache/helix/controller/stages/CurrentStateOutput.java
----------------------------------------------------------------------
diff --git a/helix-core/src/main/java/org/apache/helix/controller/stages/CurrentStateOutput.java b/helix-core/src/main/java/org/apache/helix/controller/stages/CurrentStateOutput.java
index b41f14b..9537272 100644
--- a/helix-core/src/main/java/org/apache/helix/controller/stages/CurrentStateOutput.java
+++ b/helix-core/src/main/java/org/apache/helix/controller/stages/CurrentStateOutput.java
@@ -22,13 +22,19 @@ package org.apache.helix.controller.stages;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.Map;
-
 import org.apache.helix.model.CurrentState;
 import org.apache.helix.model.Partition;
 
 public class CurrentStateOutput {
   private final Map<String, Map<Partition, Map<String, String>>> _currentStateMap;
   private final Map<String, Map<Partition, Map<String, String>>> _pendingStateMap;
+  // Contains per-resource maps of partition -> (instance, requested_state). This corresponds to the REQUESTED_STATE
+  // field in the CURRENTSTATES node.
+  private final Map<String, Map<Partition, Map<String, String>>> _requestedStateMap;
+  // Contains per-resource maps of partition -> (instance, info). This corresponds to the INFO field in the
+  // CURRENTSTATES node. This is information returned by state transition methods on the participants. It may be used
+  // by the rebalancer.
+  private final Map<String, Map<Partition, Map<String, String>>> _infoMap;
   private final Map<String, String> _resourceStateModelMap;
   private final Map<String, CurrentState> _curStateMetaMap;
 
@@ -37,7 +43,8 @@ public class CurrentStateOutput {
     _pendingStateMap = new HashMap<String, Map<Partition, Map<String, String>>>();
     _resourceStateModelMap = new HashMap<String, String>();
     _curStateMetaMap = new HashMap<String, CurrentState>();
-
+    _requestedStateMap = new HashMap<String, Map<Partition, Map<String, String>>>();
+    _infoMap = new HashMap<String, Map<Partition, Map<String, String>>>();
   }
 
   public void setResourceStateModelDef(String resourceName, String stateModelDefName) {
@@ -78,6 +85,29 @@ public class CurrentStateOutput {
     _currentStateMap.get(resourceName).get(partition).put(instanceName, state);
   }
 
+  public void setRequestedState(String resourceName, Partition partition, String instanceName, String state) {
+    if (!_requestedStateMap.containsKey(resourceName)) {
+      _requestedStateMap.put(resourceName, new HashMap<Partition, Map<String, String>>());
+    }
+    if (!_requestedStateMap.get(resourceName).containsKey(partition)) {
+      _requestedStateMap.get(resourceName).put(partition, new HashMap<String, String>());
+    }
+    _requestedStateMap.get(resourceName).get(partition).put(instanceName, state);
+  }
+
+  public void setInfo(String resourceName, Partition partition, String instanceName, String state)
+  {
+    if (!_infoMap.containsKey(resourceName))
+    {
+      _infoMap.put(resourceName, new HashMap<Partition, Map<String, String>>());
+    }
+    if (!_infoMap.get(resourceName).containsKey(partition))
+    {
+      _infoMap.get(resourceName).put(partition, new HashMap<String, String>());
+    }
+    _infoMap.get(resourceName).get(partition).put(instanceName, state);
+  }
+
   public void setPendingState(String resourceName, Partition partition, String instanceName,
       String state) {
     if (!_pendingStateMap.containsKey(resourceName)) {
@@ -107,6 +137,34 @@ public class CurrentStateOutput {
     return null;
   }
 
+  public String getRequestedState(String resourceName, Partition partition, String instanceName)
+  {
+    Map<Partition, Map<String, String>> map = _requestedStateMap.get(resourceName);
+    if (map != null)
+    {
+      Map<String, String> instanceStateMap = map.get(partition);
+      if (instanceStateMap != null)
+      {
+        return instanceStateMap.get(instanceName);
+      }
+    }
+    return null;
+  }
+
+  public String getInfo(String resourceName, Partition partition, String instanceName)
+  {
+    Map<Partition, Map<String, String>> map = _infoMap.get(resourceName);
+    if (map != null)
+    {
+      Map<String, String> instanceStateMap = map.get(partition);
+      if (instanceStateMap != null)
+      {
+        return instanceStateMap.get(instanceName);
+      }
+    }
+    return null;
+  }
+
   /**
    * given (resource, partition, instance), returns toState
    * @param resourceName

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/helix-core/src/main/java/org/apache/helix/messaging/handling/HelixStateTransitionHandler.java
----------------------------------------------------------------------
diff --git a/helix-core/src/main/java/org/apache/helix/messaging/handling/HelixStateTransitionHandler.java b/helix-core/src/main/java/org/apache/helix/messaging/handling/HelixStateTransitionHandler.java
index 627babc..8da7ec9 100644
--- a/helix-core/src/main/java/org/apache/helix/messaging/handling/HelixStateTransitionHandler.java
+++ b/helix-core/src/main/java/org/apache/helix/messaging/handling/HelixStateTransitionHandler.java
@@ -25,10 +25,10 @@ import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Date;
 import java.util.List;
+import java.util.Map;
+import java.util.TreeMap;
 import java.util.concurrent.ConcurrentHashMap;
-
 import org.apache.helix.HelixAdmin;
-import org.apache.helix.HelixConstants;
 import org.apache.helix.HelixDataAccessor;
 import org.apache.helix.HelixDefinedState;
 import org.apache.helix.HelixException;
@@ -36,9 +36,10 @@ import org.apache.helix.HelixManager;
 import org.apache.helix.NotificationContext;
 import org.apache.helix.NotificationContext.MapKey;
 import org.apache.helix.PropertyKey;
+import org.apache.helix.PropertyKey.Builder;
+import org.apache.helix.ZNRecord;
 import org.apache.helix.ZNRecordBucketizer;
 import org.apache.helix.ZNRecordDelta;
-import org.apache.helix.PropertyKey.Builder;
 import org.apache.helix.ZNRecordDelta.MergeOperation;
 import org.apache.helix.model.CurrentState;
 import org.apache.helix.model.Message;
@@ -57,7 +58,7 @@ public class HelixStateTransitionHandler extends MessageHandler {
     }
   }
 
-  private static Logger logger = Logger.getLogger(HelixStateTransitionHandler.class);
+  private static final Logger logger = Logger.getLogger(HelixStateTransitionHandler.class);
   private final StateModel _stateModel;
   StatusUpdateUtil _statusUpdateUtil;
   private final StateModelParser _transitionMethodFinder;
@@ -110,6 +111,43 @@ public class HelixStateTransitionHandler extends MessageHandler {
       logger.error(errorMessage);
       throw new HelixStateMismatchException(errorMessage);
     }
+
+    // Reset the REQUESTED_STATE property if it exists.
+    try
+    {
+      String instance = _manager.getInstanceName();
+      String sessionId = _message.getTgtSessionId();
+      String resource = _message.getResourceName();
+      ZNRecordBucketizer bucketizer = new ZNRecordBucketizer(_message.getBucketSize());
+      PropertyKey key = accessor.keyBuilder().currentState(instance,
+                                                           sessionId,
+                                                           resource,
+                                                           bucketizer.getBucketName(partitionName));
+      ZNRecord rec = new ZNRecord(resource);
+      Map<String, String> map = new TreeMap<String, String>();
+      map.put(CurrentState.CurrentStateProperty.REQUESTED_STATE.name(), null);
+      rec.getMapFields().put(partitionName, map);
+      ZNRecordDelta delta = new ZNRecordDelta(rec, ZNRecordDelta.MergeOperation.SUBTRACT);
+      List<ZNRecordDelta> deltaList = new ArrayList<ZNRecordDelta>();
+      deltaList.add(delta);
+      CurrentState currStateUpdate = new CurrentState(resource);
+      currStateUpdate.setDeltaList(deltaList);
+
+      // Update the ZK current state of the node
+      accessor.updateProperty(key, currStateUpdate);
+    }
+    catch (Exception e)
+    {
+      logger.error("Error when removing " +
+                       CurrentState.CurrentStateProperty.REQUESTED_STATE.name() +  " from current state.", e);
+      StateTransitionError error = new StateTransitionError(ErrorType.FRAMEWORK, ErrorCode.ERROR, e);
+      _stateModel.rollbackOnError(_message, _notificationContext, error);
+      _statusUpdateUtil.logError(_message,
+                                 HelixStateTransitionHandler.class,
+                                 e,
+                                 "Error when removing " + CurrentState.CurrentStateProperty.REQUESTED_STATE.name() +  " from current state.",
+                                 accessor);
+    }
   }
 
   void postHandleMessage() {
@@ -138,6 +176,9 @@ public class HelixStateTransitionHandler extends MessageHandler {
       return;
     }
 
+    // Set the INFO property.
+    _currentStateDelta.setInfo(partitionKey, taskResult.getInfo());
+
     if (taskResult.isSuccess()) {
       // String fromState = message.getFromState();
       String toState = _message.getToState();
@@ -147,10 +188,9 @@ public class HelixStateTransitionHandler extends MessageHandler {
         // for "OnOfflineToDROPPED" message, we need to remove the resource key record
         // from the current state of the instance because the resource key is dropped.
         // In the state model it will be stayed as "OFFLINE", which is OK.
-        ZNRecordDelta delta =
-            new ZNRecordDelta(_currentStateDelta.getRecord(), MergeOperation.SUBTRACT);
-        // Don't subtract simple fields since they contain stateModelDefRef
-        delta._record.getSimpleFields().clear();
+        ZNRecord rec = new ZNRecord(_currentStateDelta.getId());
+        rec.getMapFields().put(partitionKey, null);
+        ZNRecordDelta delta = new ZNRecordDelta(rec, MergeOperation.SUBTRACT);
 
         List<ZNRecordDelta> deltaList = new ArrayList<ZNRecordDelta>();
         deltaList.add(delta);
@@ -288,15 +328,28 @@ public class HelixStateTransitionHandler extends MessageHandler {
     String fromState = message.getFromState();
     String toState = message.getToState();
     methodToInvoke =
-        _transitionMethodFinder.getMethodForTransition(_stateModel.getClass(), fromState, toState,
-            new Class[] {
-                Message.class, NotificationContext.class
-            });
+        _transitionMethodFinder.getMethodForTransition(_stateModel.getClass(),
+                                                       fromState,
+                                                       toState,
+                                                       new Class[] { Message.class,
+                                                           NotificationContext.class });
     if (methodToInvoke != null) {
-      methodToInvoke.invoke(_stateModel, new Object[] {
-          message, context
-      });
+      logger.info(String.format("Instance %s, partition %s received state transition from %s to %s on session %s.",
+                                message.getTgtName(),
+                                message.getPartitionName(),
+                                message.getFromState(),
+                                message.getToState(),
+                                message.getTgtSessionId()));
+
+      Object result = methodToInvoke.invoke(_stateModel, new Object[] { message, context });
       taskResult.setSuccess(true);
+      String resultStr;
+      if (result == null || result instanceof Void) {
+        resultStr = "";
+      } else {
+        resultStr = result.toString();
+      }
+      taskResult.setInfo(resultStr);
     } else {
       String errorMessage =
           "Unable to find method for transition from " + fromState + " to " + toState + " in "

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/helix-core/src/main/java/org/apache/helix/messaging/handling/HelixTaskResult.java
----------------------------------------------------------------------
diff --git a/helix-core/src/main/java/org/apache/helix/messaging/handling/HelixTaskResult.java b/helix-core/src/main/java/org/apache/helix/messaging/handling/HelixTaskResult.java
index 22c4fcd..ced9c65 100644
--- a/helix-core/src/main/java/org/apache/helix/messaging/handling/HelixTaskResult.java
+++ b/helix-core/src/main/java/org/apache/helix/messaging/handling/HelixTaskResult.java
@@ -26,6 +26,7 @@ public class HelixTaskResult {
 
   private boolean _success;
   private String _message = "";
+  private String _info = "";
   private Map<String, String> _taskResultMap = new HashMap<String, String>();
   private boolean _interrupted = false;
   Exception _exception = null;
@@ -54,6 +55,14 @@ public class HelixTaskResult {
     this._message = message;
   }
 
+  public String getInfo() {
+    return _info;
+  }
+
+  public void setInfo(String info) {
+    _info = info;
+  }
+
   public Map<String, String> getTaskResultMap() {
     return _taskResultMap;
   }

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/helix-core/src/main/java/org/apache/helix/model/CurrentState.java
----------------------------------------------------------------------
diff --git a/helix-core/src/main/java/org/apache/helix/model/CurrentState.java b/helix-core/src/main/java/org/apache/helix/model/CurrentState.java
index 32854ab..47bccb9 100644
--- a/helix-core/src/main/java/org/apache/helix/model/CurrentState.java
+++ b/helix-core/src/main/java/org/apache/helix/model/CurrentState.java
@@ -39,6 +39,8 @@ public class CurrentState extends HelixProperty {
   public enum CurrentStateProperty {
     SESSION_ID,
     CURRENT_STATE,
+    REQUESTED_STATE,
+    INFO,
     STATE_MODEL_DEF,
     STATE_MODEL_FACTORY_NAME,
     RESOURCE // ,
@@ -115,6 +117,24 @@ public class CurrentState extends HelixProperty {
     return null;
   }
 
+  public String getInfo(String partitionName) {
+    Map<String, Map<String, String>> mapFields = _record.getMapFields();
+    Map<String, String> mapField = mapFields.get(partitionName);
+    if (mapField != null) {
+      return mapField.get(CurrentStateProperty.INFO.name());
+    }
+    return null;
+  }
+
+  public String getRequestedState(String partitionName) {
+    Map<String, Map<String, String>> mapFields = _record.getMapFields();
+    Map<String, String> mapField = mapFields.get(partitionName);
+    if (mapField != null) {
+      return mapField.get(CurrentStateProperty.REQUESTED_STATE.name());
+    }
+    return null;
+  }
+
   /**
    * Set the state model that the resource follows
    * @param stateModelName an identifier of the state model
@@ -144,6 +164,22 @@ public class CurrentState extends HelixProperty {
     mapFields.get(partitionName).put(CurrentStateProperty.CURRENT_STATE.toString(), state);
   }
 
+  public void setInfo(String partitionName, String info) {
+    Map<String, Map<String, String>> mapFields = _record.getMapFields();
+    if (mapFields.get(partitionName) == null) {
+      mapFields.put(partitionName, new TreeMap<String, String>());
+    }
+    mapFields.get(partitionName).put(CurrentStateProperty.INFO.name(), info);
+  }
+
+  public void setRequestedState(String partitionName, String state) {
+    Map<String, Map<String, String>> mapFields = _record.getMapFields();
+    if (mapFields.get(partitionName) == null) {
+      mapFields.put(partitionName, new TreeMap<String, String>());
+    }
+    mapFields.get(partitionName).put(CurrentStateProperty.REQUESTED_STATE.name(), state);
+  }
+
   /**
    * Set the state model factory
    * @param factoryName the name of the factory

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/helix-core/src/main/java/org/apache/helix/model/ResourceAssignment.java
----------------------------------------------------------------------
diff --git a/helix-core/src/main/java/org/apache/helix/model/ResourceAssignment.java b/helix-core/src/main/java/org/apache/helix/model/ResourceAssignment.java
index 2b3d14d..7943ea2 100644
--- a/helix-core/src/main/java/org/apache/helix/model/ResourceAssignment.java
+++ b/helix-core/src/main/java/org/apache/helix/model/ResourceAssignment.java
@@ -25,6 +25,8 @@ import java.util.List;
 import java.util.Map;
 
 import org.apache.helix.HelixProperty;
+import org.apache.helix.ZNRecord;
+
 
 /**
  * Represents the assignments of replicas for an entire resource, keyed on partitions of the
@@ -48,6 +50,14 @@ public class ResourceAssignment extends HelixProperty {
   }
 
   /**
+   * Initialize a mapping from a {@link ZNRecord}.
+   * @param record The underlying ZNRecord.
+   */
+  public ResourceAssignment(ZNRecord record) {
+    super(record);
+  }
+
+  /**
    * Initialize a mapping from an existing ResourceMapping
    * @param existingMapping pre-populated ResourceMapping
    */
@@ -55,6 +65,10 @@ public class ResourceAssignment extends HelixProperty {
     super(existingMapping);
   }
 
+  public String getResourceName() {
+    return _record.getId();
+  }
+
   /**
    * Get the currently mapped partitions
    * @return list of Partition objects

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/helix-core/src/main/java/org/apache/helix/task/TargetState.java
----------------------------------------------------------------------
diff --git a/helix-core/src/main/java/org/apache/helix/task/TargetState.java b/helix-core/src/main/java/org/apache/helix/task/TargetState.java
new file mode 100644
index 0000000..a84c7ea
--- /dev/null
+++ b/helix-core/src/main/java/org/apache/helix/task/TargetState.java
@@ -0,0 +1,25 @@
+package org.apache.helix.task;
+
+
+/**
+ * Enumeration of target states for a task.
+ *
+ * @author Abe <as...@linkedin.com>
+ * @version $Revision$
+ */
+public enum TargetState
+{
+  /**
+   * Indicates that the rebalancer must start/resume the task.
+   */
+  START,
+  /**
+   * Indicates that the rebalancer should stop any running task partitions and cease doing any further task
+   * assignments.
+   */
+  STOP,
+  /**
+   * Indicates that the rebalancer must delete this task.
+   */
+  DELETE
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/helix-core/src/main/java/org/apache/helix/task/Task.java
----------------------------------------------------------------------
diff --git a/helix-core/src/main/java/org/apache/helix/task/Task.java b/helix-core/src/main/java/org/apache/helix/task/Task.java
new file mode 100644
index 0000000..2741f9e
--- /dev/null
+++ b/helix-core/src/main/java/org/apache/helix/task/Task.java
@@ -0,0 +1,25 @@
+package org.apache.helix.task;
+
+
+/**
+ * The interface that is to be implemented by a specific task implementation.
+ *
+ * @author Abe <as...@linkedin.com>
+ * @version $Revision$
+ */
+public interface Task
+{
+  /**
+   * Execute the task.
+   *
+   * @return A {@link TaskResult} object indicating the status of the task and any additional context information that
+   *         can be interpreted by the specific {@link Task} implementation.
+   */
+  TaskResult run();
+
+  /**
+   * Signals the task to stop execution. The task implementation should carry out any clean up actions that may be
+   * required and return from the {@link #run()} method.
+   */
+  void cancel();
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/helix-core/src/main/java/org/apache/helix/task/TaskConfig.java
----------------------------------------------------------------------
diff --git a/helix-core/src/main/java/org/apache/helix/task/TaskConfig.java b/helix-core/src/main/java/org/apache/helix/task/TaskConfig.java
new file mode 100644
index 0000000..f85160a
--- /dev/null
+++ b/helix-core/src/main/java/org/apache/helix/task/TaskConfig.java
@@ -0,0 +1,333 @@
+/*
+ * $Id$
+ */
+package org.apache.helix.task;
+
+
+import com.google.common.base.Joiner;
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableSet;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+
+/**
+ * Provides a typed interface to task configurations.
+ *
+ * @author Abe <as...@linkedin.com>
+ * @version $Revision$
+ */
+public class TaskConfig
+{
+  //// Property names ////
+
+  /** The name of the workflow to which the task belongs. */
+  public static final String WORKFLOW_ID = "WorkflowID";
+  /** The name of the target resource. */
+  public static final String TARGET_RESOURCE = "TargetResource";
+  /** The set of the target partition states. The value must be a comma-separated list of partition states. */
+  public static final String TARGET_PARTITION_STATES = "TargetPartitionStates";
+  /** The set of the target partition ids. The value must be a comma-separated list of partition ids. */
+  public static final String TARGET_PARTITIONS = "TargetPartitions";
+  /** The command that is to be run by participants. */
+  public static final String COMMAND = "Command";
+  /** The command configuration to be used by the task partitions. */
+  public static final String COMMAND_CONFIG = "CommandConfig";
+  /** The timeout for a single task partition. */
+  public static final String TIMEOUT_PER_PARTITION = "TimeoutPerPartition";
+  /** The maximum number of times the task rebalancer may attempt to execute a task partition. */
+  public static final String MAX_ATTEMPTS_PER_PARTITION = "MaxAttemptsPerPartition";
+  /** The number of concurrent tasks that are allowed to run on an instance. */
+  public static final String NUM_CONCURRENT_TASKS_PER_INSTANCE = "ConcurrentTasksPerInstance";
+
+  //// Default property values ////
+
+  public static final long DEFAULT_TIMEOUT_PER_PARTITION = 60 * 60 * 1000; // 1 hr.
+  public static final int DEFAULT_MAX_ATTEMPTS_PER_PARTITION = 10;
+  public static final int DEFAULT_NUM_CONCURRENT_TASKS_PER_INSTANCE = 1;
+
+  private final String _workflow;
+  private final String _targetResource;
+  private final List<Integer> _targetPartitions;
+  private final Set<String> _targetPartitionStates;
+  private final String _command;
+  private final String _commandConfig;
+  private final long _timeoutPerPartition;
+  private final int _numConcurrentTasksPerInstance;
+  private final int _maxAttemptsPerPartition;
+
+  private TaskConfig(String workflow,
+                     String targetResource,
+                     List<Integer> targetPartitions,
+                     Set<String> targetPartitionStates,
+                     String command,
+                     String commandConfig,
+                     long timeoutPerPartition,
+                     int numConcurrentTasksPerInstance,
+                     int maxAttemptsPerPartition)
+  {
+    _workflow = workflow;
+    _targetResource = targetResource;
+    _targetPartitions = targetPartitions;
+    _targetPartitionStates = targetPartitionStates;
+    _command = command;
+    _commandConfig = commandConfig;
+    _timeoutPerPartition = timeoutPerPartition;
+    _numConcurrentTasksPerInstance = numConcurrentTasksPerInstance;
+    _maxAttemptsPerPartition = maxAttemptsPerPartition;
+  }
+
+  public String getWorkflow()
+  {
+    return _workflow == null ? Workflow.UNSPECIFIED : _workflow;
+  }
+
+  public String getTargetResource()
+  {
+    return _targetResource;
+  }
+
+  public List<Integer> getTargetPartitions()
+  {
+    return _targetPartitions;
+  }
+
+  public Set<String> getTargetPartitionStates()
+  {
+    return _targetPartitionStates;
+  }
+
+  public String getCommand()
+  {
+    return _command;
+  }
+
+  public String getCommandConfig()
+  {
+    return _commandConfig;
+  }
+
+  public long getTimeoutPerPartition()
+  {
+    return _timeoutPerPartition;
+  }
+
+  public int getNumConcurrentTasksPerInstance()
+  {
+    return _numConcurrentTasksPerInstance;
+  }
+
+  public int getMaxAttemptsPerPartition()
+  {
+    return _maxAttemptsPerPartition;
+  }
+
+  public Map<String, String> getResourceConfigMap()
+  {
+    Map<String, String> cfgMap = new HashMap<String,String>();
+    cfgMap.put(TaskConfig.WORKFLOW_ID, _workflow);
+    cfgMap.put(TaskConfig.COMMAND, _command);
+    cfgMap.put(TaskConfig.COMMAND_CONFIG, _commandConfig);
+    cfgMap.put(TaskConfig.TARGET_RESOURCE, _targetResource);
+    cfgMap.put(TaskConfig.TARGET_PARTITION_STATES, Joiner.on(",").join(_targetPartitionStates));
+    if (_targetPartitions != null)
+    {
+      cfgMap.put(TaskConfig.TARGET_PARTITIONS, Joiner.on(",").join(_targetPartitions));
+    }
+    cfgMap.put(TaskConfig.TIMEOUT_PER_PARTITION, "" + _timeoutPerPartition);
+    cfgMap.put(TaskConfig.MAX_ATTEMPTS_PER_PARTITION, "" + _maxAttemptsPerPartition);
+
+    return cfgMap;
+  }
+
+  /**
+   * A builder for {@link TaskConfig}. Validates the configurations.
+   */
+  public static class Builder
+  {
+    private String _workflow;
+    private String _targetResource;
+    private List<Integer> _targetPartitions;
+    private Set<String> _targetPartitionStates;
+    private String _command;
+    private String _commandConfig;
+    private long _timeoutPerPartition = DEFAULT_TIMEOUT_PER_PARTITION;
+    private int _numConcurrentTasksPerInstance = DEFAULT_NUM_CONCURRENT_TASKS_PER_INSTANCE;
+    private int _maxAttemptsPerPartition = DEFAULT_MAX_ATTEMPTS_PER_PARTITION;
+
+    public TaskConfig build()
+    {
+      validate();
+
+      return new TaskConfig(_workflow,
+                            _targetResource,
+                            _targetPartitions,
+                            _targetPartitionStates,
+                            _command,
+                            _commandConfig,
+                            _timeoutPerPartition,
+                            _numConcurrentTasksPerInstance,
+                            _maxAttemptsPerPartition);
+    }
+
+    /**
+     * Convenience method to build a {@link TaskConfig} from a {@code Map<String, String>}.
+     *
+     * @param cfg A map of property names to their string representations.
+     *
+     * @return A {@link Builder}.
+     */
+    public static Builder fromMap(Map<String, String> cfg)
+    {
+      Builder b = new Builder();
+      if (cfg.containsKey(WORKFLOW_ID))
+      {
+        b.setWorkflow(cfg.get(WORKFLOW_ID));
+      }
+      if (cfg.containsKey(TARGET_RESOURCE))
+      {
+        b.setTargetResource(cfg.get(TARGET_RESOURCE));
+      }
+      if (cfg.containsKey(TARGET_PARTITIONS))
+      {
+        b.setTargetPartitions(csvToIntList(cfg.get(TARGET_PARTITIONS)));
+      }
+      if (cfg.containsKey(TARGET_PARTITION_STATES))
+      {
+        b.setTargetPartitionStates(new HashSet<String>(Arrays.asList(cfg.get(TARGET_PARTITION_STATES).split(","))));
+      }
+      if (cfg.containsKey(COMMAND))
+      {
+        b.setCommand(cfg.get(COMMAND));
+      }
+      if (cfg.containsKey(COMMAND_CONFIG))
+      {
+        b.setCommandConfig(cfg.get(COMMAND_CONFIG));
+      }
+      if (cfg.containsKey(TIMEOUT_PER_PARTITION))
+      {
+        b.setTimeoutPerPartition(Long.parseLong(cfg.get(TIMEOUT_PER_PARTITION)));
+      }
+      if (cfg.containsKey(NUM_CONCURRENT_TASKS_PER_INSTANCE))
+      {
+        b.setNumConcurrentTasksPerInstance(Integer.parseInt(cfg.get(NUM_CONCURRENT_TASKS_PER_INSTANCE)));
+      }
+      if (cfg.containsKey(MAX_ATTEMPTS_PER_PARTITION))
+      {
+        b.setMaxAttemptsPerPartition(Integer.parseInt(cfg.get(MAX_ATTEMPTS_PER_PARTITION)));
+      }
+
+      return b;
+    }
+
+    public Builder setWorkflow(String v)
+    {
+      _workflow = v;
+      return this;
+    }
+
+    public Builder setTargetResource(String v)
+    {
+      _targetResource = v;
+      return this;
+    }
+
+    public Builder setTargetPartitions(List<Integer> v)
+    {
+      _targetPartitions = ImmutableList.copyOf(v);
+      return this;
+    }
+
+    public Builder setTargetPartitionStates(Set<String> v)
+    {
+      _targetPartitionStates = ImmutableSet.copyOf(v);
+      return this;
+    }
+
+    public Builder setCommand(String v)
+    {
+      _command = v;
+      return this;
+    }
+
+    public Builder setCommandConfig(String v)
+    {
+      _commandConfig = v;
+      return this;
+    }
+
+    public Builder setTimeoutPerPartition(long v)
+    {
+      _timeoutPerPartition = v;
+      return this;
+    }
+
+    public Builder setNumConcurrentTasksPerInstance(int v)
+    {
+      _numConcurrentTasksPerInstance = v;
+      return this;
+    }
+
+    public Builder setMaxAttemptsPerPartition(int v)
+    {
+      _maxAttemptsPerPartition = v;
+      return this;
+    }
+
+    private void validate()
+    {
+      if (_targetResource == null)
+      {
+        throw new IllegalArgumentException(String.format("%s cannot be null", TARGET_RESOURCE));
+      }
+      if (_targetPartitionStates != null && _targetPartitionStates.isEmpty())
+      {
+        throw new IllegalArgumentException(String.format("%s cannot be an empty set",
+                                                         TARGET_PARTITION_STATES));
+      }
+      if (_command == null)
+      {
+        throw new IllegalArgumentException(String.format("%s cannot be null", COMMAND));
+      }
+      if (_timeoutPerPartition < 0)
+      {
+        throw new IllegalArgumentException(String.format("%s has invalid value %s",
+                                                         TIMEOUT_PER_PARTITION,
+                                                         _timeoutPerPartition));
+      }
+      if (_numConcurrentTasksPerInstance < 1)
+      {
+        throw new IllegalArgumentException(String.format("%s has invalid value %s",
+                                                         NUM_CONCURRENT_TASKS_PER_INSTANCE,
+                                                         _numConcurrentTasksPerInstance));
+      }
+      if (_maxAttemptsPerPartition < 1)
+      {
+        throw new IllegalArgumentException(String.format("%s has invalid value %s",
+                                                         MAX_ATTEMPTS_PER_PARTITION,
+                                                         _maxAttemptsPerPartition));
+      }
+      if(_workflow == null)
+      {
+        throw new IllegalArgumentException(String.format("%s cannot be null", WORKFLOW_ID));
+      }
+    }
+
+    private static List<Integer> csvToIntList(String csv)
+    {
+      String[] vals = csv.split(",");
+      List<Integer> l = new ArrayList<Integer>();
+      for (String v : vals)
+      {
+        l.add(Integer.parseInt(v));
+      }
+
+      return l;
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/helix-core/src/main/java/org/apache/helix/task/TaskConstants.java
----------------------------------------------------------------------
diff --git a/helix-core/src/main/java/org/apache/helix/task/TaskConstants.java b/helix-core/src/main/java/org/apache/helix/task/TaskConstants.java
new file mode 100644
index 0000000..4ff8f0a
--- /dev/null
+++ b/helix-core/src/main/java/org/apache/helix/task/TaskConstants.java
@@ -0,0 +1,31 @@
+/*
+ * $Id$
+ */
+package org.apache.helix.task;
+
+
+/**
+ * Constants used in the task framework.
+ *
+ * @author Abe <as...@linkedin.com>
+ * @version $Revision$
+ */
+public class TaskConstants
+{
+  /**
+   * The name of the {@link Task} state model.
+   */
+  public static final String STATE_MODEL_NAME = "Task";
+  /**
+   * Field in the workflow resource config that stores the task DAG
+   */
+  public static final String WORKFLOW_DAG_FIELD = "dag";
+  /**
+   * Field in workflow resource config for flow name
+   */
+  public static final String WORKFLOW_NAME_FIELD = "name";
+  /**
+   * The root property store path at which the {@link TaskRebalancer} stores context information.
+   */
+  public static final String REBALANCER_CONTEXT_ROOT = "/TaskRebalancer";
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/helix-core/src/main/java/org/apache/helix/task/TaskContext.java
----------------------------------------------------------------------
diff --git a/helix-core/src/main/java/org/apache/helix/task/TaskContext.java b/helix-core/src/main/java/org/apache/helix/task/TaskContext.java
new file mode 100644
index 0000000..59f15f0
--- /dev/null
+++ b/helix-core/src/main/java/org/apache/helix/task/TaskContext.java
@@ -0,0 +1,147 @@
+/*
+ * $Id$
+ */
+package org.apache.helix.task;
+
+
+import java.util.Map;
+import java.util.TreeMap;
+import org.apache.helix.HelixProperty;
+import org.apache.helix.ZNRecord;
+
+
+/**
+ * Provides a typed interface to the context information stored by {@link TaskRebalancer} in the Helix property store.
+ *
+ * @author Abe <as...@linkedin.com>
+ * @version $Revision$
+ */
+public class TaskContext extends HelixProperty
+{
+  public static final String START_TIME = "START_TIME";
+  public static final String PARTITION_STATE = "STATE";
+  public static final String NUM_ATTEMPTS = "NUM_ATTEMPTS";
+  public static final String FINISH_TIME = "FINISH_TIME";
+
+  public TaskContext(ZNRecord record)
+  {
+    super(record);
+  }
+
+  public void setStartTime(long t)
+  {
+    _record.setSimpleField(START_TIME, String.valueOf(t));
+  }
+
+  public long getStartTime()
+  {
+    String tStr = _record.getSimpleField(START_TIME);
+    if (tStr == null)
+    {
+      return -1;
+    }
+
+    return Long.parseLong(tStr);
+  }
+
+  public void setPartitionState(int p, TaskPartitionState s)
+  {
+    String pStr = String.valueOf(p);
+    Map<String, String> map = _record.getMapField(pStr);
+    if (map == null)
+    {
+      map = new TreeMap<String, String>();
+      _record.setMapField(pStr, map);
+    }
+    map.put(PARTITION_STATE, s.name());
+  }
+
+  public TaskPartitionState getPartitionState(int p)
+  {
+    Map<String, String> map = _record.getMapField(String.valueOf(p));
+    if (map == null)
+    {
+      return null;
+    }
+
+    String str = map.get(PARTITION_STATE);
+    if (str != null)
+    {
+      return TaskPartitionState.valueOf(str);
+    }
+    else
+    {
+      return null;
+    }
+  }
+
+  public void setPartitionNumAttempts(int p, int n)
+  {
+    String pStr = String.valueOf(p);
+    Map<String, String> map = _record.getMapField(pStr);
+    if (map == null)
+    {
+      map = new TreeMap<String, String>();
+      _record.setMapField(pStr, map);
+    }
+    map.put(NUM_ATTEMPTS, String.valueOf(n));
+  }
+
+  public int incrementNumAttempts(int pId)
+  {
+    int n = this.getPartitionNumAttempts(pId);
+    if (n < 0)
+    {
+      n = 0;
+    }
+    n += 1;
+    this.setPartitionNumAttempts(pId, n);
+    return n;
+  }
+
+  public int getPartitionNumAttempts(int p)
+  {
+    Map<String, String> map = _record.getMapField(String.valueOf(p));
+    if (map == null)
+    {
+      return -1;
+    }
+
+    String nStr = map.get(NUM_ATTEMPTS);
+    if (nStr == null)
+    {
+      return -1;
+    }
+
+    return Integer.parseInt(nStr);
+  }
+
+  public void setPartitionFinishTime(int p, long t)
+  {
+    String pStr = String.valueOf(p);
+    Map<String, String> map = _record.getMapField(pStr);
+    if (map == null)
+    {
+      map = new TreeMap<String, String>();
+      _record.setMapField(pStr, map);
+    }
+    map.put(FINISH_TIME, String.valueOf(t));
+  }
+
+  public long getPartitionFinishTime(int p)
+  {
+    Map<String, String> map = _record.getMapField(String.valueOf(p));
+    if (map == null)
+    {
+      return -1;
+    }
+
+    String tStr = map.get(FINISH_TIME);
+    if (tStr == null)
+    {
+      return -1;
+    }
+
+    return Long.parseLong(tStr);
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/helix-core/src/main/java/org/apache/helix/task/TaskDag.java
----------------------------------------------------------------------
diff --git a/helix-core/src/main/java/org/apache/helix/task/TaskDag.java b/helix-core/src/main/java/org/apache/helix/task/TaskDag.java
new file mode 100644
index 0000000..009d73d
--- /dev/null
+++ b/helix-core/src/main/java/org/apache/helix/task/TaskDag.java
@@ -0,0 +1,157 @@
+package org.apache.helix.task;
+
+import org.codehaus.jackson.annotate.JsonProperty;
+import org.codehaus.jackson.map.ObjectMapper;
+
+import java.util.Map;
+import java.util.Set;
+import java.util.TreeMap;
+import java.util.TreeSet;
+
+/**
+ * Provides a convenient way to construct, traverse,
+ * and validate a task dependency graph
+ *
+ * @author Chris Beavers <cb...@linkedin.com>
+ */
+public class TaskDag
+{
+  @JsonProperty("parentsToChildren")
+  private Map<String, Set<String>> _parentsToChildren;
+
+  @JsonProperty("childrenToParents")
+  private Map<String, Set<String>> _childrenToParents;
+
+  @JsonProperty("allNodes")
+  private Set<String> _allNodes;
+
+  public static final TaskDag EMPTY_DAG = new TaskDag();
+
+  public TaskDag()
+  {
+    _parentsToChildren = new TreeMap<String, Set<String>>();
+    _childrenToParents = new TreeMap<String, Set<String>>();
+    _allNodes = new TreeSet<String>();
+  }
+
+  public void addParentToChild(String parent, String child)
+  {
+    if(!_parentsToChildren.containsKey(parent))
+    {
+      _parentsToChildren.put(parent, new TreeSet<String>());
+    }
+    _parentsToChildren.get(parent).add(child);
+
+    if(!_childrenToParents.containsKey(child))
+    {
+      _childrenToParents.put(child, new TreeSet<String>());
+    }
+    _childrenToParents.get(child).add(parent);
+
+    _allNodes.add(parent);
+    _allNodes.add(child);
+  }
+
+  public void addNode(String node)
+  {
+    _allNodes.add(node);
+  }
+
+  public Map<String, Set<String>> getParentsToChildren()
+  {
+    return _parentsToChildren;
+  }
+
+  public Map<String, Set<String>> getChildrenToParents()
+  {
+    return _childrenToParents;
+  }
+
+  public Set<String> getAllNodes()
+  {
+    return _allNodes;
+  }
+
+  public Set<String> getDirectChildren(String node)
+  {
+    if(!_parentsToChildren.containsKey(node))
+    {
+      return new TreeSet<String>();
+    }
+    return _parentsToChildren.get(node);
+  }
+
+  public Set<String> getDirectParents(String node)
+  {
+    if(!_childrenToParents.containsKey(node))
+    {
+      return new TreeSet<String>();
+    }
+    return _childrenToParents.get(node);
+  }
+
+  public String toJson() throws Exception
+  {
+    return new ObjectMapper().writeValueAsString(this);
+  }
+
+  public static TaskDag fromJson(String json)
+  {
+    try
+    {
+      return new ObjectMapper().readValue(json, TaskDag.class);
+    }
+    catch(Exception e)
+    {
+      throw new IllegalArgumentException("Unable to parse json " + json + " into task dag");
+    }
+  }
+
+  /**
+   * Checks that dag contains no cycles and all nodes are reachable.
+   */
+  public void validate()
+  {
+    Set<String> prevIteration = new TreeSet<String>();
+
+    // get all unparented nodes
+    for(String node : _allNodes)
+    {
+      if(getDirectParents(node).isEmpty())
+      {
+        prevIteration.add(node);
+      }
+    }
+
+    // visit children nodes up to max iteration count, by which point we should have exited naturally
+    Set<String> allNodesReached = new TreeSet<String>();
+    int iterationCount = 0;
+    int maxIterations = _allNodes.size() + 1;
+
+    while(!prevIteration.isEmpty() && iterationCount < maxIterations)
+    {
+      // construct set of all children reachable from prev iteration
+      Set<String> thisIteration = new TreeSet<String>();
+      for(String node : prevIteration)
+      {
+        thisIteration.addAll(getDirectChildren(node));
+      }
+
+      allNodesReached.addAll(prevIteration);
+      prevIteration = thisIteration;
+      iterationCount++;
+    }
+
+    allNodesReached.addAll(prevIteration);
+
+    if(iterationCount >= maxIterations)
+    {
+      throw new IllegalArgumentException("DAG invalid: cycles detected");
+    }
+
+    if(!allNodesReached.containsAll(_allNodes))
+    {
+      throw new IllegalArgumentException("DAG invalid: unreachable nodes found. Reachable set is " + allNodesReached);
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/helix-core/src/main/java/org/apache/helix/task/TaskDriver.java
----------------------------------------------------------------------
diff --git a/helix-core/src/main/java/org/apache/helix/task/TaskDriver.java b/helix-core/src/main/java/org/apache/helix/task/TaskDriver.java
new file mode 100644
index 0000000..5ce1c31
--- /dev/null
+++ b/helix-core/src/main/java/org/apache/helix/task/TaskDriver.java
@@ -0,0 +1,382 @@
+package org.apache.helix.task;
+
+
+import java.io.File;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+import java.util.TreeMap;
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.CommandLineParser;
+import org.apache.commons.cli.GnuParser;
+import org.apache.commons.cli.HelpFormatter;
+import org.apache.commons.cli.Option;
+import org.apache.commons.cli.OptionBuilder;
+import org.apache.commons.cli.OptionGroup;
+import org.apache.commons.cli.Options;
+import org.apache.commons.cli.ParseException;
+import org.apache.helix.HelixAdmin;
+import org.apache.helix.HelixDataAccessor;
+import org.apache.helix.HelixManager;
+import org.apache.helix.HelixManagerFactory;
+import org.apache.helix.HelixProperty;
+import org.apache.helix.InstanceType;
+import org.apache.helix.model.IdealState;
+import org.apache.helix.model.builder.CustomModeISBuilder;
+import org.apache.log4j.Logger;
+
+
+/**
+ * CLI for scheduling/canceling workflows.
+ *
+ * <p>Also usable programmatically via {@link #start}, {@link #stop},
+ * {@link #resume} and {@link #delete} given a connected {@code HelixManager}.
+ *
+ * @author Chris Beavers <cb...@linkedin.com>
+ */
+public class TaskDriver
+{
+  /** For logging */
+  private static final Logger LOG = Logger.getLogger(TaskDriver.class);
+
+  /** Required option name for Helix endpoint */
+  private static final String ZK_ADDRESS = "zk";
+
+  /** Required option name for cluster against which to run task */
+  private static final String CLUSTER_NAME_OPTION = "cluster";
+
+  /** Required option name for task resource within target cluster */
+  private static final String RESOURCE_OPTION = "resource";
+
+  /** Field for specifying a workflow file when starting a job */
+  private static final String WORKFLOW_FILE_OPTION = "file";
+
+  /** Connected Helix manager used for data access and cluster metadata */
+  private final HelixManager _manager;
+  /** Admin handle derived from the manager; used for resource/config operations */
+  private final HelixAdmin _admin;
+  /** Name of the cluster the manager is connected to */
+  private final String _clusterName;
+
+  /** Commands which may be parsed from the first argument to main */
+  private enum DriverCommand {
+    start, stop, delete, resume, list
+  }
+
+  /**
+   * Constructs a driver bound to an already-connected {@link HelixManager}.
+   *
+   * @param manager connected Helix manager for the target cluster
+   */
+  public TaskDriver(HelixManager manager)
+  {
+    _manager = manager;
+    _clusterName = manager.getClusterName();
+    _admin = manager.getClusterManagmentTool();
+  }
+
+  /**
+   * Parses the first argument as a driver command and the rest of the
+   * arguments are parsed based on that command, then executes the command
+   * against the target cluster.
+   *
+   * @param args first element is a {@link DriverCommand}; remaining elements
+   *             are CLI options (zk address, cluster, resource, workflow file)
+   * @throws Exception on connection failure, bad arguments, or command failure
+   */
+  public static void main(String[] args) throws Exception
+  {
+    // guard: a driver command must be supplied before any options, otherwise
+    // args[0] below would throw ArrayIndexOutOfBoundsException
+    if(args.length < 1)
+    {
+      printUsage(constructOptions(), "[cmd]");
+      throw new IllegalArgumentException("First argument must be one of: start, stop, delete, resume, list");
+    }
+
+    String[] cmdArgs = Arrays.copyOfRange(args, 1, args.length);
+    CommandLine cl = parseOptions(cmdArgs, constructOptions(), args[0]);
+    String zkAddr = cl.getOptionValue(ZK_ADDRESS);
+    String clusterName = cl.getOptionValue(CLUSTER_NAME_OPTION);
+    String resource = cl.getOptionValue(RESOURCE_OPTION);
+
+    if(zkAddr == null || clusterName == null || resource == null)
+    {
+      printUsage(constructOptions(), "[cmd]");
+      throw new IllegalArgumentException("zk, cluster, and resource must all be non-null for all commands");
+    }
+
+    HelixManager helixMgr = HelixManagerFactory.getZKHelixManager(clusterName,
+                                                                  "Admin",
+                                                                  InstanceType.ADMINISTRATOR,
+                                                                  zkAddr);
+    helixMgr.connect();
+    TaskDriver driver = new TaskDriver(helixMgr);
+    try
+    {
+      DriverCommand cmd = DriverCommand.valueOf(args[0]);
+      switch(cmd)
+      {
+        case start:
+          if(cl.hasOption(WORKFLOW_FILE_OPTION))
+          {
+            driver.start(Workflow.parse(new File(cl.getOptionValue(WORKFLOW_FILE_OPTION))));
+          }
+          else
+          {
+            throw new IllegalArgumentException("Workflow file is required to start flow.");
+          }
+          break;
+        case stop:
+          driver.setTaskTargetState(resource, TargetState.STOP);
+          break;
+        case resume:
+          driver.setTaskTargetState(resource, TargetState.START);
+          break;
+        case delete:
+          driver.setTaskTargetState(resource, TargetState.DELETE);
+          break;
+        case list:
+          driver.list(resource);
+          // BUG FIX: break was missing here, so a successful "list" fell
+          // through to the default case and threw "Unknown command list"
+          break;
+        default:
+          throw new IllegalArgumentException("Unknown command " + args[0]);
+      }
+    }
+    catch(IllegalArgumentException e)
+    {
+      LOG.error("Unknown driver command " + args[0]);
+      throw e;
+    }
+
+    helixMgr.disconnect();
+  }
+
+  /**
+   * Schedules a new workflow: validates its DAG, persists the workflow-level
+   * resource config to ZK, then creates one task resource per task in the flow.
+   *
+   * @param flow fully-parsed workflow definition
+   * @throws IllegalArgumentException if the workflow DAG fails validation
+   * @throws Exception if any task resource cannot be created
+   */
+  public void start(Workflow flow) throws Exception
+  {
+    // TODO: check that namespace for workflow is available
+    LOG.info("Starting workflow " + flow.getName());
+    flow.validate();
+
+    String flowName = flow.getName();
+
+    // first, add workflow config to ZK
+    _admin.setConfig(TaskUtil.getResourceConfigScope(_clusterName, flowName),
+              flow.getResourceConfigMap());
+
+    // then schedule tasks
+    for(String task : flow.getTaskConfigs().keySet())
+    {
+      scheduleTask(task, TaskConfig.Builder.fromMap(flow.getTaskConfigs().get(task)).build());
+    }
+  }
+
+  /**
+   * Posts a new task resource to the cluster, sized to match the partition
+   * count of the task's target resource, and installs a user-defined ideal
+   * state driven by {@link TaskRebalancer}.
+   *
+   * @param taskResource name of the task resource to create
+   * @param taskConfig   task configuration; its target resource must already
+   *                     exist in the cluster (its ideal state is read here)
+   * @throws Exception if resource creation or config/ideal-state writes fail
+   */
+  private void scheduleTask(String taskResource, TaskConfig taskConfig) throws Exception
+  {
+    // Set up task resource based on partitions from target resource
+    int numPartitions = _admin.getResourceIdealState(_clusterName, taskConfig.getTargetResource()).getPartitionSet().size();
+    _admin.addResource(_clusterName, taskResource, numPartitions, TaskConstants.STATE_MODEL_NAME);
+    _admin.setConfig(TaskUtil.getResourceConfigScope(_clusterName, taskResource), taskConfig.getResourceConfigMap());
+
+    // Push out new ideal state based on number of target partitions
+    CustomModeISBuilder builder = new CustomModeISBuilder(taskResource);
+    builder.setRebalancerMode(IdealState.RebalanceMode.USER_DEFINED);
+    builder.setNumReplica(1);
+    builder.setNumPartitions(numPartitions);
+    builder.setStateModel(TaskConstants.STATE_MODEL_NAME);
+    for (int i = 0; i < numPartitions; i++)
+    {
+      // partition names follow the "<resource>_<index>" convention relied on by list()
+      builder.add(taskResource + "_" + i);
+    }
+    IdealState is = builder.build();
+    is.setRebalancerClassName(TaskRebalancer.class.getName());
+    _admin.setResourceIdealState(_clusterName, taskResource, is);
+  }
+
+  /**
+   * Public method to resume a task/workflow.
+   *
+   * @param resource task or workflow resource name
+   */
+  public void resume(String resource)
+  {
+    setTaskTargetState(resource, TargetState.START);
+  }
+
+  /**
+   * Public method to stop a task/workflow.
+   *
+   * @param resource task or workflow resource name
+   */
+  public void stop(String resource)
+  {
+    setTaskTargetState(resource, TargetState.STOP);
+  }
+
+  /**
+   * Public method to delete a task/workflow.
+   *
+   * @param resource task or workflow resource name
+   */
+  public void delete(String resource)
+  {
+    setTaskTargetState(resource, TargetState.DELETE);
+  }
+
+  /**
+   * Helper function to change target state for a given task. Writes the
+   * desired {@link TargetState} into the task's resource config and then
+   * forces a rebalance so the controller reacts to the change.
+   *
+   * @param taskResource task or workflow resource name
+   * @param state        desired target state
+   */
+  private void setTaskTargetState(String taskResource, TargetState state)
+  {
+    HelixDataAccessor accessor = _manager.getHelixDataAccessor();
+    HelixProperty p = new HelixProperty(taskResource);
+    p.getRecord().setSimpleField(WorkflowConfig.TARGET_STATE, state.name());
+    accessor.updateProperty(accessor.keyBuilder().resourceConfig(taskResource), p);
+
+    // see invokeRebalance(): config changes alone do not trigger a rebalance yet
+    invokeRebalance();
+  }
+
+  /**
+   * Logs a human-readable summary of a workflow: its tasks, the overall
+   * workflow state, and per-task counts of partitions grouped by state.
+   *
+   * <p>NOTE(review): assumes the workflow config and context already exist in
+   * ZK for {@code resource}; a missing config/context would NPE below —
+   * confirm callers only invoke this on previously scheduled workflows.
+   *
+   * @param resource workflow resource name
+   */
+  public void list(String resource)
+  {
+    WorkflowConfig wCfg = TaskUtil.getWorkflowCfg(_manager, resource);
+    WorkflowContext wCtx = TaskUtil.getWorkflowContext(_manager, resource);
+
+    LOG.info("Workflow " + resource + " consists of the following tasks: " + wCfg.getTaskDag().getAllNodes());
+    LOG.info("Current state of workflow is " + wCtx.getWorkflowState().name());
+    LOG.info("Task states are: ");
+    LOG.info("-------");
+    for(String task : wCfg.getTaskDag().getAllNodes())
+    {
+      LOG.info("Task " + task + " is " + wCtx.getTaskState(task));
+
+      // fetch task information
+      TaskContext tCtx = TaskUtil.getTaskContext(_manager, task);
+      TaskConfig tCfg = TaskUtil.getTaskCfg(_manager, task);
+
+      // calculate taskPartitions: explicit target partitions if configured,
+      // otherwise derive them from the target resource's partition names
+      List<Integer> partitions;
+      if(tCfg.getTargetPartitions() != null)
+      {
+        partitions = tCfg.getTargetPartitions();
+      }
+      else
+      {
+        partitions = new ArrayList<Integer>();
+        for(String pStr : _admin.getResourceIdealState(_clusterName, tCfg.getTargetResource()).getPartitionSet())
+        {
+          // partition names follow the "<resource>_<index>" convention
+          partitions.add(Integer.parseInt(pStr.substring(pStr.lastIndexOf("_") + 1, pStr.length())));
+        }
+      }
+
+      // group partitions by status
+      Map<TaskPartitionState, Integer> statusCount = new TreeMap<TaskPartitionState, Integer>();
+      for(Integer i : partitions)
+      {
+        TaskPartitionState s = tCtx.getPartitionState(i);
+        if(!statusCount.containsKey(s))
+        {
+          statusCount.put(s, 0);
+        }
+        statusCount.put(s, statusCount.get(s) + 1);
+      }
+
+      for(TaskPartitionState s : statusCount.keySet())
+      {
+        LOG.info(statusCount.get(s) + "/" + partitions.size() + " in state " + s.name());
+      }
+
+      LOG.info("-------");
+    }
+  }
+
+  /**
+   * Hack to invoke rebalance until bug concerning resource config changes not
+   * driving rebalance is fixed.
+   *
+   * <p>Re-writes the ideal state of the first task resource found (unchanged),
+   * which is enough to make the controller run a rebalance pass. No-op if the
+   * cluster contains no resources using the task state model.
+   */
+  public void invokeRebalance()
+  {
+    // find a task
+    for(String resource : _admin.getResourcesInCluster(_clusterName))
+    {
+      IdealState is = _admin.getResourceIdealState(_clusterName, resource);
+      if(is.getStateModelDefRef().equals(TaskConstants.STATE_MODEL_NAME))
+      {
+        HelixDataAccessor accessor = _manager.getHelixDataAccessor();
+        accessor.updateProperty(accessor.keyBuilder().idealStates(resource), is);
+        break;
+      }
+    }
+  }
+
+  /**
+   * Constructs options set for all basic control messages.
+   *
+   * <p>NOTE(review): Commons CLI's {@code OptionGroup} normally models
+   * mutually exclusive options — confirm that grouping the three required
+   * options together still allows them to be supplied simultaneously.
+   */
+  private static Options constructOptions()
+  {
+    Options options = new Options();
+    options.addOptionGroup(contructGenericRequiredOptionGroup());
+    options.addOptionGroup(constructStartOptionGroup());
+    return options;
+  }
+
+  /**
+   * Constructs option group containing options required by all drivable tasks:
+   * ZK address, cluster name, and task/workflow resource name.
+   *
+   * <p>NOTE(review): method name is misspelled ("contruct"); renaming requires
+   * updating the call site in {@code constructOptions()} as well.
+   */
+  private static OptionGroup contructGenericRequiredOptionGroup()
+  {
+    Option zkAddressOption = OptionBuilder.isRequired().withLongOpt(ZK_ADDRESS)
+                                          .withDescription("ZK address managing target cluster").create();
+    zkAddressOption.setArgs(1);
+    zkAddressOption.setArgName("zkAddress");
+
+    Option clusterNameOption = OptionBuilder.isRequired().withLongOpt(CLUSTER_NAME_OPTION)
+                                                  .withDescription("Target cluster name").create();
+    clusterNameOption.setArgs(1);
+    clusterNameOption.setArgName("clusterName");
+
+    Option taskResourceOption = OptionBuilder.isRequired().withLongOpt(RESOURCE_OPTION)
+                                            .withDescription("Target workflow or task").create();
+    taskResourceOption.setArgs(1);
+    taskResourceOption.setArgName("resourceName");
+
+    OptionGroup group = new OptionGroup();
+    group.addOption(zkAddressOption);
+    group.addOption(clusterNameOption);
+    group.addOption(taskResourceOption);
+    return group;
+  }
+
+  /**
+   * Constructs option group containing options used only by the {@code start}
+   * command (the local workflow description file).
+   */
+  private static OptionGroup constructStartOptionGroup()
+  {
+    Option workflowFileOption = OptionBuilder.withLongOpt(WORKFLOW_FILE_OPTION)
+                                          .withDescription("Local file describing workflow").create();
+    workflowFileOption.setArgs(1);
+    workflowFileOption.setArgName("workflowFile");
+
+    OptionGroup group = new OptionGroup();
+    group.addOption(workflowFileOption);
+    return group;
+  }
+
+  /**
+   * Attempts to parse options for given command, printing usage under failure.
+   *
+   * <p>Note: calls {@code System.exit(1)} on parse failure or bad argument
+   * counts rather than throwing, so this is only suitable for CLI use.
+   *
+   * @param args    command-line arguments (excluding the driver command)
+   * @param options option definitions to parse against
+   * @param cmdStr  command name, used only in the usage message
+   * @return the parsed command line (never null; the process exits on failure)
+   */
+  private static CommandLine parseOptions(String[] args, Options options, String cmdStr)
+  {
+    CommandLineParser cliParser = new GnuParser();
+    CommandLine cmd = null;
+
+    try
+    {
+      cmd = cliParser.parse(options, args);
+    }
+    catch (ParseException pe)
+    {
+      LOG.error("CommandLineClient: failed to parse command-line options: "
+              + pe.toString());
+      printUsage(options, cmdStr);
+      System.exit(1);
+    }
+    boolean ret = checkOptionArgsNumber(cmd.getOptions());
+    if (!ret)
+    {
+      printUsage(options, cmdStr);
+      System.exit(1);
+    }
+
+    return cmd;
+  }
+
+  /**
+   * Ensures options argument counts are correct: each parsed option must carry
+   * exactly the number of argument values its definition declares.
+   *
+   * @param options options actually parsed from the command line
+   * @return true if every option has the declared number of arguments
+   */
+  private static boolean checkOptionArgsNumber(Option[] options)
+  {
+    for (Option option : options)
+    {
+      int argNb = option.getArgs();
+      String[] args = option.getValues();
+      if (argNb == 0)
+      {
+        // flag option: must have no values at all
+        if (args != null && args.length > 0)
+        {
+          System.err.println(option.getArgName() + " shall have " + argNb + " arguments (was "
+                  + Arrays.toString(args) + ")");
+          return false;
+        }
+      } else
+      {
+        // valued option: must have exactly argNb values
+        if (args == null || args.length != argNb)
+        {
+          System.err.println(option.getArgName() + " shall have " + argNb + " arguments (was "
+                  + Arrays.toString(args) + ")");
+          return false;
+        }
+      }
+    }
+    return true;
+  }
+
+  /**
+   * Displays CLI usage for given option set and command name.
+   *
+   * @param cliOptions option definitions to render
+   * @param cmd        command name shown in the usage line
+   */
+  private static void printUsage(Options cliOptions, String cmd)
+  {
+    HelpFormatter helpFormatter = new HelpFormatter();
+    // wide output so long option descriptions are not wrapped
+    helpFormatter.setWidth(1000);
+    helpFormatter.printHelp("java " + TaskDriver.class.getName() + " " + cmd, cliOptions);
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/helix-core/src/main/java/org/apache/helix/task/TaskFactory.java
----------------------------------------------------------------------
diff --git a/helix-core/src/main/java/org/apache/helix/task/TaskFactory.java b/helix-core/src/main/java/org/apache/helix/task/TaskFactory.java
new file mode 100644
index 0000000..02d5cf2
--- /dev/null
+++ b/helix-core/src/main/java/org/apache/helix/task/TaskFactory.java
@@ -0,0 +1,23 @@
+/*
+ * $Id$
+ */
+package org.apache.helix.task;
+
+
+/**
+ * A factory for {@link Task} objects. Implementations are registered with the
+ * task framework to instantiate tasks from their serialized configuration.
+ *
+ * @author Abe <as...@linkedin.com>
+ * @version $Revision$
+ */
+public interface TaskFactory
+{
+  /**
+   * Returns a {@link Task} instance.
+   *
+   * @param config Configuration information for the task.
+   *
+   * @return A new {@link Task} instance built from the given configuration.
+   */
+  Task createNewTask(String config);
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/helix-core/src/main/java/org/apache/helix/task/TaskPartitionState.java
----------------------------------------------------------------------
diff --git a/helix-core/src/main/java/org/apache/helix/task/TaskPartitionState.java b/helix-core/src/main/java/org/apache/helix/task/TaskPartitionState.java
new file mode 100644
index 0000000..245bb7a
--- /dev/null
+++ b/helix-core/src/main/java/org/apache/helix/task/TaskPartitionState.java
@@ -0,0 +1,31 @@
+/*
+ * $Id$
+ */
+package org.apache.helix.task;
+
+
+/**
+ * Enumeration of the states in the "Task" state model. The first six states
+ * are driven by the task framework; ERROR and DROPPED are Helix-internal.
+ *
+ * @author Abe <as...@linkedin.com>
+ * @version $Revision$
+ */
+public enum TaskPartitionState
+{
+  /** The initial state of the state model. */
+  INIT,
+  /** Indicates that the task is currently running. */
+  RUNNING,
+  /** Indicates that the task was stopped by the controller. */
+  STOPPED,
+  /** Indicates that the task completed normally. */
+  COMPLETED,
+  /** Indicates that the task timed out. */
+  TIMED_OUT,
+  /** Indicates an error occurred during task execution. */
+  TASK_ERROR,
+  /** Helix's own internal error state. */
+  ERROR,
+  /** A Helix internal state. */
+  DROPPED
+}


[10/15] Adding Helix-task-framework and Yarn integration modules

Posted by ki...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/provider/ProviderStateModel.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/provider/ProviderStateModel.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/provider/ProviderStateModel.java
new file mode 100644
index 0000000..814387f
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/provider/ProviderStateModel.java
@@ -0,0 +1,114 @@
+package org.apache.helix.autoscale.provider;
+
+import org.apache.helix.NotificationContext;
+import org.apache.helix.autoscale.ClusterAdmin;
+import org.apache.helix.autoscale.ContainerProvider;
+import org.apache.helix.model.Message;
+import org.apache.helix.participant.statemachine.StateModel;
+import org.apache.helix.participant.statemachine.StateModelInfo;
+import org.apache.helix.participant.statemachine.Transition;
+import org.apache.log4j.Logger;
+
+/**
+ * Helix state model implementation for {@link ContainerProvider}s. Updates
+ * configuration of managed Helix cluster and spawns and destroys container
+ * instances. Each partition of the provider resource corresponds to one
+ * container; the partition name is used as the container id and the resource
+ * name as the container type.
+ */
+@StateModelInfo(initialState = "OFFLINE", states = { "OFFLINE", "ONLINE" })
+public class ProviderStateModel extends StateModel {
+
+    static final Logger log = Logger.getLogger(ProviderStateModel.class);
+
+    // provider that physically creates/destroys container processes
+    ContainerProvider   provider;
+    // admin handle for the managed cluster's instance membership
+    ClusterAdmin        admin;
+
+    public ProviderStateModel(ContainerProvider provider, ClusterAdmin admin) {
+        this.provider = provider;
+        this.admin = admin;
+    }
+
+    /**
+     * OFFLINE -> ONLINE: registers the container as an instance of the managed
+     * cluster and spawns it via the provider. Any stale container/instance with
+     * the same id is removed first (best effort). Rebalance failures are logged
+     * and ignored.
+     */
+    @Transition(from = "OFFLINE", to = "ONLINE")
+    public void acquire(Message m, NotificationContext context) throws Exception {
+        String containerType = m.getResourceName();
+        String containerId = m.getPartitionName();
+        String instanceId = context.getManager().getInstanceName();
+
+        log.trace(String.format("%s:%s transitioning from OFFLINE to ONLINE", containerId, instanceId));
+
+        // clear out any stale container/instance left over from a previous owner
+        bestEffortRemove(containerId);
+
+        // add instance to cluster
+        admin.addInstance(containerId, containerType);
+
+        // create container
+        provider.create(containerId, containerType);
+
+        try {
+            admin.rebalance();
+        } catch (Exception e) {
+            // ignore: best-effort, the next rebalance pass will catch up
+            log.warn(String.format("rebalancing cluster failed (error='%s')", e.getMessage()));
+        }
+
+        log.info(String.format("%s acquired container '%s' (type='%s')", instanceId, containerId, containerType));
+    }
+
+    /**
+     * ONLINE -> OFFLINE: destroys the container and removes its instance from
+     * the managed cluster (best effort), then attempts a rebalance.
+     */
+    @Transition(from = "ONLINE", to = "OFFLINE")
+    public void release(Message m, NotificationContext context) {
+        String containerId = m.getPartitionName();
+        String instanceId = context.getManager().getInstanceName();
+
+        log.trace(String.format("%s:%s transitioning from ONLINE to OFFLINE", containerId, instanceId));
+
+        bestEffortRemove(containerId);
+
+        try {
+            admin.rebalance();
+        } catch (Exception e) {
+            // ignore: best-effort, the next rebalance pass will catch up
+            log.warn(String.format("rebalancing cluster failed (error='%s')", e.getMessage()));
+        }
+
+        log.info(String.format("%s destroyed container '%s'", instanceId, containerId));
+
+    }
+
+    /**
+     * ERROR -> OFFLINE: recovery is identical to a normal release — tear down
+     * whatever may still exist for this container.
+     */
+    @Transition(from = "ERROR", to = "OFFLINE")
+    public void recover(Message m, NotificationContext context) {
+        String containerId = m.getPartitionName();
+        String instanceId = context.getManager().getInstanceName();
+
+        log.trace(String.format("%s:%s transitioning from ERROR to OFFLINE", containerId, instanceId));
+
+        release(m, context);
+    }
+
+    /** OFFLINE -> DROPPED: nothing to clean up; logged for traceability only. */
+    @Transition(from = "OFFLINE", to = "DROPPED")
+    public void drop(Message m, NotificationContext context) {
+        String containerId = m.getPartitionName();
+        String instanceId = context.getManager().getInstanceName();
+
+        log.trace(String.format("%s:%s transitioning from OFFLINE to DROPPED", containerId, instanceId));
+    }
+
+    /**
+     * Destroys the container and removes its cluster instance, swallowing
+     * failures: either may legitimately not exist (e.g. first acquisition or a
+     * crashed container), so errors are logged at debug level and ignored.
+     */
+    private void bestEffortRemove(String containerId) {
+        log.debug(String.format("Best effort removal of container '%s'", containerId));
+
+        try {
+            provider.destroy(containerId);
+            log.debug(String.format("Container '%s' destroyed", containerId));
+        } catch (Exception e) {
+            log.debug(String.format("Container '%s' does not exist", containerId));
+        }
+
+        try {
+            admin.removeInstance(containerId);
+            log.debug(String.format("Instance '%s' removed", containerId));
+        } catch (Exception e) {
+            log.debug(String.format("Instance '%s' does not exist", containerId));
+        }
+
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/provider/ProviderStateModelFactory.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/provider/ProviderStateModelFactory.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/provider/ProviderStateModelFactory.java
new file mode 100644
index 0000000..2613336
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/provider/ProviderStateModelFactory.java
@@ -0,0 +1,27 @@
+package org.apache.helix.autoscale.provider;
+
+import org.apache.helix.autoscale.ClusterAdmin;
+import org.apache.helix.autoscale.ContainerProvider;
+import org.apache.helix.participant.statemachine.StateModelFactory;
+
+/**
+ * Factory for {@link ProviderStateModel}. Injects {@link ClusterAdmin} for
+ * managed cluster and {@link ContainerProvider}. All state model instances
+ * created by this factory share the same provider and admin.
+ */
+class ProviderStateModelFactory extends StateModelFactory<ProviderStateModel> {
+
+    final ContainerProvider provider;
+    final ClusterAdmin      admin;
+
+    public ProviderStateModelFactory(ContainerProvider provider, ClusterAdmin admin) {
+        super();
+        this.provider = provider;
+        this.admin = admin;
+    }
+
+    /**
+     * Creates a state model for one partition (container); the partition name
+     * itself is not needed since all models share the injected collaborators.
+     */
+    @Override
+    public ProviderStateModel createNewStateModel(String partitionName) {
+        return new ProviderStateModel(provider, admin);
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/resources/Boot2By2Local.properties
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/resources/Boot2By2Local.properties b/recipes/auto-scale/src/main/resources/Boot2By2Local.properties
new file mode 100644
index 0000000..13fb4ff
--- /dev/null
+++ b/recipes/auto-scale/src/main/resources/Boot2By2Local.properties
@@ -0,0 +1,87 @@
+#
+# Zookeeper (optional)
+#
+zookeeper.datadir=/tmp/autoscale/zk/data
+zookeeper.logdir=/tmp/autoscale/zk/log
+zookeeper.port=2199
+
+#
+# Cluster
+#
+cluster.name=cluster
+cluster.address=localhost:2199
+
+#
+# Resource
+#
+resource.0.name=wsprod
+resource.0.cluster=cluster
+resource.0.address=localhost:2199
+resource.0.container=webserver
+resource.0.model=MasterSlave
+resource.0.partitions=15
+resource.0.replica=1
+
+resource.1.name=dbprod
+resource.1.cluster=cluster
+resource.1.address=localhost:2199
+resource.1.container=database
+resource.1.model=MasterSlave
+resource.1.partitions=8
+resource.1.replica=3
+
+#
+# Controller
+#
+controller.name=controller
+controller.cluster=cluster
+controller.address=localhost:2199
+controller.autorefresh=5000
+
+#
+# Metacluster
+#
+metacluster.name=meta
+metacluster.address=localhost:2199
+metacluster.managedcluster=cluster
+metacluster.managedaddress=localhost:2199
+
+#
+# Metaresource
+#
+metaresource.0.name=webserver
+metaresource.0.metacluster=meta
+metaresource.0.metaaddress=localhost:2199
+metaresource.0.class=org.apache.helix.autoscale.impl.container.DummyMasterSlaveProcess
+
+metaresource.1.name=database
+metaresource.1.metacluster=meta
+metaresource.1.metaaddress=localhost:2199
+metaresource.1.class=org.apache.helix.autoscale.impl.container.DummyMasterSlaveProcess
+
+#
+# Metaprovider
+#
+metaprovider.0.name=provider0
+metaprovider.0.metacluster=meta
+metaprovider.0.metaaddress=localhost:2199
+metaprovider.0.class=org.apache.helix.autoscale.impl.local.LocalContainerProviderProcess
+
+metaprovider.1.name=provider1
+metaprovider.1.metacluster=meta
+metaprovider.1.metaaddress=localhost:2199
+metaprovider.1.class=org.apache.helix.autoscale.impl.local.LocalContainerProviderProcess
+
+#
+# Metacontroller
+#
+metacontroller.name=metacontroller
+metacontroller.metacluster=meta
+metacontroller.metaaddress=localhost:2199
+metacontroller.autorefresh=5000
+
+metacontroller.status.class=org.apache.helix.autoscale.impl.local.LocalStatusProvider
+
+metacontroller.target.class=org.apache.helix.autoscale.impl.StaticTargetProvider
+metacontroller.target.webserver=5
+metacontroller.target.database=3

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/resources/Boot2By2Shell.properties
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/resources/Boot2By2Shell.properties b/recipes/auto-scale/src/main/resources/Boot2By2Shell.properties
new file mode 100644
index 0000000..079771b
--- /dev/null
+++ b/recipes/auto-scale/src/main/resources/Boot2By2Shell.properties
@@ -0,0 +1,87 @@
+#
+# Zookeeper (optional)
+#
+zookeeper.datadir=/tmp/autoscale/zk/data
+zookeeper.logdir=/tmp/autoscale/zk/log
+zookeeper.port=2199
+
+#
+# Cluster
+#
+cluster.name=cluster
+cluster.address=localhost:2199
+
+#
+# Resource
+#
+resource.0.name=wsprod
+resource.0.cluster=cluster
+resource.0.address=localhost:2199
+resource.0.container=webserver
+resource.0.model=MasterSlave
+resource.0.partitions=15
+resource.0.replica=1
+
+resource.1.name=dbprod
+resource.1.cluster=cluster
+resource.1.address=localhost:2199
+resource.1.container=database
+resource.1.model=MasterSlave
+resource.1.partitions=8
+resource.1.replica=3
+
+#
+# Controller
+#
+controller.name=controller
+controller.cluster=cluster
+controller.address=localhost:2199
+controller.autorefresh=5000
+
+#
+# Metacluster
+#
+metacluster.name=meta
+metacluster.address=localhost:2199
+metacluster.managedcluster=cluster
+metacluster.managedaddress=localhost:2199
+
+#
+# Metaresource
+#
+metaresource.0.name=webserver
+metaresource.0.metacluster=meta
+metaresource.0.metaaddress=localhost:2199
+metaresource.0.class=org.apache.helix.autoscale.impl.container.DummyMasterSlaveProcess
+
+metaresource.1.name=database
+metaresource.1.metacluster=meta
+metaresource.1.metaaddress=localhost:2199
+metaresource.1.class=org.apache.helix.autoscale.impl.container.DummyMasterSlaveProcess
+
+#
+# Metaprovider
+#
+metaprovider.0.name=provider0
+metaprovider.0.metacluster=meta
+metaprovider.0.metaaddress=localhost:2199
+metaprovider.0.class=org.apache.helix.autoscale.impl.shell.ShellContainerProviderProcess
+
+metaprovider.1.name=provider1
+metaprovider.1.metacluster=meta
+metaprovider.1.metaaddress=localhost:2199
+metaprovider.1.class=org.apache.helix.autoscale.impl.shell.ShellContainerProviderProcess
+
+#
+# Metacontroller
+#
+metacontroller.name=metacontroller
+metacontroller.metacluster=meta
+metacontroller.metaaddress=localhost:2199
+metacontroller.autorefresh=5000
+
+metacontroller.status.class=org.apache.helix.autoscale.impl.shell.ShellStatusProvider
+
+metacontroller.target.class=org.apache.helix.autoscale.impl.StaticTargetProvider
+metacontroller.target.webserver=5
+metacontroller.target.database=3

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/resources/Boot2By2Yarn.properties
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/resources/Boot2By2Yarn.properties b/recipes/auto-scale/src/main/resources/Boot2By2Yarn.properties
new file mode 100644
index 0000000..e447711
--- /dev/null
+++ b/recipes/auto-scale/src/main/resources/Boot2By2Yarn.properties
@@ -0,0 +1,98 @@
+#
+# Zookeeper (optional)
+#
+zookeeper.datadir=/tmp/autoscale/zk/data
+zookeeper.logdir=/tmp/autoscale/zk/log
+zookeeper.port=2199
+
+#
+# Cluster
+#
+cluster.name=cluster
+cluster.address=rm:2199
+
+#
+# Resource
+#
+resource.0.name=wsprod
+resource.0.cluster=cluster
+resource.0.address=rm:2199
+resource.0.container=webserver
+resource.0.model=MasterSlave
+resource.0.partitions=15
+resource.0.replica=1
+
+resource.1.name=dbprod
+resource.1.cluster=cluster
+resource.1.address=rm:2199
+resource.1.container=database
+resource.1.model=MasterSlave
+resource.1.partitions=8
+resource.1.replica=3
+
+#
+# Controller
+#
+controller.name=controller
+controller.cluster=cluster
+controller.address=rm:2199
+controller.autorefresh=5000
+
+#
+# Metacluster
+#
+metacluster.name=meta
+metacluster.address=rm:2199
+metacluster.managedcluster=cluster
+metacluster.managedaddress=rm:2199
+
+#
+# Metaresource
+#
+metaresource.0.name=webserver
+metaresource.0.metacluster=meta
+metaresource.0.metaaddress=rm:2199
+metaresource.0.class=org.apache.helix.autoscale.impl.container.DummyMasterSlaveProcess
+
+metaresource.1.name=database
+metaresource.1.metacluster=meta
+metaresource.1.metaaddress=rm:2199
+metaresource.1.class=org.apache.helix.autoscale.impl.container.DummyMasterSlaveProcess
+
+#
+# Metaprovider
+#
+metaprovider.0.name=provider0
+metaprovider.0.metacluster=meta
+metaprovider.0.metaaddress=rm:2199
+metaprovider.0.class=org.apache.helix.autoscale.impl.yarn.YarnContainerProviderProcess
+metaprovider.0.yarndata=rm:2199
+# NOTE(review): key spelled "resourcemananger" — confirm it matches the key the provider reads
+metaprovider.0.resourcemananger=rm:8032
+metaprovider.0.scheduler=rm:8030
+metaprovider.0.user=yarn
+metaprovider.0.hdfs=hdfs://rm:9000/
+
+metaprovider.1.name=provider1
+metaprovider.1.metacluster=meta
+metaprovider.1.metaaddress=rm:2199
+metaprovider.1.class=org.apache.helix.autoscale.impl.yarn.YarnContainerProviderProcess
+metaprovider.1.yarndata=rm:2199
+metaprovider.1.resourcemananger=rm:8032
+metaprovider.1.scheduler=rm:8030
+metaprovider.1.user=yarn
+metaprovider.1.hdfs=hdfs://rm:9000/
+
+#
+# Metacontroller
+#
+metacontroller.name=metacontroller
+metacontroller.metacluster=meta
+metacontroller.metaaddress=rm:2199
+metacontroller.autorefresh=5000
+
+metacontroller.status.class=org.apache.helix.autoscale.impl.yarn.YarnStatusProvider
+metacontroller.status.yarndata=rm:2199
+
+metacontroller.target.class=org.apache.helix.autoscale.impl.StaticTargetProvider
+metacontroller.target.webserver=5
+metacontroller.target.database=3

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/resources/BootLocal.properties
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/resources/BootLocal.properties b/recipes/auto-scale/src/main/resources/BootLocal.properties
new file mode 100644
index 0000000..15905fc
--- /dev/null
+++ b/recipes/auto-scale/src/main/resources/BootLocal.properties
@@ -0,0 +1,68 @@
+#
+# Zookeeper (optional)
+#
+zookeeper.datadir=/tmp/autoscale/zk/data
+zookeeper.logdir=/tmp/autoscale/zk/log
+zookeeper.port=2199
+
+#
+# Cluster
+#
+cluster.name=cluster
+cluster.address=localhost:2199
+
+#
+# Resource
+#
+resource.name=resource
+resource.cluster=cluster
+resource.address=localhost:2199
+resource.container=container
+resource.model=MasterSlave
+resource.partitions=10
+resource.replica=3
+
+#
+# Controller
+#
+controller.name=controller
+controller.cluster=cluster
+controller.address=localhost:2199
+controller.autorefresh=5000
+
+#
+# Metacluster
+#
+metacluster.name=meta
+metacluster.address=localhost:2199
+metacluster.managedcluster=cluster
+metacluster.managedaddress=localhost:2199
+
+#
+# Metaresource
+#
+metaresource.name=container
+metaresource.metacluster=meta
+metaresource.metaaddress=localhost:2199
+metaresource.class=org.apache.helix.autoscale.impl.container.DummyMasterSlaveProcess
+
+#
+# Metaprovider
+#
+metaprovider.name=provider
+metaprovider.metacluster=meta
+metaprovider.metaaddress=localhost:2199
+metaprovider.class=org.apache.helix.autoscale.impl.local.LocalContainerProviderProcess
+
+#
+# Metacontroller
+#
+metacontroller.name=metacontroller
+metacontroller.metacluster=meta
+metacontroller.metaaddress=localhost:2199
+metacontroller.autorefresh=5000
+
+metacontroller.status.class=org.apache.helix.autoscale.impl.local.LocalStatusProvider
+
+metacontroller.target.class=org.apache.helix.autoscale.impl.StaticTargetProvider
+metacontroller.target.container=7

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/resources/RedisYarnSample.properties
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/resources/RedisYarnSample.properties b/recipes/auto-scale/src/main/resources/RedisYarnSample.properties
new file mode 100644
index 0000000..eb58fd2
--- /dev/null
+++ b/recipes/auto-scale/src/main/resources/RedisYarnSample.properties
@@ -0,0 +1,89 @@
+###############################################################################
+# Managed cluster configuration
+###############################################################################
+
+cluster.name                      = rediscluster
+cluster.address                   = helix.zookeeper.intra
+
+#
+# Helix cluster resources
+# (as usual)
+#
+resource.0.name                   = redis
+resource.0.cluster                = rediscluster
+resource.0.address                = helix.zookeeper.intra
+resource.0.container              = rediscontainer           # see meta cluster
+resource.0.model                  = OnlineOffline
+resource.0.partitions             = 1024                     # are auto-assigned
+resource.0.replica                = 1                        # no replication needed
+
+#
+# Helix cluster controllers
+# (as usual)
+#
+controller.name                   = controller
+controller.cluster                = rediscluster
+controller.address                = helix.zookeeper.intra
+
+###############################################################################
+# Meta cluster configuration
+###############################################################################
+
+metacluster.name                  = meta
+metacluster.address               = helix.zookeeper.intra
+metacluster.managedcluster        = rediscluster
+metacluster.managedaddress        = helix.zookeeper.intra
+
+#
+# Container Types
+# (Base configuration for instances spawned by providers)
+#
+metaresource.0.name               = rediscontainer
+metaresource.0.class              = org.apache.helix.autoscale.impl.container.RedisServerProcess
+metaresource.0.metacluster        = meta
+metaresource.0.metaaddress        = helix.zookeeper.intra
+metaresource.0.address            = apps.zookeeper.intra
+metaresource.0.baseport           = 17000                    # instance id is added
+
+#
+# Container Instance Providers
+# (Endpoints of container deployment frameworks, e.g. Apache YARN)
+#
+metaprovider.0.name               = provider
+metaprovider.0.class              = org.apache.helix.autoscale.impl.yarn.YarnContainerProviderProcess
+metaprovider.0.metacluster        = meta
+metaprovider.0.metaaddress        = helix.zookeeper.intra
+metaprovider.0.yarndata           = yarn.zookeeper.intra
+# NOTE(review): key reads "resourcemananger" — likely a typo for "resourcemanager";
+# confirm which spelling the YARN provider configuration parser expects before renaming.
+metaprovider.0.resourcemananger   = yarn-rm.intra:8032
+metaprovider.0.scheduler          = yarn-rm.intra:8030
+metaprovider.0.hdfs               = hdfs://yarn-hdfs.intra:9000/
+metaprovider.0.user               = yarnuser
+
+#
+# Helix meta cluster controller
+# (Monitors system state and adapts config of Container Instance Providers)
+#
+metacontroller.name               = metacontroller
+metacontroller.metacluster        = meta
+metacontroller.metaaddress        = helix.zookeeper.intra
+metacontroller.autorefresh        = 10000                    # status refresh interval
+
+#
+# Container Status Provider
+# (Provides low-level data on container instance health)
+#
+metacontroller.status.class       = org.apache.helix.autoscale.impl.yarn.YarnStatusProvider
+metacontroller.status.yarndata    = yarn.zookeeper.intra
+
+#
+# Performance Target Model
+# (Provides target number of container instances)
+#
+metacontroller.target.class       = org.apache.helix.autoscale.impl.RedisTargetProvider
+metacontroller.target.address     = apps.zookeeper.intra
+metacontroller.target.interval    = 10000                    # interval for Tps probes
+metacontroller.target.timeout     = 9000                     # timeout of probe
+metacontroller.target.get         = 1000000                  # target "GET" Tps
+metacontroller.target.min         = 1                        # min container count
+metacontroller.target.max         = 23                       # max container count
+metacontroller.target.alpha       = 0.1                      # exponential average
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/resources/log4j.properties
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/resources/log4j.properties b/recipes/auto-scale/src/main/resources/log4j.properties
new file mode 100644
index 0000000..7f29be2
--- /dev/null
+++ b/recipes/auto-scale/src/main/resources/log4j.properties
@@ -0,0 +1,30 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# Set root logger level to DEBUG and its only appender to R.
+log4j.rootLogger=ERROR, C
+
+# A1 is set to be a ConsoleAppender.
+log4j.appender.C=org.apache.log4j.ConsoleAppender
+log4j.appender.C.layout=org.apache.log4j.PatternLayout
+log4j.appender.C.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n
+
+log4j.logger.org.I0Itec=ERROR
+log4j.logger.org.apache=ERROR
+
+log4j.logger.org.apache.helix.autoscale=INFO

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/test/config/testng.xml
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/test/config/testng.xml b/recipes/auto-scale/src/test/config/testng.xml
new file mode 100644
index 0000000..f710791
--- /dev/null
+++ b/recipes/auto-scale/src/test/config/testng.xml
@@ -0,0 +1,27 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+<!DOCTYPE suite SYSTEM "http://testng.org/testng-1.0.dtd">
+<suite name="Suite" parallel="none">
+  <test name="Test" preserve-order="true">
+    <packages>
+      <package name="org.apache.helix.autoscale.*"/>
+    </packages>
+  </test>
+</suite>

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/BootstrapperIT.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/BootstrapperIT.java b/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/BootstrapperIT.java
new file mode 100644
index 0000000..5dd7820
--- /dev/null
+++ b/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/BootstrapperIT.java
@@ -0,0 +1,134 @@
+package org.apache.helix.autoscale;
+
+import java.io.IOException;
+import java.util.Collection;
+import java.util.Properties;
+
+import org.apache.helix.HelixAdmin;
+import org.apache.helix.autoscale.Service;
+import org.apache.helix.autoscale.bootstrapper.Boot;
+import org.apache.helix.autoscale.bootstrapper.ClusterService;
+import org.apache.helix.autoscale.bootstrapper.ControllerService;
+import org.apache.helix.autoscale.bootstrapper.MetaClusterService;
+import org.apache.helix.autoscale.bootstrapper.MetaControllerService;
+import org.apache.helix.autoscale.bootstrapper.MetaProviderService;
+import org.apache.helix.autoscale.bootstrapper.MetaResourceService;
+import org.apache.helix.autoscale.bootstrapper.ResourceService;
+import org.apache.helix.autoscale.bootstrapper.ZookeeperService;
+import org.apache.helix.manager.zk.ZKHelixAdmin;
+import org.apache.log4j.Logger;
+import org.testng.Assert;
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.Test;
+
+/**
+ * Bootstrapping tool test. Reads cluster configuration from *.properties files,
+ * spawns services and verifies number of active partitions and containers
+ * 
+ * @see Boot
+ */
+@Test(groups = { "integration", "boot" })
+public class BootstrapperIT {
+
+    static final Logger log = Logger.getLogger(BootstrapperIT.class);
+
+    // Bootstrapper under test; configured from a *.properties resource and spawns
+    // zookeeper, clusters, resources, providers and controllers as services.
+    Boot                boot;
+    // Helix admin connected to the embedded zookeeper; used to verify instance
+    // and partition counts after bootstrap.
+    HelixAdmin          admin;
+
+    // Tears down after every test: close the admin connection first, then stop
+    // all bootstrapped services. Null-out so a repeated call is a no-op.
+    @AfterMethod(alwaysRun = true)
+    public void teardown() throws Exception {
+        log.debug("tearing down bootstrap test");
+        if (admin != null) {
+            admin.close();
+            admin = null;
+        }
+        if (boot != null) {
+            boot.stop();
+            boot = null;
+        }
+    }
+
+    // Boots a full single-machine setup from BootLocal.properties and verifies
+    // that one service of each expected type was created, then waits for the
+    // meta cluster (1 provider instance, 7 container partitions) and the managed
+    // cluster (7 instances, 10 partitions) to converge.
+    @Test(timeOut = TestUtils.TEST_TIMEOUT, groups = { "local" })
+    public void bootstrapLocalTest() throws Exception {
+        boot = new Boot();
+        boot.configure(getProperties("BootLocal.properties"));
+        boot.start();
+
+        // NOTE(review): Boot.getServcies() — "Services" is misspelled in the Boot
+        // API; consider renaming upstream (all call sites below would follow).
+        Assert.assertTrue(containsInstanceOf(boot.getServcies(), ZookeeperService.class));
+        Assert.assertTrue(containsInstanceOf(boot.getServcies(), ClusterService.class));
+        Assert.assertTrue(containsInstanceOf(boot.getServcies(), ResourceService.class));
+        Assert.assertTrue(containsInstanceOf(boot.getServcies(), ControllerService.class));
+        Assert.assertTrue(containsInstanceOf(boot.getServcies(), MetaClusterService.class));
+        Assert.assertTrue(containsInstanceOf(boot.getServcies(), MetaResourceService.class));
+        Assert.assertTrue(containsInstanceOf(boot.getServcies(), MetaProviderService.class));
+        Assert.assertTrue(containsInstanceOf(boot.getServcies(), MetaControllerService.class));
+
+        // Single deadline shared by both waits so the total never exceeds
+        // REBALANCE_TIMEOUT.
+        final long limit = System.currentTimeMillis() + TestUtils.REBALANCE_TIMEOUT;
+
+        admin = new ZKHelixAdmin("localhost:2199");
+        waitUntil(admin, "meta", "container", 1, 7, (limit - System.currentTimeMillis()));
+        waitUntil(admin, "cluster", "resource", 7, 10, (limit - System.currentTimeMillis()));
+
+    }
+
+    // 2-provider / 2-resource setup backed by in-process (local) containers.
+    @Test(timeOut = TestUtils.TEST_TIMEOUT, groups = { "local" })
+    public void bootstrap2By2LocalTest() throws Exception {
+        boot = new Boot();
+        boot.configure(getProperties("Boot2By2Local.properties"));
+        boot.start();
+
+        verify2By2Setup();
+    }
+
+    // Same topology backed by shell-process containers.
+    @Test(timeOut = TestUtils.TEST_TIMEOUT, groups = { "shell" })
+    public void bootstrap2By2ShellTest() throws Exception {
+        boot = new Boot();
+        boot.configure(getProperties("Boot2By2Shell.properties"));
+        boot.start();
+
+        verify2By2Setup();
+    }
+
+    // Same topology backed by YARN containers; requires a running YARN setup.
+    @Test(timeOut = TestUtils.TEST_TIMEOUT, groups = { "yarn" })
+    public void bootstrap2By2YarnTest() throws Exception {
+        boot = new Boot();
+        boot.configure(getProperties("Boot2By2Yarn.properties"));
+        boot.start();
+
+        verify2By2Setup();
+    }
+
+    // Verifies the 2x2 topology: meta cluster hosts "database" and "webserver"
+    // container resources; managed cluster hosts "dbprod" and "wsprod".
+    void verify2By2Setup() throws Exception {
+        final long limit = System.currentTimeMillis() + TestUtils.REBALANCE_TIMEOUT;
+        final String address = "localhost:2199";
+
+        log.debug(String.format("connecting to zookeeper at '%s'", address));
+
+        admin = new ZKHelixAdmin(address);
+        waitUntil(admin, "meta", "database", 2, 3, (limit - System.currentTimeMillis()));
+        waitUntil(admin, "meta", "webserver", 2, 5, (limit - System.currentTimeMillis()));
+        waitUntil(admin, "cluster", "dbprod", 3, 8, (limit - System.currentTimeMillis()));
+        waitUntil(admin, "cluster", "wsprod", 5, 15, (limit - System.currentTimeMillis()));
+    }
+
+    // Blocks until the given resource reaches the expected live-instance and
+    // partition counts; both checks share one deadline derived from timeout.
+    static void waitUntil(HelixAdmin admin, String cluster, String resource, int instanceCount, int partitionCount, long timeout) throws Exception {
+        final long limit = System.currentTimeMillis() + timeout;
+        TestUtils.waitUntilInstanceCount(admin, cluster, resource, instanceCount, (limit - System.currentTimeMillis()));
+        TestUtils.waitUntilPartitionCount(admin, cluster, resource, partitionCount, (limit - System.currentTimeMillis()));
+    }
+
+    // Loads a *.properties file from the classpath.
+    // NOTE(review): the stream returned by getSystemResourceAsStream is never
+    // closed, and load() will NPE if the resource is missing — consider
+    // try-with-resources and an explicit null check upstream.
+    static Properties getProperties(String resourcePath) throws IOException {
+        Properties properties = new Properties();
+        properties.load(ClassLoader.getSystemResourceAsStream(resourcePath));
+        return properties;
+    }
+
+    // True if any service in the collection is an instance of clazz (or a subtype).
+    static boolean containsInstanceOf(Collection<Service> services, Class<?> clazz) {
+        for (Service service : services) {
+            if (clazz.isAssignableFrom(service.getClass()))
+                return true;
+        }
+        return false;
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/FailoverIT.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/FailoverIT.java b/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/FailoverIT.java
new file mode 100644
index 0000000..429146a
--- /dev/null
+++ b/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/FailoverIT.java
@@ -0,0 +1,195 @@
+package org.apache.helix.autoscale;
+
+import java.util.Collections;
+import java.util.Iterator;
+
+import org.apache.helix.autoscale.Service;
+import org.apache.helix.autoscale.impl.StaticTargetProvider;
+import org.apache.helix.autoscale.impl.local.LocalContainerProviderProcess;
+import org.apache.helix.autoscale.impl.local.LocalContainerSingleton;
+import org.apache.helix.autoscale.impl.local.LocalStatusProvider;
+import org.apache.helix.autoscale.impl.shell.ShellContainerProviderProcess;
+import org.apache.helix.autoscale.impl.shell.ShellContainerSingleton;
+import org.apache.helix.autoscale.impl.shell.ShellStatusProvider;
+import org.apache.helix.autoscale.impl.yarn.YarnContainerProviderProcess;
+import org.apache.helix.autoscale.impl.yarn.YarnStatusProvider;
+import org.apache.helix.autoscale.impl.yarn.ZookeeperYarnDataProvider;
+import org.apache.helix.autoscale.provider.ProviderRebalancer;
+import org.apache.log4j.Logger;
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.Test;
+
+/**
+ * Fault-recovery test for individual containers and whole providers. Missing
+ * containers should be replaced by the meta cluster Rebalancer using remaining
+ * active providers.
+ * 
+ * @see ProviderRebalancer
+ */
+@Test(groups = { "integration", "failure" })
+public class FailoverIT {
+
+    static final Logger  log             = Logger.getLogger(FailoverIT.class);
+
+    // Target number of containers the meta cluster must restore after failures.
+    static final int     CONTAINER_COUNT = 7;
+
+    // Fixed-count target provider shared by all tests in this class.
+    StaticTargetProvider targetProvider;
+    // Kept as a field so teardownTest() can stop it for the yarn-based tests.
+    YarnStatusProvider   yarnStatusProvider;
+
+    // Registers a JVM shutdown hook so external processes (shell/yarn
+    // containers, zookeeper) are cleaned up even if the suite aborts.
+    @BeforeClass(alwaysRun = true)
+    public void setupClass() {
+        log.info("installing shutdown hook");
+        Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() {
+            @Override
+            public void run() {
+                try {
+                    teardownTest();
+                } catch (Exception ignore) {
+                }
+                ;
+            }
+        }));
+    }
+
+    // Runs teardown first to clear any leftover state from a previously
+    // aborted test, then creates a fresh target provider.
+    @BeforeMethod(alwaysRun = true)
+    public void setupTest() throws Exception {
+        teardownTest();
+        targetProvider = new StaticTargetProvider(Collections.singletonMap(TestUtils.metaResourceName, CONTAINER_COUNT));
+    }
+
+    @AfterMethod(alwaysRun = true)
+    public void teardownTest() throws Exception {
+        TestUtils.stopTestCluster();
+
+        if (yarnStatusProvider != null) {
+            yarnStatusProvider.stop();
+            yarnStatusProvider = null;
+        }
+
+        TestUtils.stopZookeeper();
+    }
+
+    @Test(timeOut = TestUtils.TEST_TIMEOUT, groups = { "local" })
+    public void testLocalContainerFailover() throws Exception {
+        log.info("testing local container failover");
+        TestUtils.configure();
+        TestUtils.startZookeeper();
+        TestUtils.startTestCluster(targetProvider, new LocalStatusProvider(), makeLocalProviders(3));
+        killLocalContainers();
+    }
+
+    @Test(timeOut = TestUtils.TEST_TIMEOUT, groups = { "local" })
+    public void testLocalProviderFailover() throws Exception {
+        log.info("testing local provider failover");
+        TestUtils.configure();
+        TestUtils.startZookeeper();
+        TestUtils.startTestCluster(targetProvider, new LocalStatusProvider(), makeLocalProviders(3));
+        killProvider();
+    }
+
+    @Test(timeOut = TestUtils.TEST_TIMEOUT, groups = { "shell" })
+    public void testShellContainerFailover() throws Exception {
+        log.info("testing shell container failover");
+        TestUtils.configure();
+        TestUtils.startZookeeper();
+        TestUtils.startTestCluster(targetProvider, new ShellStatusProvider(), makeShellProviders(3));
+        killShellContainers();
+    }
+
+    @Test(timeOut = TestUtils.TEST_TIMEOUT, groups = { "shell" })
+    public void testShellProviderFailover() throws Exception {
+        log.info("testing shell provider failover");
+        TestUtils.configure();
+        TestUtils.startZookeeper();
+        TestUtils.startTestCluster(targetProvider, new ShellStatusProvider(), makeShellProviders(3));
+        killProvider();
+    }
+
+    // Yarn tests use the distributed configuration and a zookeeper-backed
+    // status provider, started here and stopped in teardownTest().
+    @Test(timeOut = TestUtils.TEST_TIMEOUT, groups = { "yarn" })
+    public void testYarnContainerFailover() throws Exception {
+        log.info("testing yarn container failover");
+        TestUtils.configure("distributed.properties");
+        TestUtils.startZookeeper();
+        yarnStatusProvider = new YarnStatusProvider(TestUtils.zkAddress);
+        yarnStatusProvider.start();
+        TestUtils.startTestCluster(targetProvider, yarnStatusProvider, makeYarnProviders(3));
+        killYarnContainers();
+    }
+
+    @Test(timeOut = TestUtils.TEST_TIMEOUT, groups = { "yarn" })
+    public void testYarnProviderFailover() throws Exception {
+        log.info("testing yarn provider failover");
+        TestUtils.configure("distributed.properties");
+        TestUtils.startZookeeper();
+        yarnStatusProvider = new YarnStatusProvider(TestUtils.zkAddress);
+        yarnStatusProvider.start();
+        TestUtils.startTestCluster(targetProvider, yarnStatusProvider, makeYarnProviders(3));
+        killProvider();
+    }
+
+    // Kills three of the seven containers, then rebalances and expects the
+    // full count to be restored.
+    void killLocalContainers() throws Exception {
+        LocalContainerSingleton.killProcess("container_2");
+        LocalContainerSingleton.killProcess("container_4");
+        LocalContainerSingleton.killProcess("container_6");
+        // give status providers time to observe the failure
+        // NOTE(review): fixed 3s sleep — confirm it is long enough on slow hosts
+        Thread.sleep(3000);
+        TestUtils.rebalanceTestCluster();
+        TestUtils.waitUntilRebalancedCount(CONTAINER_COUNT);
+    }
+
+    void killShellContainers() throws Exception {
+        ShellContainerSingleton.killProcess("container_2");
+        ShellContainerSingleton.killProcess("container_4");
+        ShellContainerSingleton.killProcess("container_6");
+        Thread.sleep(3000);
+        TestUtils.rebalanceTestCluster();
+        TestUtils.waitUntilRebalancedCount(CONTAINER_COUNT);
+    }
+
+    // Simulates yarn container loss by deleting their metadata entries.
+    void killYarnContainers() throws Exception {
+        ZookeeperYarnDataProvider yarnDataService = new ZookeeperYarnDataProvider(TestUtils.zkAddress);
+        yarnDataService.start();
+        yarnDataService.delete("container_2");
+        yarnDataService.delete("container_4");
+        yarnDataService.delete("container_6");
+        yarnDataService.stop();
+        Thread.sleep(3000);
+        TestUtils.rebalanceTestCluster();
+        TestUtils.waitUntilRebalancedCount(CONTAINER_COUNT);
+    }
+
+    // Stops one of the three providers; the remaining providers must take over
+    // its containers to restore the target count.
+    static void killProvider() throws Exception {
+        Iterator<Service> itService = TestUtils.providerServices.iterator();
+        itService.next().stop();
+        itService.remove();
+
+        TestUtils.rebalanceTestCluster();
+        TestUtils.waitUntilRebalancedCount(CONTAINER_COUNT);
+    }
+
+    // Creates count local providers named provider_0 .. provider_{count-1}.
+    LocalContainerProviderProcess[] makeLocalProviders(int count) throws Exception {
+        LocalContainerProviderProcess[] localProviders = new LocalContainerProviderProcess[count];
+        for (int i = 0; i < count; i++) {
+            localProviders[i] = TestUtils.makeLocalProvider("provider_" + i);
+        }
+        return localProviders;
+    }
+
+    ShellContainerProviderProcess[] makeShellProviders(int count) throws Exception {
+        ShellContainerProviderProcess[] shellProviders = new ShellContainerProviderProcess[count];
+        for (int i = 0; i < count; i++) {
+            shellProviders[i] = TestUtils.makeShellProvider("provider_" + i);
+        }
+        return shellProviders;
+    }
+
+    YarnContainerProviderProcess[] makeYarnProviders(int count) throws Exception {
+        YarnContainerProviderProcess[] yarnProviders = new YarnContainerProviderProcess[count];
+        for (int i = 0; i < count; i++) {
+            yarnProviders[i] = TestUtils.makeYarnProvider("provider_" + i);
+        }
+        return yarnProviders;
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/LocalContainerProviderIT.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/LocalContainerProviderIT.java b/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/LocalContainerProviderIT.java
new file mode 100644
index 0000000..94ea5ac
--- /dev/null
+++ b/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/LocalContainerProviderIT.java
@@ -0,0 +1,80 @@
+package org.apache.helix.autoscale;
+
+import java.util.Collections;
+
+import org.apache.helix.autoscale.impl.StaticTargetProvider;
+import org.apache.helix.autoscale.impl.local.LocalContainerProviderProcess;
+import org.apache.helix.autoscale.impl.local.LocalStatusProvider;
+import org.apache.log4j.Logger;
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.Test;
+
+/**
+ * Local container provider and local status provider test. Scale-up and -down
+ * only, no failures.
+ * 
+ * @see LocalContainerProvider
+ * @see LocalStatusProvider
+ */
+@Test(groups = { "integration", "local" })
+public class LocalContainerProviderIT {
+
+    static final Logger           log             = Logger.getLogger(LocalContainerProviderIT.class);
+
+    // Baseline container count each test scales up/down from.
+    static final int              CONTAINER_COUNT = 4;
+
+    StaticTargetProvider          clusterStatusProvider;
+    LocalContainerProviderProcess containerProvider;
+    LocalStatusProvider           containerStatusProvider;
+
+    // Fresh cluster per test: teardown any leftover state, then start
+    // zookeeper and a single local provider with a static target.
+    @BeforeMethod(alwaysRun = true)
+    public void setupTest() throws Exception {
+        teardownTest();
+        TestUtils.configure();
+        TestUtils.startZookeeper();
+        containerProvider = TestUtils.makeLocalProvider("provider_0");
+        clusterStatusProvider = new StaticTargetProvider(Collections.singletonMap(TestUtils.metaResourceName, CONTAINER_COUNT));
+        containerStatusProvider = new LocalStatusProvider();
+        TestUtils.startTestCluster(clusterStatusProvider, containerStatusProvider, containerProvider);
+    }
+
+    @AfterMethod(alwaysRun = true)
+    public void teardownTest() throws Exception {
+        TestUtils.stopTestCluster();
+        TestUtils.stopZookeeper();
+    }
+
+    // No scaling: the cluster should simply reach the baseline count.
+    @Test(timeOut = TestUtils.TEST_TIMEOUT)
+    public void testStatic() throws Exception {
+        log.info("testing static");
+        setContainerCount(CONTAINER_COUNT);
+    }
+
+    @Test(timeOut = TestUtils.TEST_TIMEOUT)
+    public void testScaleUp() throws Exception {
+        log.info("testing scale up");
+        setContainerCount(CONTAINER_COUNT + 2);
+    }
+
+    @Test(timeOut = TestUtils.TEST_TIMEOUT)
+    public void testScaleDown() throws Exception {
+        log.info("testing scale down");
+        setContainerCount(CONTAINER_COUNT - 2);
+    }
+
+    // Up, back to baseline, down, back to baseline — exercises both directions
+    // within a single cluster lifetime.
+    @Test(timeOut = TestUtils.TEST_TIMEOUT)
+    public void testScaleCycle() throws Exception {
+        log.info("testing scale cycle");
+        setContainerCount(CONTAINER_COUNT + 2);
+        setContainerCount(CONTAINER_COUNT);
+        setContainerCount(CONTAINER_COUNT - 2);
+        setContainerCount(CONTAINER_COUNT);
+    }
+
+    // Updates the static target and triggers a rebalance; rebalanceTestCluster
+    // is expected to block until the cluster converges.
+    void setContainerCount(int newContainerCount) throws Exception {
+        log.debug(String.format("Setting container count to %d", newContainerCount));
+        clusterStatusProvider.setTargetContainerCount(TestUtils.metaResourceName, newContainerCount);
+        TestUtils.rebalanceTestCluster();
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/ShellContainerProviderIT.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/ShellContainerProviderIT.java b/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/ShellContainerProviderIT.java
new file mode 100644
index 0000000..dce4429
--- /dev/null
+++ b/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/ShellContainerProviderIT.java
@@ -0,0 +1,95 @@
+package org.apache.helix.autoscale;
+
+import java.util.Collections;
+
+import org.apache.helix.autoscale.impl.StaticTargetProvider;
+import org.apache.helix.autoscale.impl.shell.ShellContainerProviderProcess;
+import org.apache.helix.autoscale.impl.shell.ShellStatusProvider;
+import org.apache.log4j.Logger;
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.Test;
+
+/**
+ * Shell container provider and shell status provider test. Scale-up and -down
+ * only, no failures.
+ * 
+ * @see ShellContainerProvider
+ * @see ShellStatusProvider
+ */
+@Test(groups = { "integration", "shell" })
+public class ShellContainerProviderIT {
+
+    static final Logger           log               = Logger.getLogger(ShellContainerProviderIT.class);
+
+    // NOTE(review): these two local timeouts are unused — the @Test annotations
+    // below reference TestUtils.TEST_TIMEOUT instead; consider removing them.
+    static final long             TEST_TIMEOUT      = 20000;
+    static final long             REBALANCE_TIMEOUT = 10000;
+
+    static final int              CONTAINER_COUNT   = 4;
+
+    StaticTargetProvider          clusterStatusProvider;
+    ShellContainerProviderProcess containerProvider;
+    ShellStatusProvider           containerStatusProvider;
+	
+	// Shutdown hook ensures spawned shell processes and zookeeper are cleaned
+	// up even if the suite aborts.
+	// NOTE(review): this block is tab-indented while the rest of the class uses
+	// spaces — normalize in a follow-up formatting pass.
+	@BeforeClass(alwaysRun = true)
+	public void setupClass() {
+		log.info("installing shutdown hook");
+		Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() {
+			@Override
+			public void run() {
+				try { teardownTest(); } catch(Exception ignore) {};
+			}
+		}));
+	}
+	
+    // Fresh cluster per test: teardown leftovers, then start zookeeper and a
+    // single shell-based provider with a static target.
+    @BeforeMethod(alwaysRun = true)
+    public void setupTest() throws Exception {
+        teardownTest();
+        TestUtils.configure();
+        TestUtils.startZookeeper();
+        containerProvider = TestUtils.makeShellProvider("provider_0");
+        clusterStatusProvider = new StaticTargetProvider(Collections.singletonMap(TestUtils.metaResourceName, CONTAINER_COUNT));
+        containerStatusProvider = new ShellStatusProvider();
+        TestUtils.startTestCluster(clusterStatusProvider, containerStatusProvider, containerProvider);
+    }
+
+    @AfterMethod(alwaysRun = true)
+    public void teardownTest() throws Exception {
+        TestUtils.stopTestCluster();
+        TestUtils.stopZookeeper();
+    }
+
+    // No scaling: the cluster should simply reach the baseline count.
+    @Test(timeOut = TestUtils.TEST_TIMEOUT)
+    public void testStatic() throws Exception {
+        log.info("testing static");
+        setContainerCount(CONTAINER_COUNT);
+    }
+
+    @Test(timeOut = TestUtils.TEST_TIMEOUT)
+    public void testScaleUp() throws Exception {
+        log.info("testing scale up");
+        setContainerCount(CONTAINER_COUNT + 2);
+    }
+
+    @Test(timeOut = TestUtils.TEST_TIMEOUT)
+    public void testScaleDown() throws Exception {
+        log.info("testing scale down");
+        setContainerCount(CONTAINER_COUNT - 2);
+    }
+
+    // Up, back to baseline, down, back to baseline — both directions in one
+    // cluster lifetime.
+    @Test(timeOut = TestUtils.TEST_TIMEOUT)
+    public void testScaleCycle() throws Exception {
+        log.info("testing scale cycle");
+        setContainerCount(CONTAINER_COUNT + 2);
+        setContainerCount(CONTAINER_COUNT);
+        setContainerCount(CONTAINER_COUNT - 2);
+        setContainerCount(CONTAINER_COUNT);
+    }
+
+    // Updates the static target and triggers a rebalance.
+    void setContainerCount(int newContainerCount) throws Exception {
+        log.debug(String.format("Setting container count to %d", newContainerCount));
+        clusterStatusProvider.setTargetContainerCount(TestUtils.metaResourceName, newContainerCount);
+        TestUtils.rebalanceTestCluster();
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/TestUtils.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/TestUtils.java b/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/TestUtils.java
new file mode 100644
index 0000000..c68b2ca
--- /dev/null
+++ b/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/TestUtils.java
@@ -0,0 +1,443 @@
+package org.apache.helix.autoscale;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+import java.util.Set;
+import java.util.concurrent.TimeoutException;
+
+import org.I0Itec.zkclient.IDefaultNameSpace;
+import org.I0Itec.zkclient.ZkClient;
+import org.I0Itec.zkclient.ZkServer;
+import org.apache.commons.io.FileUtils;
+import org.apache.helix.HelixAdmin;
+import org.apache.helix.HelixManager;
+import org.apache.helix.autoscale.Service;
+import org.apache.helix.autoscale.StatusProvider;
+import org.apache.helix.autoscale.StatusProviderService;
+import org.apache.helix.autoscale.TargetProvider;
+import org.apache.helix.autoscale.TargetProviderService;
+import org.apache.helix.autoscale.impl.local.LocalContainerProviderProcess;
+import org.apache.helix.autoscale.impl.shell.ShellContainerProviderProcess;
+import org.apache.helix.autoscale.impl.yarn.YarnContainerProviderProcess;
+import org.apache.helix.autoscale.impl.yarn.YarnContainerProviderProperties;
+import org.apache.helix.autoscale.provider.ProviderProperties;
+import org.apache.helix.autoscale.provider.ProviderRebalancer;
+import org.apache.helix.autoscale.provider.ProviderRebalancerSingleton;
+import org.apache.helix.controller.HelixControllerMain;
+import org.apache.helix.manager.zk.ZKHelixAdmin;
+import org.apache.helix.model.ExternalView;
+import org.apache.helix.model.IdealState;
+import org.apache.helix.model.IdealState.RebalanceMode;
+import org.apache.helix.model.StateModelDefinition;
+import org.apache.helix.tools.StateModelConfigGenerator;
+import org.apache.log4j.Logger;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.Maps;
+
+/**
+ * Utility for creating a test cluster without the bootstrapping tool. Methods
+ * for verifying the number of active instances and partitions in a cluster.
+ * 
+ */
+public class TestUtils {
+
+    static final Logger                    log                      = Logger.getLogger(TestUtils.class);
+
+    public static int                      zkPort;
+    public static String                   zkAddress;
+    public static String                   resmanAddress;
+    public static String                   schedulerAddress;
+    public static String                   hdfsAddress;
+    public static String                   yarnUser;
+
+    public static final String             metaClusterName          = "meta-cluster";
+    public static final String             managedClusterName       = "managed-cluster";
+    public static final String             metaResourceName         = "container";
+    public static final String             managedResourceName      = "database";
+
+    public static final int                numManagedPartitions     = 10;
+    public static final int                numManagedReplica        = 2;
+
+    public static final long               TEST_TIMEOUT             = 120000;
+    public static final long               REBALANCE_TIMEOUT        = 60000;
+    public static final long               POLL_INTERVAL            = 1000;
+
+    public static final ProviderProperties providerProperties       = new ProviderProperties();
+
+    public static ZkServer                 server                   = null;
+    public static HelixAdmin               admin                    = null;
+    public static HelixManager             metaControllerManager    = null;
+    public static HelixManager             managedControllerManager = null;
+
+    public static Collection<Service>      providerServices         = new ArrayList<Service>();
+    public static Collection<Service>      auxServices              = new ArrayList<Service>();
+
+    public static TargetProvider           targetProvider           = null;
+    public static StatusProvider           statusProvider           = null;
+
+    static {
+        try {
+            configure();
+        } catch(Exception e) {
+            log.error("Could not setup TestUtils", e);
+            throw new RuntimeException(e);
+        }
+    }
+    
+    private TestUtils() {
+        // left blank
+    }
+    
+    public static void configure() throws IOException {
+        configure("standalone.properties");
+    }
+    
+    public static void configure(String resourcePath) throws IOException {
+        log.info(String.format("Configuring Test cluster from %s", resourcePath));
+        Properties properties = new Properties();
+        properties.load(ClassLoader.getSystemResourceAsStream(resourcePath));
+        configure(properties);
+    }
+
+    public static void configure(Properties properties) {
+        log.info(String.format("Configuring from properties '%s'", properties));
+        
+        zkPort = Integer.valueOf(properties.getProperty("zookeeper.port"));
+        zkAddress = properties.getProperty("zookeeper.address");
+        resmanAddress = properties.getProperty("yarn.resourcemanager");
+        schedulerAddress = properties.getProperty("yarn.scheduler");
+        hdfsAddress = properties.getProperty("yarn.hdfs");
+        yarnUser = properties.getProperty("yarn.user");
+        
+        Preconditions.checkNotNull(zkPort);
+        Preconditions.checkNotNull(zkAddress);
+        Preconditions.checkNotNull(resmanAddress);
+        Preconditions.checkNotNull(schedulerAddress);
+        Preconditions.checkNotNull(hdfsAddress);
+        Preconditions.checkNotNull(yarnUser);
+        
+        configureInternal();
+    }
+    
+    static void configureInternal() {
+        providerProperties.clear();
+        providerProperties.setProperty(ProviderProperties.ADDRESS, zkAddress);
+        providerProperties.setProperty(ProviderProperties.CLUSTER, managedClusterName);
+        providerProperties.setProperty(ProviderProperties.METAADDRESS, zkAddress);
+        providerProperties.setProperty(ProviderProperties.METACLUSTER, metaClusterName);
+        providerProperties.setProperty(ProviderProperties.NAME, "<unknown>");
+    
+        Properties containerProperties = new Properties();
+        containerProperties.setProperty("class", "org.apache.helix.autoscale.impl.container.DummyMasterSlaveProcess");
+    
+        providerProperties.addContainer("container", containerProperties);
+    
+        log.info(String.format("Using provider properties '%s'", providerProperties));
+    }
+
+    public static void startZookeeper() throws Exception {
+        log.info("Starting ZooKeeper");
+
+        if (server != null)
+            throw new IllegalStateException("Zookeeper already running");
+
+        server = createLocalZookeeper();
+        server.start();
+    }
+
+    public static void stopZookeeper() throws Exception {
+        log.info("Stopping ZooKeeper");
+
+        if (server != null) {
+            server.shutdown();
+            server = null;
+        }
+    }
+
+    public static void startTestCluster(TargetProviderService targetProvider, StatusProviderService statusProvider, Service... containerProviderProcesses)
+            throws Exception {
+        log.debug(String.format("Starting test cluster"));
+
+        if (server == null)
+            throw new IllegalStateException("Zookeeper not running yet");
+
+        if (!auxServices.isEmpty() || !providerServices.isEmpty() || admin != null || metaControllerManager != null || managedControllerManager != null)
+            throw new IllegalStateException("TestCluster already running");
+
+        log.debug("Create admin");
+        admin = new ZKHelixAdmin(zkAddress);
+
+        log.debug("Create clusters");
+        admin.addCluster(metaClusterName, true);
+        admin.addCluster(managedClusterName, true);
+
+        log.debug("Setup config tool");
+        ProviderRebalancerSingleton.setTargetProvider(targetProvider);
+        ProviderRebalancerSingleton.setStatusProvider(statusProvider);
+
+        log.debug("Starting target and status provider");
+        TestUtils.targetProvider = startAuxService(targetProvider);
+        TestUtils.statusProvider = startAuxService(statusProvider);
+
+        // Managed Cluster
+        log.debug("Setup managed cluster");
+        admin.addStateModelDef(managedClusterName, "MasterSlave", new StateModelDefinition(StateModelConfigGenerator.generateConfigForMasterSlave()));
+        admin.addResource(managedClusterName, managedResourceName, numManagedPartitions, "MasterSlave", RebalanceMode.FULL_AUTO.toString());
+        IdealState managedIdealState = admin.getResourceIdealState(managedClusterName, managedResourceName);
+        managedIdealState.setInstanceGroupTag(metaResourceName);
+        managedIdealState.setReplicas(String.valueOf(numManagedReplica));
+        admin.setResourceIdealState(managedClusterName, managedResourceName, managedIdealState);
+
+        // Meta Cluster
+        log.debug("Setup meta cluster");
+        admin.addStateModelDef(metaClusterName, "OnlineOffline", new StateModelDefinition(StateModelConfigGenerator.generateConfigForOnlineOffline()));
+        admin.addResource(metaClusterName, metaResourceName, targetProvider.getTargetContainerCount(metaResourceName), "OnlineOffline",
+                RebalanceMode.USER_DEFINED.toString());
+
+        IdealState idealState = admin.getResourceIdealState(metaClusterName, metaResourceName);
+        idealState.setRebalancerClassName(ProviderRebalancer.class.getName());
+        idealState.setReplicas("1");
+
+        // BEGIN workaround
+        // FIXME workaround for HELIX-226
+        Map<String, List<String>> listFields = Maps.newHashMap();
+        Map<String, Map<String, String>> mapFields = Maps.newHashMap();
+        for (int i = 0; i < 256; i++) {
+            String partitionName = metaResourceName + "_" + i;
+            listFields.put(partitionName, new ArrayList<String>());
+            mapFields.put(partitionName, new HashMap<String, String>());
+        }
+        idealState.getRecord().setListFields(listFields);
+        idealState.getRecord().setMapFields(mapFields);
+        // END workaround
+
+        admin.setResourceIdealState(metaClusterName, metaResourceName, idealState);
+
+        log.debug("Starting container providers");
+        for (Service service : containerProviderProcesses) {
+            startProviderService(service);
+        }
+
+        log.debug("Starting managed cluster controller");
+        managedControllerManager = HelixControllerMain.startHelixController(zkAddress, managedClusterName, "managedController", HelixControllerMain.STANDALONE);
+
+        log.debug("Starting meta cluster controller");
+        metaControllerManager = HelixControllerMain.startHelixController(zkAddress, metaClusterName, "metaController", HelixControllerMain.STANDALONE);
+
+        log.debug("Waiting for stable state");
+        waitUntilRebalancedCount(targetProvider.getTargetContainerCount(metaResourceName));
+    }
+
+    public static void stopTestCluster() throws Exception {
+        log.debug(String.format("Stopping test cluster"));
+        if (managedControllerManager != null) {
+            log.info("Disconnecting managed cluster controller");
+            managedControllerManager.disconnect();
+        }
+        if (metaControllerManager != null) {
+            log.info("Disconnecting meta cluster controller");
+            metaControllerManager.disconnect();
+        }
+        log.info("Stopping provider services");
+        if (providerServices != null) {
+            for (Service service : providerServices) {
+                service.stop();
+            }
+            providerServices.clear();
+        }
+        log.debug("Stopping auxillary services");
+        if (auxServices != null) {
+            for (Service service : auxServices) {
+                service.stop();
+            }
+            auxServices.clear();
+        }
+
+        admin = null;
+        metaControllerManager = null;
+        managedControllerManager = null;
+    }
+
+    public static <T extends Service> T startAuxService(T service) throws Exception {
+        auxServices.add(service);
+        service.start();
+        return service;
+    }
+
+    public static <T extends Service> T startProviderService(T service) throws Exception {
+        providerServices.add(service);
+        service.start();
+        return service;
+    }
+
+    public static void rebalanceTestCluster() throws Exception {
+        log.debug(String.format("Triggering rebalance"));
+        IdealState poke = admin.getResourceIdealState(metaClusterName, metaResourceName);
+        admin.setResourceIdealState(metaClusterName, metaResourceName, poke);
+
+        int current = targetProvider.getTargetContainerCount(TestUtils.metaResourceName);
+        waitUntilRebalancedCount(current);
+    }
+
+    public static void waitUntilRebalancedCount(int containerCount) throws Exception {
+        log.debug(String.format("Waiting for rebalance with %d containers at '%s'", containerCount, zkAddress));
+
+        HelixAdmin admin = new ZKHelixAdmin(zkAddress);
+
+        try {
+            long limit = System.currentTimeMillis() + REBALANCE_TIMEOUT;
+            waitUntilPartitionCount(admin, metaClusterName, metaResourceName, containerCount, (limit - System.currentTimeMillis()));
+            waitUntilInstanceCount(admin, metaClusterName, metaResourceName, providerServices.size(), (limit - System.currentTimeMillis()));
+            waitUntilPartitionCount(admin, managedClusterName, managedResourceName, numManagedPartitions, (limit - System.currentTimeMillis()));
+            
+            // FIXME workaround for Helix FULL_AUTO rebalancer not providing guarantees for cluster expansion
+            //waitUntilInstanceCount(admin, managedClusterName, managedResourceName, containerCount, (limit - System.currentTimeMillis()));
+        } catch (Exception e) {
+            throw e;
+        } finally {
+            admin.close();
+        }
+    }
+
+    public static void waitUntilInstanceCount(HelixAdmin admin, String cluster, String resource, int targetCount, long timeout) throws Exception {
+        log.debug(String.format("Waiting for instance count (cluster='%s', resource='%s', instanceCount=%d, timeout=%d)", cluster, resource, targetCount,
+                timeout));
+
+        long limit = System.currentTimeMillis() + timeout;
+        while (limit > System.currentTimeMillis()) {
+            int assignedCount = getAssingedInstances(admin, cluster, resource).size();
+            log.debug(String.format("checking instance count for '%s:%s': target=%d, current=%d", cluster, resource, targetCount, assignedCount));
+
+            if (targetCount == assignedCount) {
+                return;
+            }
+            Thread.sleep(POLL_INTERVAL);
+        }
+        throw new TimeoutException();
+    }
+
+    public static void waitUntilPartitionCount(HelixAdmin admin, String cluster, String resource, int targetCount, long timeout) throws Exception {
+        log.debug(String.format("Waiting for partition count (cluster='%s', resource='%s', partitionCount=%d, timeout=%d)", cluster, resource, targetCount,
+                timeout));
+
+        long limit = System.currentTimeMillis() + timeout;
+        while (limit > System.currentTimeMillis()) {
+            int assignedCount = getAssingedPartitions(admin, cluster, resource).size();
+            log.debug(String.format("checking partition count for '%s:%s': target=%d, current=%d", cluster, resource, targetCount, assignedCount));
+
+            if (targetCount == assignedCount) {
+                return;
+            }
+            Thread.sleep(POLL_INTERVAL);
+        }
+        throw new TimeoutException();
+    }
+
+    public static Set<String> getAssingedInstances(HelixAdmin admin, String clusterName, String resourceName) {
+        Set<String> assignedInstances = new HashSet<String>();
+
+        ExternalView externalView = admin.getResourceExternalView(clusterName, resourceName);
+
+        if (externalView == null)
+            return assignedInstances;
+
+        for (String partitionName : externalView.getPartitionSet()) {
+            Map<String, String> stateMap = externalView.getStateMap(partitionName);
+            if (stateMap == null)
+                continue;
+
+            for (String instanceName : stateMap.keySet()) {
+                String state = stateMap.get(instanceName);
+                if ("MASTER".equals(state) || "SLAVE".equals(state) || "ONLINE".equals(state)) {
+                    assignedInstances.add(instanceName);
+                }
+            }
+        }
+
+        return assignedInstances;
+    }
+
+    public static Set<String> getAssingedPartitions(HelixAdmin admin, String clusterName, String resourceName) {
+        Set<String> assignedPartitions = new HashSet<String>();
+
+        ExternalView externalView = admin.getResourceExternalView(clusterName, resourceName);
+
+        if (externalView == null)
+            return assignedPartitions;
+
+        for (String partitionName : externalView.getPartitionSet()) {
+            Map<String, String> stateMap = externalView.getStateMap(partitionName);
+            if (stateMap == null)
+                continue;
+
+            for (String instanceName : stateMap.keySet()) {
+                String state = stateMap.get(instanceName);
+                if ("MASTER".equals(state) || "ONLINE".equals(state)) {
+                    assignedPartitions.add(partitionName);
+                }
+            }
+        }
+
+        return assignedPartitions;
+    }
+
+    public static ZkServer createLocalZookeeper() throws Exception {
+        String baseDir = "/tmp/autoscale/";
+        final String dataDir = baseDir + "zk/dataDir";
+        final String logDir = baseDir + "zk/logDir";
+        FileUtils.deleteDirectory(new File(dataDir));
+        FileUtils.deleteDirectory(new File(logDir));
+
+        IDefaultNameSpace defaultNameSpace = new IDefaultNameSpace() {
+            @Override
+            public void createDefaultNameSpace(ZkClient zkClient) {
+
+            }
+        };
+        return new ZkServer(dataDir, logDir, defaultNameSpace, zkPort);
+    }
+
+    public static LocalContainerProviderProcess makeLocalProvider(String name) throws Exception {
+        LocalContainerProviderProcess process = new LocalContainerProviderProcess();
+        process.configure(makeProviderProperties(name));
+        return process;
+    }
+
+    public static ShellContainerProviderProcess makeShellProvider(String name) throws Exception {
+        ShellContainerProviderProcess process = new ShellContainerProviderProcess();
+        process.configure(makeProviderProperties(name));
+        return process;
+    }
+
+    public static YarnContainerProviderProcess makeYarnProvider(String name) throws Exception {
+        YarnContainerProviderProperties properties = new YarnContainerProviderProperties();
+
+        properties.putAll(makeProviderProperties(name));
+        properties.put(YarnContainerProviderProperties.YARNDATA, zkAddress);
+        properties.put(YarnContainerProviderProperties.RESOURCEMANAGER, resmanAddress);
+        properties.put(YarnContainerProviderProperties.SCHEDULER, schedulerAddress);
+        properties.put(YarnContainerProviderProperties.USER, yarnUser);
+        properties.put(YarnContainerProviderProperties.HDFS, hdfsAddress);
+
+        YarnContainerProviderProcess process = new YarnContainerProviderProcess();
+        process.configure(properties);
+
+        return process;
+    }
+
+    static ProviderProperties makeProviderProperties(String name) {
+        ProviderProperties properties = new ProviderProperties();
+        properties.putAll(providerProperties);
+        properties.setProperty(ProviderProperties.NAME, name);
+        return properties;
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/TestUtilsUT.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/TestUtilsUT.java b/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/TestUtilsUT.java
new file mode 100644
index 0000000..d55d7a4
--- /dev/null
+++ b/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/TestUtilsUT.java
@@ -0,0 +1,63 @@
+package org.apache.helix.autoscale;
+
+import java.util.Collections;
+
+import org.apache.helix.autoscale.Service;
+import org.apache.helix.autoscale.StatusProviderService;
+import org.apache.helix.autoscale.TargetProviderService;
+import org.apache.helix.autoscale.impl.StaticTargetProvider;
+import org.apache.helix.autoscale.impl.local.LocalStatusProvider;
+import org.apache.log4j.Logger;
+import org.testng.annotations.Test;
+
+/**
+ * Self-test of the test cluster. Spawns ZooKeeper and a cluster with a single provider and a single instance.
+ * 
+ * @see TestUtils
+ */
+@Test(groups={"unit"})
+public class TestUtilsUT {
+
+	static final Logger log = Logger.getLogger(TestUtilsUT.class);
+	
+	@Test(timeOut = TestUtils.TEST_TIMEOUT)
+	public void testZookeeper() throws Exception {
+		log.info("testing zookeeper");
+	    TestUtils.configure();
+		TestUtils.startZookeeper();
+		TestUtils.stopZookeeper();
+	}
+
+	@Test(timeOut = TestUtils.TEST_TIMEOUT)
+	public void testCluster() throws Exception {
+		log.info("testing cluster");
+        TestUtils.configure();
+		TestUtils.startZookeeper();
+		
+		TestUtils.startTestCluster(new StaticTargetProvider(Collections.singletonMap(TestUtils.metaResourceName, 1)),
+		        new LocalStatusProvider(), TestUtils.makeLocalProvider("test"));
+		TestUtils.stopTestCluster();
+		
+		TestUtils.stopZookeeper();
+	}
+
+	@Test(timeOut = TestUtils.TEST_TIMEOUT)
+	public void testClusterRepeated() throws Exception {
+		log.info("testing cluster restart");
+        TestUtils.configure();
+		TestUtils.startZookeeper();
+		
+		TargetProviderService statusProvider = new StaticTargetProvider(Collections.singletonMap(TestUtils.metaResourceName, 1));
+		Service containerProvider = TestUtils.makeLocalProvider("test");
+		StatusProviderService containerStatusProvider = new LocalStatusProvider();
+		
+		TestUtils.startTestCluster(statusProvider, containerStatusProvider, containerProvider);
+		TestUtils.stopTestCluster();
+
+		TestUtils.startTestCluster(statusProvider, containerStatusProvider, containerProvider);
+		TestUtils.stopTestCluster();
+
+		TestUtils.stopZookeeper();
+	}
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/YarnContainerProviderIT.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/YarnContainerProviderIT.java b/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/YarnContainerProviderIT.java
new file mode 100644
index 0000000..78a0bf8
--- /dev/null
+++ b/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/YarnContainerProviderIT.java
@@ -0,0 +1,101 @@
+package org.apache.helix.autoscale;
+
+import java.util.Collections;
+
+import org.apache.helix.autoscale.impl.StaticTargetProvider;
+import org.apache.helix.autoscale.impl.yarn.YarnContainerProviderProcess;
+import org.apache.helix.autoscale.impl.yarn.YarnContainerProviderProperties;
+import org.apache.helix.autoscale.impl.yarn.YarnStatusProvider;
+import org.apache.log4j.Logger;
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.Test;
+
+/**
+ * Yarn container provider and yarn status provider test. Scale-up and -down
+ * only, no failures.
+ * 
+ * @see YarnContainerProvider
+ * @see YarnStatusProvider
+ */
+@Test(groups={"integration", "yarn"})
+public class YarnContainerProviderIT {
+
+    static final Logger             log             = Logger.getLogger(YarnContainerProviderIT.class);
+
+    static final int                CONTAINER_COUNT = 4;
+
+    StaticTargetProvider            clusterStatusProvider;
+    YarnContainerProviderProcess    containerProvider;
+    YarnStatusProvider              containerStatusProvider;
+
+    YarnContainerProviderProperties properties;
+
+	@BeforeClass(alwaysRun = true)
+	public void setupClass() throws Exception {
+		log.info("installing shutdown hook");
+		Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() {
+			@Override
+			public void run() {
+				try { teardownTest(); } catch(Exception ignore) {};
+			}
+		}));
+	}
+	
+	@BeforeMethod(alwaysRun = true)
+	public void setupTest() throws Exception {
+		log.debug("setting up yarn test case");
+		
+		teardownTest();
+		TestUtils.configure("distributed.properties");
+		TestUtils.startZookeeper();
+		
+		containerProvider = TestUtils.makeYarnProvider("provider_0");
+		containerStatusProvider = new YarnStatusProvider(TestUtils.zkAddress);
+		clusterStatusProvider = new StaticTargetProvider(Collections.singletonMap(TestUtils.metaResourceName, CONTAINER_COUNT));
+		TestUtils.startTestCluster(clusterStatusProvider, containerStatusProvider, containerProvider);
+		
+		log.debug("running yarn test case");
+	}
+	
+	@AfterMethod(alwaysRun = true)
+	public void teardownTest() throws Exception {
+		log.debug("cleaning up yarn test case");
+		TestUtils.stopTestCluster();
+		TestUtils.stopZookeeper();
+	}
+	
+	@Test(timeOut = TestUtils.TEST_TIMEOUT)
+	public void testStatic() throws Exception {
+		log.info("testing static");
+		setContainerCount(CONTAINER_COUNT);
+	}
+	
+	@Test(timeOut = TestUtils.TEST_TIMEOUT)
+	public void testScaleUp() throws Exception {
+		log.info("testing scale up");
+		setContainerCount(CONTAINER_COUNT + 2);
+	}
+	
+	@Test(timeOut = TestUtils.TEST_TIMEOUT)
+	public void testScaleDown() throws Exception {
+		log.info("testing scale down");
+		setContainerCount(CONTAINER_COUNT - 2);
+	}
+	
+	@Test(timeOut = TestUtils.TEST_TIMEOUT)
+	public void testScaleCycle() throws Exception {
+		log.info("testing scale cycle");
+		setContainerCount(CONTAINER_COUNT + 2);
+		setContainerCount(CONTAINER_COUNT);
+		setContainerCount(CONTAINER_COUNT - 2);
+		setContainerCount(CONTAINER_COUNT);
+	}
+	
+	void setContainerCount(int newContainerCount) throws Exception {
+		log.debug(String.format("Setting container count to %d", newContainerCount));
+		clusterStatusProvider.setTargetContainerCount(TestUtils.metaResourceName, newContainerCount);
+		TestUtils.rebalanceTestCluster();
+	}
+}

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/test/resources/distributed.properties
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/test/resources/distributed.properties b/recipes/auto-scale/src/test/resources/distributed.properties
new file mode 100644
index 0000000..47fd8e0
--- /dev/null
+++ b/recipes/auto-scale/src/test/resources/distributed.properties
@@ -0,0 +1,13 @@
+#
+# zookeeper will be spawned locally by TestUtils
+#
+zookeeper.port=2199
+zookeeper.address=rm:2199
+
+#
+# YARN and HDFS must be set up manually
+#
+yarn.resourcemanager=rm:8032
+yarn.scheduler=rm:8030
+yarn.hdfs=hdfs://rm:9000/
+yarn.user=yarn
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/test/resources/log4j.properties
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/test/resources/log4j.properties b/recipes/auto-scale/src/test/resources/log4j.properties
new file mode 100644
index 0000000..65800cc
--- /dev/null
+++ b/recipes/auto-scale/src/test/resources/log4j.properties
@@ -0,0 +1,30 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# Set root logger level to DEBUG and its only appender to R.
+log4j.rootLogger=ERROR, C
+
+# A1 is set to be a ConsoleAppender.
+log4j.appender.C=org.apache.log4j.ConsoleAppender
+log4j.appender.C.layout=org.apache.log4j.PatternLayout
+log4j.appender.C.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n
+
+log4j.logger.org.I0Itec=ERROR
+log4j.logger.org.apache=ERROR
+
+log4j.logger.org.apache.helix.autoscale=DEBUG

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/test/resources/standalone.properties
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/test/resources/standalone.properties b/recipes/auto-scale/src/test/resources/standalone.properties
new file mode 100644
index 0000000..d4b4e86
--- /dev/null
+++ b/recipes/auto-scale/src/test/resources/standalone.properties
@@ -0,0 +1,13 @@
+#
+# zookeeper will be spawned locally by TestUtils
+#
+zookeeper.port=2199
+zookeeper.address=localhost:2199
+
+#
+# YARN and HDFS must be set up manually
+#
+yarn.resourcemanager=localhost:8032
+yarn.scheduler=localhost:8030
+yarn.hdfs=hdfs://localhost:9000/
+yarn.user=yarn
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/README.md
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/README.md b/recipes/meta-cluster-manager/README.md
new file mode 100644
index 0000000..9a8acf4
--- /dev/null
+++ b/recipes/meta-cluster-manager/README.md
@@ -0,0 +1,82 @@
+<!---
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+Meta cluster manager
+------------------------
+Auto-scaling for helix clusters using a meta cluster. The managed cluster operates as usual, managing resources and instances via AUTO_REBALANCE. The meta cluster monitors the managed cluster and injects or removes instances based on demand.
+
+The meta cluster makes decisions about scaling up or down based on information obtained from a "ClusterStatusProvider". A custom "ProviderRebalancer" is invoked to test the health of existing participants in the managed cluster with the "ContainerStatusProvider". If participants need to be (re-)deployed, the "ContainerProvider" is invoked to instantiate and inject participants in the managed cluster.
+
+ContainerProviders are the participants of the meta cluster and there are multiple different implementations of the "ContainerProvider". First, the "LocalContainerProvider" spawns VM-local participants, i.e. participants of the managed cluster are spawned in the same VM the container provider exists. This is mainly useful for testing. Second, the "ShellContainerProvider" spawns a separate VM process for each participant using shell commands. Third, the "YarnContainerProvider" creates processes as container on a YARN cluster and manages their status using an external meta-data service (Zookeeper in this implementation). This implementation is fairly complex and has a number of external dependencies on a working YARN cluster and running services.
+
+Even though there are different types of providers, the notion of a "ContainerProcess" abstracts implementation specifics. A process implementation inherits from "ContainerProcess" and can be instantiated by all three types of container providers. CAUTION: since separate VM processes might be used, a VM-external method for coordination is required (e.g. Zookeeper).
+
+Configuration settings are passed throughout the application using traditional Properties objects. The "ConfigTool" contains default paths and helps to inject dependencies in the ProviderRebalancer.
+
+The application can be run and tested in three ways. First, a comprehensive suite of unit and integration tests can be run using "mvn verify". Second, the "Bootstrapper" can deploy a live managed and meta cluster based on a specification (e.g. "2by2shell.properties"). Third, the "MetaManagerDemo" deploys a test cluster and allows the user to step through a cycle of scale-up and scale-down as well as simulated container and container provider failures. 
+
+
+The IdealState of the meta cluster uses the ONLINE-OFFLINE model and maps as follows in the example below:
+
+Resource: type of container, e.g. database, webserver
+Partition: container id
+Instance: responsible container provider
+
+META:
+
+database
+  database_0
+    provider_0 : ONLINE
+  database_1
+    provider_1 : ONLINE
+webserver
+  webserver_0
+    provider_0 : ONLINE
+  webserver_1
+    provider_1 : ONLINE
+  webserver_2
+    provider_0 : ONLINE
+
+      
+MANAGED:
+
+dbprod (tag=database)
+  dbprod_0
+    database_0 : MASTER
+    database_1 : SLAVE
+  dbprod_1
+    database_0 : SLAVE
+    database_1 : MASTER
+  dbprod_2
+    database_0 : MASTER
+    database_1 : SLAVE
+wsprod (tag=webserver)
+  wsprod_0
+    webserver_0 : ONLINE
+  wsprod_1
+    webserver_1 : ONLINE
+  wsprod_2
+    webserver_2 : ONLINE
+  wsprod_3
+    webserver_0 : ONLINE
+  wsprod_4
+    webserver_1 : ONLINE
+  wsprod_5
+    webserver_2 : ONLINE
+    
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/pom.xml
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/pom.xml b/recipes/meta-cluster-manager/pom.xml
new file mode 100644
index 0000000..ba5eb69
--- /dev/null
+++ b/recipes/meta-cluster-manager/pom.xml
@@ -0,0 +1,210 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+
+  <parent>
+    <groupId>org.apache.helix.recipes</groupId>
+    <artifactId>recipes</artifactId>
+    <version>0.6.2-incubating-SNAPSHOT</version>
+  </parent>
+
+  <artifactId>meta-cluster-manager</artifactId>
+  <packaging>jar</packaging>
+  <name>Apache Helix :: Recipes :: meta cluster manager</name>
+  
+  <properties>
+    <hadoop.version>0.23.9</hadoop.version>
+
+    <ut.groups>unit</ut.groups>
+    <it.groups>local, shell</it.groups>
+  </properties>
+
+  <dependencies>
+    <dependency>
+      <groupId>org.testng</groupId>
+      <artifactId>testng</artifactId>
+      <version>6.0.1</version>
+      <scope>test</scope>
+      <exclusions>
+        <exclusion>
+          <groupId>junit</groupId>
+          <artifactId>junit</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+    <dependency>
+      <groupId>com.google.code.gson</groupId>
+      <artifactId>gson</artifactId>
+      <version>2.2.4</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.helix</groupId>
+      <artifactId>helix-core</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>log4j</groupId>
+      <artifactId>log4j</artifactId>
+      <exclusions>
+        <exclusion>
+          <groupId>javax.mail</groupId>
+          <artifactId>mail</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>javax.jms</groupId>
+          <artifactId>jms</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>com.sun.jdmk</groupId>
+          <artifactId>jmxtools</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>com.sun.jmx</groupId>
+          <artifactId>jmxri</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+    <dependency>
+      <groupId>com.google.guava</groupId>
+      <artifactId>guava</artifactId>
+      <version>14.0.1</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-client</artifactId>
+      <version>${hadoop.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-yarn-api</artifactId>
+      <version>${hadoop.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-yarn-common</artifactId>
+      <version>${hadoop.version}</version>
+    </dependency>
+  </dependencies>
+  
+  <build>
+    <pluginManagement>
+      <plugins>
+        <plugin>
+          <groupId>org.codehaus.mojo</groupId>
+          <artifactId>appassembler-maven-plugin</artifactId>
+          <configuration>
+            <configurationDirectory>conf</configurationDirectory>
+            <copyConfigurationDirectory>true</copyConfigurationDirectory>
+            <includeConfigurationDirectoryInClasspath>true</includeConfigurationDirectoryInClasspath>
+            <assembleDirectory>${project.build.directory}/metamanager-pkg</assembleDirectory>
+            <extraJvmArguments>-Xms512m -Xmx512m</extraJvmArguments>
+            <platforms>
+              <platform>unix</platform>
+            </platforms>
+          </configuration>
+          <executions>
+            <execution>
+              <phase>package</phase>
+              <goals>
+                <goal>assemble</goal>
+              </goals>
+            </execution>
+          </executions>
+        </plugin>
+        <plugin>
+          <groupId>org.apache.rat</groupId>
+          <artifactId>apache-rat-plugin</artifactId>
+            <configuration>
+              <excludes combine.children="append">
+              </excludes>
+            </configuration>
+        </plugin>
+      </plugins>
+    </pluginManagement>
+    
+    <plugins>
+      <plugin>
+        <groupId>org.codehaus.mojo</groupId>
+        <artifactId>appassembler-maven-plugin</artifactId>
+        <configuration>
+          <programs>
+            <program>
+              <mainClass>org.apache.helix.metamanager.bootstrapper.Boot</mainClass>
+              <name>boot</name>
+            </program>
+            <program>
+              <mainClass>org.apache.helix.metamanager.impl.shell.ShellContainerProcess</mainClass>
+              <name>shell-container-process</name>
+            </program>
+            <program>
+              <mainClass>org.apache.helix.metamanager.impl.yarn.YarnMasterProcess</mainClass>
+              <name>yarn-master-process</name>
+            </program>
+            <program>
+              <mainClass>org.apache.helix.metamanager.impl.yarn.YarnContainerProcess</mainClass>
+              <name>yarn-container-process</name>
+            </program>
+            <program>
+              <mainClass>org.apache.helix.metamanager.ZookeeperSetter</mainClass>
+              <name>zookeeper-setter</name>
+            </program>
+          </programs>
+        </configuration>
+      </plugin>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-assembly-plugin</artifactId>
+        <executions>
+          <execution>
+            <phase>package</phase>
+            <goals>
+              <goal>single</goal>
+            </goals>
+          </execution>
+        </executions>
+        <configuration>
+          <finalName>metamanager</finalName>
+          <descriptor>src/main/assembly/assembly.xml</descriptor>
+        </configuration>
+      </plugin>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-surefire-plugin</artifactId>
+        <configuration>
+          <groups>${ut.groups}</groups>
+          <excludedGroups>integration</excludedGroups>
+          <suiteXmlFiles>
+            <suiteXmlFile>src/test/config/testng.xml</suiteXmlFile>
+          </suiteXmlFiles>
+        </configuration>
+      </plugin>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-failsafe-plugin</artifactId>
+        <configuration>
+          <groups>${it.groups}</groups>
+          <excludedGroups>unit</excludedGroups>
+          <suiteXmlFiles>
+            <suiteXmlFile>src/test/config/testng.xml</suiteXmlFile>
+          </suiteXmlFiles>
+        </configuration>
+        <executions>
+          <execution>
+            <goals>
+              <goal>integration-test</goal>
+              <goal>verify</goal>
+            </goals>
+          </execution>
+        </executions>
+      </plugin>
+    </plugins>
+  </build>
+  
+  <profiles>
+    <profile>
+      <id>yarn</id>
+      <properties>
+        <it.groups>yarn</it.groups>
+      </properties>
+    </profile>
+  </profiles>
+</project>