You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@helix.apache.org by ki...@apache.org on 2013/09/20 20:30:10 UTC
[01/15] Adding Helix-task-framework and Yarn integration modules
Updated Branches:
refs/heads/helix-yarn [created] e38aa54b0
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/integration/FailoverIT.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/integration/FailoverIT.java b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/integration/FailoverIT.java
new file mode 100644
index 0000000..17a6047
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/integration/FailoverIT.java
@@ -0,0 +1,172 @@
+package org.apache.helix.metamanager.integration;
+
+import java.util.Collections;
+import java.util.Iterator;
+
+import org.apache.helix.metamanager.Service;
+import org.apache.helix.metamanager.TestUtils;
+import org.apache.helix.metamanager.impl.StaticTargetProvider;
+import org.apache.helix.metamanager.impl.local.LocalContainerProviderProcess;
+import org.apache.helix.metamanager.impl.local.LocalContainerSingleton;
+import org.apache.helix.metamanager.impl.local.LocalStatusProvider;
+import org.apache.helix.metamanager.impl.shell.ShellContainerProviderProcess;
+import org.apache.helix.metamanager.impl.shell.ShellContainerSingleton;
+import org.apache.helix.metamanager.impl.shell.ShellStatusProvider;
+import org.apache.helix.metamanager.impl.yarn.YarnContainerProviderProcess;
+import org.apache.helix.metamanager.impl.yarn.YarnStatusProvider;
+import org.apache.helix.metamanager.impl.yarn.ZookeeperMetadataProvider;
+import org.apache.log4j.Logger;
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.Test;
+
+/**
+ * Failover integration tests for the meta cluster manager. For each of the
+ * local, shell and YARN container provider implementations, a test cluster
+ * with three providers is started, then either individual containers or a
+ * whole provider is killed, and the test verifies that the cluster
+ * rebalances back to CONTAINER_COUNT running containers.
+ */
+public class FailoverIT {
+
+    static final Logger log = Logger.getLogger(FailoverIT.class);
+
+    /** Target container count the cluster must converge to after failover. */
+    static final int CONTAINER_COUNT = 7;
+
+    StaticTargetProvider targetProvider;
+    YarnStatusProvider yarnStatusProvider;
+
+    @BeforeClass
+    public void setupClass() {
+        log.info("installing shutdown hook");
+        // Best-effort cleanup of external resources (zookeeper, container
+        // processes) if the JVM is terminated while a test is in flight.
+        Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() {
+            @Override
+            public void run() {
+                try {
+                    teardownTest();
+                } catch (Exception e) {
+                    // A shutdown hook must never propagate; log for diagnostics
+                    // instead of silently swallowing the failure.
+                    log.warn("Ignoring error during shutdown cleanup", e);
+                }
+            }
+        }));
+    }
+
+    @BeforeMethod
+    public void setupTest() throws Exception {
+        // Defensive teardown in case a previous test aborted mid-way.
+        teardownTest();
+        TestUtils.startZookeeper();
+        targetProvider = new StaticTargetProvider(Collections.singletonMap(TestUtils.metaResourceName, CONTAINER_COUNT));
+    }
+
+    @AfterMethod
+    public void teardownTest() throws Exception {
+        TestUtils.stopTestCluster();
+
+        if (yarnStatusProvider != null) {
+            yarnStatusProvider.stop();
+            yarnStatusProvider = null;
+        }
+
+        TestUtils.stopZookeeper();
+    }
+
+    @Test(timeOut = TestUtils.TEST_TIMEOUT)
+    public void testLocalContainerFailover() throws Exception {
+        log.info("testing local container failover");
+        TestUtils.startTestCluster(targetProvider, new LocalStatusProvider(), makeLocalProviders(3));
+        killLocalContainers();
+    }
+
+    @Test(timeOut = TestUtils.TEST_TIMEOUT)
+    public void testLocalProviderFailover() throws Exception {
+        log.info("testing local provider failover");
+        TestUtils.startTestCluster(targetProvider, new LocalStatusProvider(), makeLocalProviders(3));
+        killProvider();
+    }
+
+    @Test(timeOut = TestUtils.TEST_TIMEOUT)
+    public void testShellContainerFailover() throws Exception {
+        log.info("testing shell container failover");
+        TestUtils.startTestCluster(targetProvider, new ShellStatusProvider(), makeShellProviders(3));
+        killShellContainers();
+    }
+
+    @Test(timeOut = TestUtils.TEST_TIMEOUT)
+    public void testShellProviderFailover() throws Exception {
+        log.info("testing shell provider failover");
+        TestUtils.startTestCluster(targetProvider, new ShellStatusProvider(), makeShellProviders(3));
+        killProvider();
+    }
+
+    @Test(timeOut = TestUtils.TEST_TIMEOUT)
+    public void testYarnContainerFailover() throws Exception {
+        log.info("testing yarn container failover");
+        yarnStatusProvider = new YarnStatusProvider(TestUtils.zkAddress);
+        yarnStatusProvider.start();
+        TestUtils.startTestCluster(targetProvider, yarnStatusProvider, makeYarnProviders(3));
+        killYarnContainers();
+    }
+
+    @Test(timeOut = TestUtils.TEST_TIMEOUT)
+    public void testYarnProviderFailover() throws Exception {
+        log.info("testing yarn provider failover");
+        yarnStatusProvider = new YarnStatusProvider(TestUtils.zkAddress);
+        yarnStatusProvider.start();
+        TestUtils.startTestCluster(targetProvider, yarnStatusProvider, makeYarnProviders(3));
+        killProvider();
+    }
+
+    /** Kill three local containers and verify the cluster recovers. */
+    void killLocalContainers() throws Exception {
+        LocalContainerSingleton.killProcess("container_2");
+        LocalContainerSingleton.killProcess("container_4");
+        LocalContainerSingleton.killProcess("container_6");
+        awaitRebalance();
+    }
+
+    /** Kill three shell containers and verify the cluster recovers. */
+    void killShellContainers() throws Exception {
+        ShellContainerSingleton.killProcess("container_2");
+        ShellContainerSingleton.killProcess("container_4");
+        ShellContainerSingleton.killProcess("container_6");
+        awaitRebalance();
+    }
+
+    /** Drop three yarn container metadata entries and verify the cluster recovers. */
+    void killYarnContainers() throws Exception {
+        ZookeeperMetadataProvider metadata = new ZookeeperMetadataProvider(TestUtils.zkAddress);
+        metadata.start();
+        metadata.delete("container_2");
+        metadata.delete("container_4");
+        metadata.delete("container_6");
+        metadata.stop();
+        awaitRebalance();
+    }
+
+    /**
+     * Give the killed processes time to die, then trigger a rebalance and
+     * block until CONTAINER_COUNT containers are running again.
+     */
+    void awaitRebalance() throws Exception {
+        Thread.sleep(3000);
+        TestUtils.rebalanceTestCluster();
+        TestUtils.waitUntilRebalancedCount(CONTAINER_COUNT);
+    }
+
+    /** Stop (and deregister) the first provider service, then verify recovery. */
+    static void killProvider() throws Exception {
+        Iterator<Service> itService = TestUtils.providerServices.iterator();
+        itService.next().stop();
+        itService.remove();
+
+        TestUtils.rebalanceTestCluster();
+        TestUtils.waitUntilRebalancedCount(CONTAINER_COUNT);
+    }
+
+    /** Create {@code count} local providers named provider_0..provider_{count-1}. */
+    LocalContainerProviderProcess[] makeLocalProviders(int count) throws Exception {
+        LocalContainerProviderProcess[] localProviders = new LocalContainerProviderProcess[count];
+        for (int i = 0; i < count; i++) {
+            localProviders[i] = TestUtils.makeLocalProvider("provider_" + i);
+        }
+        return localProviders;
+    }
+
+    /** Create {@code count} shell providers named provider_0..provider_{count-1}. */
+    ShellContainerProviderProcess[] makeShellProviders(int count) throws Exception {
+        ShellContainerProviderProcess[] shellProviders = new ShellContainerProviderProcess[count];
+        for (int i = 0; i < count; i++) {
+            shellProviders[i] = TestUtils.makeShellProvider("provider_" + i);
+        }
+        return shellProviders;
+    }
+
+    /** Create {@code count} yarn providers named provider_0..provider_{count-1}. */
+    YarnContainerProviderProcess[] makeYarnProviders(int count) throws Exception {
+        YarnContainerProviderProcess[] yarnProviders = new YarnContainerProviderProcess[count];
+        for (int i = 0; i < count; i++) {
+            yarnProviders[i] = TestUtils.makeYarnProvider("provider_" + i);
+        }
+        return yarnProviders;
+    }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/integration/LocalContainerProviderIT.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/integration/LocalContainerProviderIT.java b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/integration/LocalContainerProviderIT.java
new file mode 100644
index 0000000..6f9b6df
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/integration/LocalContainerProviderIT.java
@@ -0,0 +1,72 @@
+package org.apache.helix.metamanager.integration;
+
+import java.util.Collections;
+
+import org.apache.helix.metamanager.TestUtils;
+import org.apache.helix.metamanager.impl.StaticTargetProvider;
+import org.apache.helix.metamanager.impl.local.LocalContainerProviderProcess;
+import org.apache.helix.metamanager.impl.local.LocalStatusProvider;
+import org.apache.log4j.Logger;
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.Test;
+
+public class LocalContainerProviderIT {
+
+ static final Logger log = Logger.getLogger(LocalContainerProviderIT.class);
+
+ static final int CONTAINER_COUNT = 4;
+
+ StaticTargetProvider clusterStatusProvider;
+ LocalContainerProviderProcess containerProvider;
+ LocalStatusProvider containerStatusProvider;
+
+ @BeforeMethod
+ public void setupTest() throws Exception {
+ teardownTest();
+ TestUtils.startZookeeper();
+ containerProvider = TestUtils.makeLocalProvider("provider_0");
+ clusterStatusProvider = new StaticTargetProvider(Collections.singletonMap(TestUtils.metaResourceName, CONTAINER_COUNT));
+ containerStatusProvider = new LocalStatusProvider();
+ TestUtils.startTestCluster(clusterStatusProvider, containerStatusProvider, containerProvider);
+ }
+
+ @AfterMethod
+ public void teardownTest() throws Exception {
+ TestUtils.stopTestCluster();
+ TestUtils.stopZookeeper();
+ }
+
+ @Test(timeOut = TestUtils.TEST_TIMEOUT)
+ public void testStatic() throws Exception {
+ log.info("testing static");
+ setContainerCount(CONTAINER_COUNT);
+ }
+
+ @Test(timeOut = TestUtils.TEST_TIMEOUT)
+ public void testScaleUp() throws Exception {
+ log.info("testing scale up");
+ setContainerCount(CONTAINER_COUNT + 2);
+ }
+
+ @Test(timeOut = TestUtils.TEST_TIMEOUT)
+ public void testScaleDown() throws Exception {
+ log.info("testing scale down");
+ setContainerCount(CONTAINER_COUNT - 2);
+ }
+
+ @Test(timeOut = TestUtils.TEST_TIMEOUT)
+ public void testScaleCycle() throws Exception {
+ log.info("testing scale cycle");
+ setContainerCount(CONTAINER_COUNT + 2);
+ setContainerCount(CONTAINER_COUNT);
+ setContainerCount(CONTAINER_COUNT - 2);
+ setContainerCount(CONTAINER_COUNT);
+ }
+
+ void setContainerCount(int newContainerCount) throws Exception {
+ log.debug(String.format("Setting container count to %d", newContainerCount));
+ clusterStatusProvider.setTargetContainerCount(TestUtils.metaResourceName, newContainerCount);
+ TestUtils.rebalanceTestCluster();
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/integration/MultipleProviderFailoverIT.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/integration/MultipleProviderFailoverIT.java b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/integration/MultipleProviderFailoverIT.java
new file mode 100644
index 0000000..1c7edc7
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/integration/MultipleProviderFailoverIT.java
@@ -0,0 +1,148 @@
+package org.apache.helix.metamanager.integration;
+
+import java.util.Collections;
+
+import org.apache.helix.metamanager.StaticStatusProvider;
+import org.apache.helix.metamanager.TestUtils;
+import org.apache.helix.metamanager.impl.local.LocalContainerProvider;
+import org.apache.helix.metamanager.impl.local.LocalContainerSingleton;
+import org.apache.helix.metamanager.impl.local.LocalContainerStatusProvider;
+import org.apache.helix.metamanager.impl.shell.ShellContainerProvider;
+import org.apache.helix.metamanager.impl.shell.ShellContainerSingleton;
+import org.apache.helix.metamanager.impl.shell.ShellContainerStatusProvider;
+import org.apache.helix.metamanager.impl.yarn.YarnContainerProvider;
+import org.apache.helix.metamanager.impl.yarn.YarnContainerStatusProvider;
+import org.apache.log4j.Logger;
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.Test;
+
+/**
+ * Failover integration tests using multiple concurrent container providers.
+ * For each provider implementation (local, shell, yarn) a cluster with
+ * three providers is started, containers or a provider are destroyed, and
+ * the test verifies the cluster rebalances back to CONTAINER_COUNT.
+ */
+public class MultipleProviderFailoverIT {
+
+    static final Logger log = Logger.getLogger(MultipleProviderFailoverIT.class);
+
+    static final long TEST_TIMEOUT = 60000;
+    static final long REBALANCE_TIMEOUT = 30000;
+
+    /** Target container count the cluster must converge to after failover. */
+    static final int CONTAINER_COUNT = 7;
+
+    StaticStatusProvider clusterStatusProvider;
+
+    YarnContainerStatusProvider yarnStatusProvider;
+
+    @BeforeClass
+    public void setupClass() {
+        log.info("installing shutdown hook");
+        // Best-effort cleanup of external resources if the JVM is killed
+        // while a test is in flight.
+        Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() {
+            @Override
+            public void run() {
+                try {
+                    teardownTest();
+                } catch (Exception e) {
+                    // A shutdown hook must never propagate; log for diagnostics
+                    // instead of silently swallowing the failure.
+                    log.warn("Ignoring error during shutdown cleanup", e);
+                }
+            }
+        }));
+    }
+
+    @BeforeMethod
+    public void setupTest() throws Exception {
+        // Defensive teardown in case a previous test aborted mid-way.
+        teardownTest();
+        TestUtils.startZookeeper();
+        clusterStatusProvider = new StaticStatusProvider(Collections.singletonMap(TestUtils.metaResourceName, CONTAINER_COUNT));
+    }
+
+    @AfterMethod
+    public void teardownTest() throws Exception {
+        TestUtils.stopTestCluster();
+        LocalContainerSingleton.reset();
+        ShellContainerSingleton.reset();
+        if (yarnStatusProvider != null) {
+            yarnStatusProvider.stopService();
+            yarnStatusProvider = null;
+        }
+        TestUtils.stopZookeeper();
+    }
+
+    @Test(timeOut = TEST_TIMEOUT)
+    public void testLocalContainerFailover() throws Exception {
+        log.info("testing local container failover");
+        TestUtils.startTestCluster(clusterStatusProvider, new LocalContainerStatusProvider(), makeLocalProviders(3));
+        killContainers();
+    }
+
+    @Test(timeOut = TEST_TIMEOUT)
+    public void testLocalProviderFailover() throws Exception {
+        log.info("testing local provider failover");
+        TestUtils.startTestCluster(clusterStatusProvider, new LocalContainerStatusProvider(), makeLocalProviders(3));
+        killProvider();
+    }
+
+    @Test(timeOut = TEST_TIMEOUT)
+    public void testShellContainerFailover() throws Exception {
+        log.info("testing shell container failover");
+        TestUtils.startTestCluster(clusterStatusProvider, new ShellContainerStatusProvider(), makeShellProviders(3));
+        killContainers();
+    }
+
+    @Test(timeOut = TEST_TIMEOUT)
+    public void testShellProviderFailover() throws Exception {
+        log.info("testing shell provider failover");
+        TestUtils.startTestCluster(clusterStatusProvider, new ShellContainerStatusProvider(), makeShellProviders(3));
+        killProvider();
+    }
+
+    @Test(timeOut = TEST_TIMEOUT)
+    public void testYarnContainerFailover() throws Exception {
+        log.info("testing yarn container failover");
+        yarnStatusProvider = new YarnContainerStatusProvider(TestUtils.zkAddress);
+        yarnStatusProvider.startService();
+        TestUtils.startTestCluster(clusterStatusProvider, yarnStatusProvider, makeYarnProviders(3));
+        killContainers();
+    }
+
+    @Test(timeOut = TEST_TIMEOUT)
+    public void testYarnProviderFailover() throws Exception {
+        log.info("testing yarn provider failover");
+        yarnStatusProvider = new YarnContainerStatusProvider(TestUtils.zkAddress);
+        yarnStatusProvider.startService();
+        TestUtils.startTestCluster(clusterStatusProvider, yarnStatusProvider, makeYarnProviders(3));
+        killProvider();
+    }
+
+    /** Destroy three containers on the second provider and verify recovery. */
+    static void killContainers() throws Exception {
+        TestUtils.containerProviders.get(1).destroy("container_2");
+        TestUtils.containerProviders.get(1).destroy("container_4");
+        TestUtils.containerProviders.get(1).destroy("container_6");
+        TestUtils.rebalanceTestCluster();
+        TestUtils.waitUntilRebalancedCount(CONTAINER_COUNT, REBALANCE_TIMEOUT);
+    }
+
+    /** Stop the second provider's manager process and verify recovery. */
+    static void killProvider() throws Exception {
+        TestUtils.managerProcesses.get(1).stop();
+        TestUtils.rebalanceTestCluster();
+        TestUtils.waitUntilRebalancedCount(CONTAINER_COUNT, REBALANCE_TIMEOUT);
+    }
+
+    /** Create {@code count} local providers named provider_0..provider_{count-1}. */
+    static LocalContainerProvider[] makeLocalProviders(int count) {
+        LocalContainerProvider[] providers = new LocalContainerProvider[count];
+        for (int i = 0; i < count; i++) {
+            providers[i] = TestUtils.makeLocalProvider("provider_" + i);
+        }
+        return providers;
+    }
+
+    /** Create {@code count} shell providers named provider_0..provider_{count-1}. */
+    static ShellContainerProvider[] makeShellProviders(int count) {
+        ShellContainerProvider[] providers = new ShellContainerProvider[count];
+        for (int i = 0; i < count; i++) {
+            providers[i] = TestUtils.makeShellProvider("provider_" + i);
+        }
+        return providers;
+    }
+
+    /** Create {@code count} yarn providers named provider_0..provider_{count-1}. */
+    YarnContainerProvider[] makeYarnProviders(int count) throws Exception {
+        YarnContainerProvider[] providers = new YarnContainerProvider[count];
+        for (int i = 0; i < count; i++) {
+            providers[i] = TestUtils.makeYarnProvider("provider_" + i);
+        }
+        return providers;
+    }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/integration/ShellContainerProviderIT.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/integration/ShellContainerProviderIT.java b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/integration/ShellContainerProviderIT.java
new file mode 100644
index 0000000..19d37a1
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/integration/ShellContainerProviderIT.java
@@ -0,0 +1,87 @@
+package org.apache.helix.metamanager.integration;
+
+import java.util.Collections;
+
+import org.apache.helix.metamanager.TestUtils;
+import org.apache.helix.metamanager.impl.StaticTargetProvider;
+import org.apache.helix.metamanager.impl.shell.ShellContainerProviderProcess;
+import org.apache.helix.metamanager.impl.shell.ShellStatusProvider;
+import org.apache.log4j.Logger;
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.Test;
+
+/**
+ * Integration tests for scaling a test cluster backed by the shell
+ * container provider. Each test adjusts the target container count via the
+ * static target provider and triggers a rebalance.
+ */
+public class ShellContainerProviderIT {
+
+    static final Logger log = Logger.getLogger(ShellContainerProviderIT.class);
+
+    // NOTE(review): these two constants are unused here — the @Test
+    // annotations below use TestUtils.TEST_TIMEOUT; consider removing them.
+    static final long TEST_TIMEOUT = 20000;
+    static final long REBALANCE_TIMEOUT = 10000;
+
+    /** Baseline number of containers each test starts from. */
+    static final int CONTAINER_COUNT = 4;
+
+    StaticTargetProvider clusterStatusProvider;
+    ShellContainerProviderProcess containerProvider;
+    ShellStatusProvider containerStatusProvider;
+
+    @BeforeClass
+    public void setupClass() {
+        log.info("installing shutdown hook");
+        // Best-effort cleanup of external resources (zookeeper, shell
+        // processes) if the JVM is killed while a test is in flight.
+        Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() {
+            @Override
+            public void run() {
+                try {
+                    teardownTest();
+                } catch (Exception e) {
+                    // A shutdown hook must never propagate; log for diagnostics
+                    // instead of silently swallowing the failure.
+                    log.warn("Ignoring error during shutdown cleanup", e);
+                }
+            }
+        }));
+    }
+
+    @BeforeMethod
+    public void setupTest() throws Exception {
+        // Defensive teardown in case a previous test aborted mid-way.
+        teardownTest();
+        TestUtils.startZookeeper();
+        containerProvider = TestUtils.makeShellProvider("provider_0");
+        clusterStatusProvider = new StaticTargetProvider(Collections.singletonMap(TestUtils.metaResourceName, CONTAINER_COUNT));
+        containerStatusProvider = new ShellStatusProvider();
+        TestUtils.startTestCluster(clusterStatusProvider, containerStatusProvider, containerProvider);
+    }
+
+    @AfterMethod
+    public void teardownTest() throws Exception {
+        TestUtils.stopTestCluster();
+        TestUtils.stopZookeeper();
+    }
+
+    @Test(timeOut = TestUtils.TEST_TIMEOUT)
+    public void testStatic() throws Exception {
+        log.info("testing static");
+        // No change in count; the cluster should simply stay balanced.
+        setContainerCount(CONTAINER_COUNT);
+    }
+
+    @Test(timeOut = TestUtils.TEST_TIMEOUT)
+    public void testScaleUp() throws Exception {
+        log.info("testing scale up");
+        setContainerCount(CONTAINER_COUNT + 2);
+    }
+
+    @Test(timeOut = TestUtils.TEST_TIMEOUT)
+    public void testScaleDown() throws Exception {
+        log.info("testing scale down");
+        setContainerCount(CONTAINER_COUNT - 2);
+    }
+
+    @Test(timeOut = TestUtils.TEST_TIMEOUT)
+    public void testScaleCycle() throws Exception {
+        log.info("testing scale cycle");
+        // Grow, return to baseline, shrink, return to baseline.
+        setContainerCount(CONTAINER_COUNT + 2);
+        setContainerCount(CONTAINER_COUNT);
+        setContainerCount(CONTAINER_COUNT - 2);
+        setContainerCount(CONTAINER_COUNT);
+    }
+
+    /** Update the target container count and rebalance the cluster. */
+    void setContainerCount(int newContainerCount) throws Exception {
+        log.debug(String.format("Setting container count to %d", newContainerCount));
+        clusterStatusProvider.setTargetContainerCount(TestUtils.metaResourceName, newContainerCount);
+        TestUtils.rebalanceTestCluster();
+    }
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/integration/YarnContainerProviderIT.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/integration/YarnContainerProviderIT.java b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/integration/YarnContainerProviderIT.java
new file mode 100644
index 0000000..16a9ad6
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/integration/YarnContainerProviderIT.java
@@ -0,0 +1,93 @@
+package org.apache.helix.metamanager.integration;
+
+import java.util.Collections;
+
+import org.apache.helix.metamanager.TestUtils;
+import org.apache.helix.metamanager.impl.StaticTargetProvider;
+import org.apache.helix.metamanager.impl.yarn.YarnContainerProviderProcess;
+import org.apache.helix.metamanager.impl.yarn.YarnContainerProviderProperties;
+import org.apache.helix.metamanager.impl.yarn.YarnStatusProvider;
+import org.apache.log4j.Logger;
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.Test;
+
+/**
+ * Integration tests for scaling a test cluster backed by the YARN
+ * container provider. Requires YARN and HDFS to be reachable as configured
+ * in the test properties; zookeeper is spawned locally by TestUtils.
+ */
+public class YarnContainerProviderIT {
+
+    static final Logger log = Logger.getLogger(YarnContainerProviderIT.class);
+
+    /** Baseline number of containers each test starts from. */
+    static final int CONTAINER_COUNT = 4;
+
+    StaticTargetProvider clusterStatusProvider;
+    YarnContainerProviderProcess containerProvider;
+    YarnStatusProvider containerStatusProvider;
+
+    // NOTE(review): never read or assigned in this class — confirm whether
+    // it is needed, otherwise remove it along with its import.
+    YarnContainerProviderProperties properties;
+
+    @BeforeClass
+    public void setupClass() throws Exception {
+        log.info("installing shutdown hook");
+        // Best-effort cleanup of external resources (zookeeper, yarn
+        // containers) if the JVM is killed while a test is in flight.
+        Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() {
+            @Override
+            public void run() {
+                try {
+                    teardownTest();
+                } catch (Exception e) {
+                    // A shutdown hook must never propagate; log for diagnostics
+                    // instead of silently swallowing the failure.
+                    log.warn("Ignoring error during shutdown cleanup", e);
+                }
+            }
+        }));
+    }
+
+    @BeforeMethod
+    public void setupTest() throws Exception {
+        log.debug("setting up yarn test case");
+
+        // Defensive teardown in case a previous test aborted mid-way.
+        teardownTest();
+        TestUtils.startZookeeper();
+
+        containerProvider = TestUtils.makeYarnProvider("provider_0");
+        containerStatusProvider = new YarnStatusProvider(TestUtils.zkAddress);
+        clusterStatusProvider = new StaticTargetProvider(Collections.singletonMap(TestUtils.metaResourceName, CONTAINER_COUNT));
+        TestUtils.startTestCluster(clusterStatusProvider, containerStatusProvider, containerProvider);
+
+        log.debug("running yarn test case");
+    }
+
+    @AfterMethod
+    public void teardownTest() throws Exception {
+        log.debug("cleaning up yarn test case");
+        TestUtils.stopTestCluster();
+        TestUtils.stopZookeeper();
+    }
+
+    @Test(timeOut = TestUtils.TEST_TIMEOUT)
+    public void testStatic() throws Exception {
+        log.info("testing static");
+        // No change in count; the cluster should simply stay balanced.
+        setContainerCount(CONTAINER_COUNT);
+    }
+
+    @Test(timeOut = TestUtils.TEST_TIMEOUT)
+    public void testScaleUp() throws Exception {
+        log.info("testing scale up");
+        setContainerCount(CONTAINER_COUNT + 2);
+    }
+
+    @Test(timeOut = TestUtils.TEST_TIMEOUT)
+    public void testScaleDown() throws Exception {
+        log.info("testing scale down");
+        setContainerCount(CONTAINER_COUNT - 2);
+    }
+
+    @Test(timeOut = TestUtils.TEST_TIMEOUT)
+    public void testScaleCycle() throws Exception {
+        log.info("testing scale cycle");
+        // Grow, return to baseline, shrink, return to baseline.
+        setContainerCount(CONTAINER_COUNT + 2);
+        setContainerCount(CONTAINER_COUNT);
+        setContainerCount(CONTAINER_COUNT - 2);
+        setContainerCount(CONTAINER_COUNT);
+    }
+
+    /** Update the target container count and rebalance the cluster. */
+    void setContainerCount(int newContainerCount) throws Exception {
+        log.debug(String.format("Setting container count to %d", newContainerCount));
+        clusterStatusProvider.setTargetContainerCount(TestUtils.metaResourceName, newContainerCount);
+        TestUtils.rebalanceTestCluster();
+    }
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/unit/TestUtilsTestUT.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/unit/TestUtilsTestUT.java b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/unit/TestUtilsTestUT.java
new file mode 100644
index 0000000..9570b54
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/unit/TestUtilsTestUT.java
@@ -0,0 +1,62 @@
+package org.apache.helix.metamanager.unit;
+
+import java.util.Collections;
+
+import org.apache.helix.metamanager.ClusterStatusProvider;
+import org.apache.helix.metamanager.ContainerProvider;
+import org.apache.helix.metamanager.ContainerStatusProvider;
+import org.apache.helix.metamanager.StaticStatusProvider;
+import org.apache.helix.metamanager.TestUtils;
+import org.apache.helix.metamanager.impl.local.LocalContainerSingleton;
+import org.apache.helix.metamanager.impl.local.LocalContainerStatusProvider;
+import org.apache.log4j.Logger;
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.Test;
+
+/**
+ * Unit tests for the TestUtils harness itself: verifies that the embedded
+ * zookeeper and the test cluster can be started, stopped, and restarted.
+ */
+public class TestUtilsTestUT {
+
+    static final Logger log = Logger.getLogger(TestUtilsTestUT.class);
+
+    @AfterMethod
+    public void teardownTest() throws Exception {
+        // Drop any local container state left behind by a test.
+        LocalContainerSingleton.reset();
+    }
+
+    @Test
+    public void testZookeeper() throws Exception {
+        log.info("testing zookeeper");
+        TestUtils.startZookeeper();
+        TestUtils.stopZookeeper();
+    }
+
+    @Test
+    public void testCluster() throws Exception {
+        log.info("testing cluster");
+        TestUtils.startZookeeper();
+
+        StaticStatusProvider targets =
+                new StaticStatusProvider(Collections.singletonMap(TestUtils.metaResourceName, 1));
+        LocalContainerStatusProvider containerStatus = new LocalContainerStatusProvider();
+        ContainerProvider provider = TestUtils.makeLocalProvider("test");
+
+        TestUtils.startTestCluster(targets, containerStatus, provider);
+        TestUtils.stopTestCluster();
+
+        TestUtils.stopZookeeper();
+    }
+
+    @Test
+    public void testClusterRepeated() throws Exception {
+        log.info("testing cluster restart");
+        TestUtils.startZookeeper();
+
+        ClusterStatusProvider targets = new StaticStatusProvider(Collections.singletonMap(TestUtils.metaResourceName, 1));
+        ContainerProvider provider = TestUtils.makeLocalProvider("test");
+        ContainerStatusProvider containerStatus = new LocalContainerStatusProvider();
+
+        // Same provider instances must survive a full stop/start cycle.
+        TestUtils.startTestCluster(targets, containerStatus, provider);
+        TestUtils.stopTestCluster();
+
+        TestUtils.startTestCluster(targets, containerStatus, provider);
+        TestUtils.stopTestCluster();
+
+        TestUtils.stopZookeeper();
+    }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/unit/TestUtilsUT.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/unit/TestUtilsUT.java b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/unit/TestUtilsUT.java
new file mode 100644
index 0000000..a4d5dd3
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/unit/TestUtilsUT.java
@@ -0,0 +1,55 @@
+package org.apache.helix.metamanager.unit;
+
+import java.util.Collections;
+
+import org.apache.helix.metamanager.Service;
+import org.apache.helix.metamanager.StatusProviderService;
+import org.apache.helix.metamanager.TargetProviderService;
+import org.apache.helix.metamanager.TestUtils;
+import org.apache.helix.metamanager.impl.StaticTargetProvider;
+import org.apache.helix.metamanager.impl.local.LocalStatusProvider;
+import org.apache.log4j.Logger;
+import org.testng.annotations.Test;
+
+/**
+ * Unit tests for the TestUtils harness: verifies the embedded zookeeper
+ * and the test cluster can be started, stopped, and restarted.
+ */
+public class TestUtilsUT {
+
+    static final Logger log = Logger.getLogger(TestUtilsUT.class);
+
+    @Test
+    public void testZookeeper() throws Exception {
+        log.info("testing zookeeper");
+        TestUtils.startZookeeper();
+        TestUtils.stopZookeeper();
+    }
+
+    @Test
+    public void testCluster() throws Exception {
+        log.info("testing cluster");
+        TestUtils.startZookeeper();
+
+        StaticTargetProvider targets =
+                new StaticTargetProvider(Collections.singletonMap(TestUtils.metaResourceName, 1));
+        LocalStatusProvider containerStatus = new LocalStatusProvider();
+
+        TestUtils.startTestCluster(targets, containerStatus, TestUtils.makeLocalProvider("test"));
+        TestUtils.stopTestCluster();
+
+        TestUtils.stopZookeeper();
+    }
+
+    @Test
+    public void testClusterRepeated() throws Exception {
+        log.info("testing cluster restart");
+        TestUtils.startZookeeper();
+
+        TargetProviderService targets = new StaticTargetProvider(Collections.singletonMap(TestUtils.metaResourceName, 1));
+        Service provider = TestUtils.makeLocalProvider("test");
+        StatusProviderService containerStatus = new LocalStatusProvider();
+
+        // Same service instances must survive a full stop/start cycle.
+        TestUtils.startTestCluster(targets, containerStatus, provider);
+        TestUtils.stopTestCluster();
+
+        TestUtils.startTestCluster(targets, containerStatus, provider);
+        TestUtils.stopTestCluster();
+
+        TestUtils.stopZookeeper();
+    }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/test/resources/distributed.properties
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/test/resources/distributed.properties b/recipes/meta-cluster-manager/src/test/resources/distributed.properties
new file mode 100644
index 0000000..47fd8e0
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/test/resources/distributed.properties
@@ -0,0 +1,13 @@
+#
+# zookeeper will be spawned locally by TestUtils
+#
+zookeeper.port=2199
+zookeeper.address=rm:2199
+
+#
+# YARN and HDFS must be set up manually
+#
+yarn.resourcemanager=rm:8032
+yarn.scheduler=rm:8030
+yarn.hdfs=hdfs://rm:9000/
+yarn.user=yarn
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/test/resources/log4j.properties
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/test/resources/log4j.properties b/recipes/meta-cluster-manager/src/test/resources/log4j.properties
new file mode 100644
index 0000000..57bc008
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/test/resources/log4j.properties
@@ -0,0 +1,30 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# Set root logger level to ERROR and its only appender to C.
+log4j.rootLogger=ERROR, C
+
+# C is set to be a ConsoleAppender.
+log4j.appender.C=org.apache.log4j.ConsoleAppender
+log4j.appender.C.layout=org.apache.log4j.PatternLayout
+log4j.appender.C.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n
+
+log4j.logger.org.I0Itec=ERROR
+log4j.logger.org.apache=ERROR
+
+log4j.logger.org.apache.helix.metamanager=DEBUG
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/test/resources/standalone.properties
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/test/resources/standalone.properties b/recipes/meta-cluster-manager/src/test/resources/standalone.properties
new file mode 100644
index 0000000..d4b4e86
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/test/resources/standalone.properties
@@ -0,0 +1,13 @@
+#
+# zookeeper will be spawned locally by TestUtils
+#
+zookeeper.port=2199
+zookeeper.address=localhost:2199
+
+#
+# YARN and HDFS must be set up manually
+#
+yarn.resourcemanager=localhost:8032
+yarn.scheduler=localhost:8030
+yarn.hdfs=hdfs://localhost:9000/
+yarn.user=yarn
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/pom.xml
----------------------------------------------------------------------
diff --git a/recipes/pom.xml b/recipes/pom.xml
index 3667650..5c13e6b 100644
--- a/recipes/pom.xml
+++ b/recipes/pom.xml
@@ -30,6 +30,7 @@ under the License.
<name>Apache Helix :: Recipes</name>
<modules>
+ <module>auto-scale</module>
<module>rabbitmq-consumer-group</module>
<module>rsync-replicated-file-system</module>
<module>distributed-lock-manager</module>
[13/15] Adding Helix-task-framework and Yarn integration modules
Posted by ki...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/helix-core/src/test/java/org/apache/helix/integration/task/TestTaskRebalancer.java
----------------------------------------------------------------------
diff --git a/helix-core/src/test/java/org/apache/helix/integration/task/TestTaskRebalancer.java b/helix-core/src/test/java/org/apache/helix/integration/task/TestTaskRebalancer.java
new file mode 100644
index 0000000..dec884b
--- /dev/null
+++ b/helix-core/src/test/java/org/apache/helix/integration/task/TestTaskRebalancer.java
@@ -0,0 +1,330 @@
+/*
+ * $Id$
+ */
+package org.apache.helix.integration.task;
+
+
+import com.google.common.base.Joiner;
+import com.google.common.collect.ImmutableList;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.Map;
+
+import org.apache.helix.*;
+import org.apache.helix.controller.HelixControllerMain;
+import org.apache.helix.integration.ZkIntegrationTestBase;
+import org.apache.helix.task.*;
+import org.apache.helix.tools.ClusterSetup;
+import org.apache.helix.tools.ClusterStateVerifier;
+import org.testng.Assert;
+import org.testng.annotations.AfterClass;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.Test;
+
+
+/**
+ * @author Abe <as...@linkedin.com>
+ * @version $Revision$
+ */
+// Integration test for the Helix task rebalancer: spins up a real mini-cluster (participants,
+// controller, admin manager) against the shared test ZooKeeper and drives task workflows through it.
+public class TestTaskRebalancer extends ZkIntegrationTestBase
+{
+ private static final int NUM_NODES = 5;
+ private static final int START_PORT = 12918;
+ private static final String MASTER_SLAVE_STATE_MODEL = "MasterSlave";
+ private static final int NUM_PARTITIONS = 20;
+ private static final int NUM_REPLICAS = 3;
+ private final String CLUSTER_NAME = CLUSTER_PREFIX + "_" + getShortClassName();
+ // Tracks every started manager/thread (participants + controller) so afterClass can tear them down.
+ private final Map<String, TestHelper.StartCMResult> _startCMResultMap = new HashMap<String, TestHelper.StartCMResult>();
+ private HelixManager _manager;
+ private TaskDriver _driver;
+
+ // One-time setup: wipe any stale cluster state in ZK, create the cluster, register a "Reindex"
+ // task factory on each dummy participant, start a standalone controller, and connect an admin
+ // manager used to build the TaskDriver. Verifies the cluster converges before tests run.
+ @BeforeClass
+ public void beforeClass()
+ throws Exception
+ {
+ String namespace = "/" + CLUSTER_NAME;
+ if (_gZkClient.exists(namespace))
+ {
+ _gZkClient.deleteRecursive(namespace);
+ }
+
+ ClusterSetup setupTool = new ClusterSetup(ZK_ADDR);
+ setupTool.addCluster(CLUSTER_NAME, true);
+ for (int i = 0; i < NUM_NODES; i++)
+ {
+ String storageNodeName = PARTICIPANT_PREFIX + "_" + (START_PORT + i);
+ setupTool.addInstanceToCluster(CLUSTER_NAME, storageNodeName);
+ }
+
+ // Set up target db
+ setupTool.addResourceToCluster(CLUSTER_NAME, WorkflowGenerator.DEFAULT_TGT_DB, NUM_PARTITIONS, MASTER_SLAVE_STATE_MODEL);
+ setupTool.rebalanceStorageCluster(CLUSTER_NAME, WorkflowGenerator.DEFAULT_TGT_DB, NUM_REPLICAS);
+
+ Map<String, TaskFactory> taskFactoryReg = new HashMap<String, TaskFactory>();
+ taskFactoryReg.put("Reindex", new TaskFactory()
+ {
+ @Override
+ public Task createNewTask(String config)
+ {
+ return new ReindexTask(config);
+ }
+ });
+
+ // start dummy participants
+ for (int i = 0; i < NUM_NODES; i++)
+ {
+ String instanceName = PARTICIPANT_PREFIX + "_" + (START_PORT + i);
+ TestHelper.StartCMResult result = TestUtil.startDummyProcess(ZK_ADDR, CLUSTER_NAME, instanceName, taskFactoryReg);
+ _startCMResultMap.put(instanceName, result);
+ }
+
+ // start controller
+ String controllerName = CONTROLLER_PREFIX + "_0";
+ TestHelper.StartCMResult startResult = TestHelper.startController(CLUSTER_NAME,
+ controllerName,
+ ZK_ADDR,
+ HelixControllerMain.STANDALONE);
+ _startCMResultMap.put(controllerName, startResult);
+
+ // create cluster manager
+ _manager = HelixManagerFactory.getZKHelixManager(CLUSTER_NAME, "Admin", InstanceType.ADMINISTRATOR, ZK_ADDR);
+ _manager.connect();
+ _driver = new TaskDriver(_manager);
+
+ // Sanity-check that the cluster has converged before any test method runs.
+ boolean result = ClusterStateVerifier.verifyByZkCallback(new ClusterStateVerifier.MasterNbInExtViewVerifier(ZK_ADDR,
+ CLUSTER_NAME));
+ Assert.assertTrue(result);
+
+ result = ClusterStateVerifier.verifyByZkCallback(new ClusterStateVerifier.BestPossAndExtViewZkVerifier(ZK_ADDR,
+ CLUSTER_NAME));
+ Assert.assertTrue(result);
+ }
+
+ @AfterClass
+ public void afterClass()
+ throws Exception
+ {
+ /**
+ * shutdown order: 1) disconnect the controller 2) disconnect participants
+ */
+
+ TestHelper.StartCMResult result;
+ // First pass: shut down only the controller entries.
+ Iterator<Map.Entry<String, TestHelper.StartCMResult>> it = _startCMResultMap.entrySet().iterator();
+ while (it.hasNext())
+ {
+ String instanceName = it.next().getKey();
+ if (instanceName.startsWith(CONTROLLER_PREFIX))
+ {
+ result = _startCMResultMap.get(instanceName);
+ result._manager.disconnect();
+ result._thread.interrupt();
+ it.remove();
+ }
+ }
+
+ // Brief pause so the controller disconnect settles before participants go away.
+ Thread.sleep(100);
+ // Second pass: shut down the remaining entries (all participants).
+ it = _startCMResultMap.entrySet().iterator();
+ while (it.hasNext())
+ {
+ String instanceName = it.next().getKey();
+ result = _startCMResultMap.get(instanceName);
+ result._manager.disconnect();
+ result._thread.interrupt();
+ it.remove();
+ }
+
+ _manager.disconnect();
+ }
+
+ // Happy-path run with a 100ms simulated task duration.
+ @Test
+ public void basic()
+ throws Exception
+ {
+ basic(100);
+ }
+
+ // Edge case: tasks that complete instantly (0ms) must still be tracked to COMPLETED.
+ @Test
+ public void zeroTaskCompletionTime()
+ throws Exception
+ {
+ basic(0);
+ }
+
+ // Verifies that workflow config and context are cleaned up after the configured expiry elapses.
+ @Test
+ public void testExpiry() throws Exception
+ {
+ String taskName = "Expiry";
+ long expiry = 1000;
+ Workflow flow = WorkflowGenerator.generateDefaultSingleTaskWorkflowBuilderWithExtraConfigs(taskName,
+ TaskConfig.COMMAND_CONFIG, String.valueOf(100)).setExpiry(expiry).build();
+
+ _driver.start(flow);
+ TestUtil.pollForWorkflowState(_manager, taskName, TaskState.IN_PROGRESS);
+
+ // Running workflow should have config and context viewable through accessor
+ HelixDataAccessor accessor = _manager.getHelixDataAccessor();
+ PropertyKey workflowCfgKey = accessor.keyBuilder().resourceConfig(taskName);
+ String workflowPropStoreKey = Joiner.on("/").join(TaskConstants.REBALANCER_CONTEXT_ROOT, taskName);
+
+ // Ensure context and config exist
+ Assert.assertTrue(_manager.getHelixPropertyStore().exists(workflowPropStoreKey, AccessOption.PERSISTENT));
+ Assert.assertNotSame(accessor.getProperty(workflowCfgKey), null);
+
+ // Wait for task to finish and expire
+ TestUtil.pollForWorkflowState(_manager, taskName, TaskState.COMPLETED);
+ Thread.sleep(expiry);
+ _driver.invokeRebalance();
+ Thread.sleep(expiry);
+
+ // Ensure workflow config and context were cleaned up by now
+ Assert.assertFalse(_manager.getHelixPropertyStore().exists(workflowPropStoreKey, AccessOption.PERSISTENT));
+ Assert.assertEquals(accessor.getProperty(workflowCfgKey), null);
+ }
+
+ // Shared driver for basic()/zeroTaskCompletionTime(): runs a single-task workflow whose task
+ // takes taskCompletionTime ms, then checks every partition completed in exactly one attempt.
+ private void basic(long taskCompletionTime)
+ throws Exception
+ {
+ // We use a different resource name in each test method as a work around for a helix participant bug where it does
+ // not clear locally cached state when a resource partition is dropped. Once that is fixed we should change these
+ // tests to use the same resource name and implement a beforeMethod that deletes the task resource.
+ final String taskResource = "basic" + taskCompletionTime;
+ Workflow flow = WorkflowGenerator.generateDefaultSingleTaskWorkflowBuilderWithExtraConfigs(taskResource,
+ TaskConfig.COMMAND_CONFIG, String.valueOf(taskCompletionTime)).build();
+ _driver.start(flow);
+
+ // Wait for task completion
+ TestUtil.pollForWorkflowState(_manager, taskResource, TaskState.COMPLETED);
+
+ // Ensure all partitions are completed individually
+ TaskContext ctx = TaskUtil.getTaskContext(_manager, TaskUtil.getNamespacedTaskName(taskResource));
+ for (int i = 0; i < NUM_PARTITIONS; i++)
+ {
+ Assert.assertEquals(ctx.getPartitionState(i), TaskPartitionState.COMPLETED);
+ Assert.assertEquals(ctx.getPartitionNumAttempts(i), 1);
+ }
+ }
+
+ // Verifies that a task restricted to an explicit partition subset only runs those partitions.
+ @Test
+ public void partitionSet()
+ throws Exception
+ {
+ final String taskResource = "partitionSet";
+ ImmutableList<Integer> targetPartitions = ImmutableList.of(1, 2, 3, 5, 8, 13);
+
+ // construct and submit our basic workflow
+ Workflow flow = WorkflowGenerator.generateDefaultSingleTaskWorkflowBuilderWithExtraConfigs(taskResource,
+ TaskConfig.COMMAND_CONFIG, String.valueOf(100),
+ TaskConfig.MAX_ATTEMPTS_PER_PARTITION, String.valueOf(1),
+ TaskConfig.TARGET_PARTITIONS, Joiner.on(",").join(targetPartitions)).build();
+ _driver.start(flow);
+
+ // wait for task completeness/timeout
+ TestUtil.pollForWorkflowState(_manager, taskResource, TaskState.COMPLETED);
+
+ // see if resulting context completed successfully for our partition set
+ String namespacedName = TaskUtil.getNamespacedTaskName(taskResource);
+
+ TaskContext ctx = TaskUtil.getTaskContext(_manager, namespacedName);
+ WorkflowContext workflowContext = TaskUtil.getWorkflowContext(_manager, taskResource);
+ Assert.assertNotNull(ctx);
+ Assert.assertNotNull(workflowContext);
+ Assert.assertEquals(workflowContext.getTaskState(namespacedName), TaskState.COMPLETED);
+ for (int i : targetPartitions)
+ {
+ Assert.assertEquals(ctx.getPartitionState(i), TaskPartitionState.COMPLETED);
+ Assert.assertEquals(ctx.getPartitionNumAttempts(i), 1);
+ }
+ }
+
+ // Runs a two-task workflow (parent/child dependency) and checks both tasks complete.
+ @Test
+ public void testRepeatedWorkflow() throws Exception
+ {
+ String workflowName = "SomeWorkflow";
+ Workflow flow = WorkflowGenerator.generateDefaultRepeatedTaskWorkflowBuilder(workflowName).build();
+ new TaskDriver(_manager).start(flow);
+
+ // Wait until the task completes
+ TestUtil.pollForWorkflowState(_manager, workflowName, TaskState.COMPLETED);
+
+ // Assert completion for all tasks within two minutes
+ for(String task : flow.getTaskConfigs().keySet())
+ {
+ TestUtil.pollForTaskState(_manager, workflowName, task, TaskState.COMPLETED);
+ }
+ }
+
+ // Verifies per-partition timeout handling: partitions should time out and be retried up to
+ // MAX_ATTEMPTS_PER_PARTITION, after which the workflow is marked FAILED.
+ @Test
+ public void timeouts()
+ throws Exception
+ {
+ final String taskResource = "timeouts";
+ Workflow flow = WorkflowGenerator.generateDefaultSingleTaskWorkflowBuilderWithExtraConfigs(taskResource,
+ TaskConfig.MAX_ATTEMPTS_PER_PARTITION, String.valueOf(2),
+ TaskConfig.TIMEOUT_PER_PARTITION, String.valueOf(100)).build();
+ _driver.start(flow);
+
+ // Wait until the task reports failure.
+ TestUtil.pollForWorkflowState(_manager, taskResource, TaskState.FAILED);
+
+ // Check that all partitions timed out up to maxAttempts
+ TaskContext ctx = TaskUtil.getTaskContext(_manager, TaskUtil.getNamespacedTaskName(taskResource));
+ int maxAttempts = 0;
+ for (int i = 0; i < NUM_PARTITIONS; i++)
+ {
+ TaskPartitionState state = ctx.getPartitionState(i);
+ if (state != null)
+ {
+ Assert.assertEquals(state, TaskPartitionState.TIMED_OUT);
+ maxAttempts = Math.max(maxAttempts, ctx.getPartitionNumAttempts(i));
+ }
+ }
+ Assert.assertEquals(maxAttempts, 2);
+ }
+
+ // Dummy Task that busy-waits for a configured delay (ms, parsed from the command config) and
+ // supports cancellation via a volatile flag. Reports remaining time in the TaskResult info.
+ private static class ReindexTask implements Task
+ {
+ private final long _delay;
+ private volatile boolean _canceled;
+
+ public ReindexTask(String cfg)
+ {
+ _delay = Long.parseLong(cfg);
+ }
+
+ @Override
+ public TaskResult run()
+ {
+ long expiry = System.currentTimeMillis() + _delay;
+ long timeLeft;
+ while (System.currentTimeMillis() < expiry)
+ {
+ if (_canceled)
+ {
+ timeLeft = expiry - System.currentTimeMillis();
+ return new TaskResult(TaskResult.Status.CANCELED, String.valueOf(timeLeft < 0 ? 0 : timeLeft));
+ }
+ sleep(50);
+ }
+ timeLeft = expiry - System.currentTimeMillis();
+ return new TaskResult(TaskResult.Status.COMPLETED, String.valueOf(timeLeft < 0 ? 0 : timeLeft));
+ }
+
+ @Override
+ public void cancel()
+ {
+ _canceled = true;
+ }
+
+ private static void sleep(long d)
+ {
+ try
+ {
+ Thread.sleep(d);
+ }
+ catch (InterruptedException e)
+ {
+ e.printStackTrace();
+ }
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/helix-core/src/test/java/org/apache/helix/integration/task/TestTaskRebalancerStopResume.java
----------------------------------------------------------------------
diff --git a/helix-core/src/test/java/org/apache/helix/integration/task/TestTaskRebalancerStopResume.java b/helix-core/src/test/java/org/apache/helix/integration/task/TestTaskRebalancerStopResume.java
new file mode 100644
index 0000000..4c17397
--- /dev/null
+++ b/helix-core/src/test/java/org/apache/helix/integration/task/TestTaskRebalancerStopResume.java
@@ -0,0 +1,231 @@
+/*
+ * $Id$
+ */
+package org.apache.helix.integration.task;
+
+
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.Map;
+import org.apache.helix.HelixManager;
+import org.apache.helix.HelixManagerFactory;
+import org.apache.helix.InstanceType;
+import org.apache.helix.TestHelper;
+import org.apache.helix.controller.HelixControllerMain;
+import org.apache.helix.integration.ZkIntegrationTestBase;
+import org.apache.helix.integration.ZkStandAloneCMTestBase;
+import org.apache.helix.task.*;
+import org.apache.helix.tools.ClusterSetup;
+import org.apache.helix.tools.ClusterStateVerifier;
+import org.apache.log4j.Logger;
+import org.testng.Assert;
+import org.testng.annotations.AfterClass;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.Test;
+
+
+/**
+ * @author Abe <as...@linkedin.com>
+ * @version $Revision$
+ */
+// Integration test for stopping and resuming task workflows via TaskDriver: a stopped
+// workflow must reach STOPPED, and resuming it must drive it to COMPLETED.
+public class TestTaskRebalancerStopResume extends ZkIntegrationTestBase
+{
+ // BUGFIX: logger was created with ZkStandAloneCMTestBase.class (copy-paste), which
+ // mis-attributed this test's log output to an unrelated class.
+ private static final Logger LOG = Logger.getLogger(TestTaskRebalancerStopResume.class);
+ private static final int NUM_NODES = 5;
+ private static final int START_PORT = 12918;
+ private static final String MASTER_SLAVE_STATE_MODEL = "MasterSlave";
+ private static final String TGT_DB = "TestDB";
+ private static final String TASK_RESOURCE = "SomeTask";
+ private static final int NUM_PARTITIONS = 20;
+ private static final int NUM_REPLICAS = 3;
+ private final String CLUSTER_NAME = CLUSTER_PREFIX + "_" + getShortClassName();
+ // Tracks every started manager/thread (participants + controller) so afterClass can tear them down.
+ private final Map<String, TestHelper.StartCMResult> _startCMResultMap = new HashMap<String, TestHelper.StartCMResult>();
+ private HelixManager _manager;
+ private TaskDriver _driver;
+
+ // One-time setup: wipe stale ZK state, create the cluster and target DB, start participants
+ // with a "Reindex" task factory, start a standalone controller, and connect an admin manager.
+ @BeforeClass
+ public void beforeClass()
+ throws Exception
+ {
+ String namespace = "/" + CLUSTER_NAME;
+ if (_gZkClient.exists(namespace))
+ {
+ _gZkClient.deleteRecursive(namespace);
+ }
+
+ ClusterSetup setupTool = new ClusterSetup(ZK_ADDR);
+ setupTool.addCluster(CLUSTER_NAME, true);
+ for (int i = 0; i < NUM_NODES; i++)
+ {
+ String storageNodeName = PARTICIPANT_PREFIX + "_" + (START_PORT + i);
+ setupTool.addInstanceToCluster(CLUSTER_NAME, storageNodeName);
+ }
+
+ // Set up target db
+ setupTool.addResourceToCluster(CLUSTER_NAME, TGT_DB, NUM_PARTITIONS, MASTER_SLAVE_STATE_MODEL);
+ setupTool.rebalanceStorageCluster(CLUSTER_NAME, TGT_DB, NUM_REPLICAS);
+
+ Map<String, TaskFactory> taskFactoryReg = new HashMap<String, TaskFactory>();
+ taskFactoryReg.put("Reindex", new TaskFactory()
+ {
+ @Override
+ public Task createNewTask(String config)
+ {
+ return new ReindexTask(config);
+ }
+ });
+
+ // start dummy participants
+ for (int i = 0; i < NUM_NODES; i++)
+ {
+ String instanceName = PARTICIPANT_PREFIX + "_" + (START_PORT + i);
+ TestHelper.StartCMResult result = TestUtil.startDummyProcess(ZK_ADDR, CLUSTER_NAME, instanceName, taskFactoryReg);
+ _startCMResultMap.put(instanceName, result);
+ }
+
+ // start controller
+ String controllerName = CONTROLLER_PREFIX + "_0";
+ TestHelper.StartCMResult startResult = TestHelper.startController(CLUSTER_NAME,
+ controllerName,
+ ZK_ADDR,
+ HelixControllerMain.STANDALONE);
+ _startCMResultMap.put(controllerName, startResult);
+
+ // create cluster manager
+ _manager = HelixManagerFactory.getZKHelixManager(CLUSTER_NAME, "Admin", InstanceType.ADMINISTRATOR, ZK_ADDR);
+ _manager.connect();
+
+ _driver = new TaskDriver(_manager);
+
+ // Sanity-check that the cluster has converged before any test method runs.
+ boolean result = ClusterStateVerifier.verifyByZkCallback(new ClusterStateVerifier.MasterNbInExtViewVerifier(ZK_ADDR,
+ CLUSTER_NAME));
+ Assert.assertTrue(result);
+
+ result = ClusterStateVerifier.verifyByZkCallback(new ClusterStateVerifier.BestPossAndExtViewZkVerifier(ZK_ADDR,
+ CLUSTER_NAME));
+ Assert.assertTrue(result);
+ }
+
+ @AfterClass
+ public void afterClass()
+ throws Exception
+ {
+ /**
+ * shutdown order: 1) disconnect the controller 2) disconnect participants
+ */
+
+ TestHelper.StartCMResult result;
+ // First pass: shut down only the controller entries.
+ Iterator<Map.Entry<String, TestHelper.StartCMResult>> it = _startCMResultMap.entrySet().iterator();
+ while (it.hasNext())
+ {
+ String instanceName = it.next().getKey();
+ if (instanceName.startsWith(CONTROLLER_PREFIX))
+ {
+ result = _startCMResultMap.get(instanceName);
+ result._manager.disconnect();
+ result._thread.interrupt();
+ it.remove();
+ }
+ }
+
+ // Brief pause so the controller disconnect settles before participants go away.
+ Thread.sleep(100);
+ // Second pass: shut down the remaining entries (all participants).
+ it = _startCMResultMap.entrySet().iterator();
+ while (it.hasNext())
+ {
+ String instanceName = it.next().getKey();
+ result = _startCMResultMap.get(instanceName);
+ result._manager.disconnect();
+ result._thread.interrupt();
+ it.remove();
+ }
+
+ _manager.disconnect();
+ }
+
+ // Stops a running single-task workflow, verifies it reaches STOPPED, then resumes it
+ // and verifies it runs to COMPLETED.
+ @Test
+ public void stopAndResume()
+ throws Exception
+ {
+ Workflow flow = WorkflowGenerator.generateDefaultSingleTaskWorkflowBuilderWithExtraConfigs(TASK_RESOURCE,
+ TaskConfig.COMMAND_CONFIG, String.valueOf(100)).build();
+
+ LOG.info("Starting flow " + flow.getName());
+ _driver.start(flow);
+ TestUtil.pollForWorkflowState(_manager, TASK_RESOURCE, TaskState.IN_PROGRESS);
+
+ LOG.info("Pausing task");
+ _driver.stop(TASK_RESOURCE);
+ TestUtil.pollForWorkflowState(_manager, TASK_RESOURCE, TaskState.STOPPED);
+
+ LOG.info("Resuming task");
+ _driver.resume(TASK_RESOURCE);
+ TestUtil.pollForWorkflowState(_manager, TASK_RESOURCE, TaskState.COMPLETED);
+ }
+
+ // Same stop/resume cycle, but for a multi-task workflow with a parent/child dependency.
+ @Test
+ public void stopAndResumeWorkflow()
+ throws Exception
+ {
+ String workflow = "SomeWorkflow";
+ Workflow flow = WorkflowGenerator.generateDefaultRepeatedTaskWorkflowBuilder(workflow).build();
+
+ LOG.info("Starting flow " + workflow);
+ _driver.start(flow);
+ TestUtil.pollForWorkflowState(_manager, workflow, TaskState.IN_PROGRESS);
+
+ LOG.info("Pausing workflow");
+ _driver.stop(workflow);
+ TestUtil.pollForWorkflowState(_manager, workflow, TaskState.STOPPED);
+
+ LOG.info("Resuming workflow");
+ _driver.resume(workflow);
+ TestUtil.pollForWorkflowState(_manager, workflow, TaskState.COMPLETED);
+ }
+
+ // Dummy Task that busy-waits for a configured delay (ms, parsed from the command config) and
+ // supports cancellation via a volatile flag. Reports remaining time in the TaskResult info.
+ public static class ReindexTask implements Task
+ {
+ private final long _delay;
+ private volatile boolean _canceled;
+
+ public ReindexTask(String cfg)
+ {
+ _delay = Long.parseLong(cfg);
+ }
+
+ @Override
+ public TaskResult run()
+ {
+ long expiry = System.currentTimeMillis() + _delay;
+ long timeLeft;
+ while (System.currentTimeMillis() < expiry)
+ {
+ if (_canceled)
+ {
+ timeLeft = expiry - System.currentTimeMillis();
+ return new TaskResult(TaskResult.Status.CANCELED, String.valueOf(timeLeft < 0 ? 0 : timeLeft));
+ }
+ sleep(50);
+ }
+ timeLeft = expiry - System.currentTimeMillis();
+ return new TaskResult(TaskResult.Status.COMPLETED, String.valueOf(timeLeft < 0 ? 0 : timeLeft));
+ }
+
+ @Override
+ public void cancel()
+ {
+ _canceled = true;
+ }
+
+ private static void sleep(long d)
+ {
+ try
+ {
+ Thread.sleep(d);
+ }
+ catch (InterruptedException e)
+ {
+ e.printStackTrace();
+ }
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/helix-core/src/test/java/org/apache/helix/integration/task/TestUtil.java
----------------------------------------------------------------------
diff --git a/helix-core/src/test/java/org/apache/helix/integration/task/TestUtil.java b/helix-core/src/test/java/org/apache/helix/integration/task/TestUtil.java
new file mode 100644
index 0000000..17592b7
--- /dev/null
+++ b/helix-core/src/test/java/org/apache/helix/integration/task/TestUtil.java
@@ -0,0 +1,128 @@
+/*
+ * $Id$
+ */
+package org.apache.helix.integration.task;
+
+
+import java.util.Map;
+import org.apache.helix.HelixManager;
+import org.apache.helix.InstanceType;
+import org.apache.helix.TestHelper;
+import org.apache.helix.ZkHelixTestManager;
+import org.apache.helix.mock.participant.DummyProcess;
+import org.apache.helix.participant.StateMachineEngine;
+import org.apache.helix.task.*;
+import org.apache.log4j.Logger;
+import org.testng.Assert;
+
+
+/**
+ * Static test utility methods.
+ *
+ * @author Abe <as...@linkedin.com>
+ * @version $Revision$
+ */
+public class TestUtil
+{
+ private static final Logger LOG = Logger.getLogger(TestUtil.class);
+
+ /**
+ * Starts a mock participant in its own thread: registers the MasterSlave and Task state model
+ * factories, connects the manager, and blocks until interrupted.
+ *
+ * @return the started manager and its thread, for later disconnect/interrupt by the caller
+ */
+ public static TestHelper.StartCMResult startDummyProcess(String zkAddr,
+ String clusterName,
+ String instanceName,
+ Map<String, TaskFactory> taskFactoryMap)
+ throws Exception
+ {
+ TestHelper.StartCMResult result = new TestHelper.StartCMResult();
+ ZkHelixTestManager manager = new ZkHelixTestManager(clusterName,
+ instanceName,
+ InstanceType.PARTICIPANT,
+ zkAddr);
+ result._manager = manager;
+ Thread thread = new Thread(new MockInstanceThread(manager, instanceName, taskFactoryMap));
+ result._thread = thread;
+ thread.start();
+
+ return result;
+ }
+
+ /**
+ * Polls {@link org.apache.helix.task.TaskContext} for given task resource until a timeout is reached.
+ * If the task has not reached target state by then, an error is thrown
+ *
+ * @param workflowResource Resource to poll for completeness
+ * @throws InterruptedException
+ */
+ public static void pollForWorkflowState(HelixManager manager, String workflowResource, TaskState state)
+ throws InterruptedException
+ {
+ // Wait for completion.
+ long st = System.currentTimeMillis();
+ WorkflowContext ctx;
+ do
+ {
+ Thread.sleep(100);
+ ctx = TaskUtil.getWorkflowContext(manager, workflowResource);
+ }
+ while ((ctx == null || ctx.getWorkflowState() == null || ctx.getWorkflowState() != state)
+ && System.currentTimeMillis() < st + 2 * 60 * 1000 /* 2 mins */);
+
+ Assert.assertNotNull(ctx);
+ Assert.assertEquals(ctx.getWorkflowState(), state);
+ }
+
+ /**
+ * Polls the workflow context until the named task reaches the target state or a 2-minute
+ * timeout elapses, then asserts the task actually reached that state.
+ *
+ * @param workflowResource workflow containing the task
+ * @param taskName namespaced task whose state is polled
+ * @throws InterruptedException
+ */
+ public static void pollForTaskState(HelixManager manager, String workflowResource, String taskName, TaskState state)
+ throws InterruptedException
+ {
+ // Wait for completion.
+ long st = System.currentTimeMillis();
+ WorkflowContext ctx;
+ do
+ {
+ Thread.sleep(100);
+ ctx = TaskUtil.getWorkflowContext(manager, workflowResource);
+ }
+ while ((ctx == null || ctx.getTaskState(taskName) == null || ctx.getTaskState(taskName) != state)
+ && System.currentTimeMillis() < st + 2 * 60 * 1000 /* 2 mins */);
+
+ Assert.assertNotNull(ctx);
+ // BUGFIX: previously asserted ctx.getWorkflowState() here, i.e. the WORKFLOW's state,
+ // even though this method polls (and is documented to check) the TASK's state.
+ Assert.assertEquals(ctx.getTaskState(taskName), state);
+ }
+
+ // Runnable that wires up a mock participant's state model factories and keeps its thread
+ // alive (via join on itself) until interrupted by the test teardown.
+ private static class MockInstanceThread implements Runnable
+ {
+ private final HelixManager _manager;
+ private final String _instanceName;
+ private final Map<String, TaskFactory> _factoryMap;
+
+ public MockInstanceThread(HelixManager manager, String instanceName, Map<String, TaskFactory> factoryMap)
+ {
+ _manager = manager;
+ _instanceName = instanceName;
+ _factoryMap = factoryMap;
+ }
+
+ @Override
+ public void run()
+ {
+ try
+ {
+ StateMachineEngine stateMach = _manager.getStateMachineEngine();
+ // Register dummy MasterSlave state model factory.
+ stateMach.registerStateModelFactory("MasterSlave", new DummyProcess.DummyStateModelFactory(0));
+ // Register a Task state model factory.
+ stateMach.registerStateModelFactory("Task", new TaskStateModelFactory(_manager, _factoryMap));
+
+ _manager.connect();
+ // Block this thread until the test interrupts it during teardown.
+ Thread.currentThread().join();
+ }
+ catch (InterruptedException e)
+ {
+ LOG.info("participant:" + _instanceName + ", " + Thread.currentThread().getName() + " interrupted");
+ }
+ catch (Exception e)
+ {
+ LOG.error("participant:" + _instanceName + ", " + Thread.currentThread().getName() + " interrupted", e);
+ }
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/helix-core/src/test/java/org/apache/helix/integration/task/WorkflowGenerator.java
----------------------------------------------------------------------
diff --git a/helix-core/src/test/java/org/apache/helix/integration/task/WorkflowGenerator.java b/helix-core/src/test/java/org/apache/helix/integration/task/WorkflowGenerator.java
new file mode 100644
index 0000000..f096a1a
--- /dev/null
+++ b/helix-core/src/test/java/org/apache/helix/integration/task/WorkflowGenerator.java
@@ -0,0 +1,76 @@
+package org.apache.helix.integration.task;
+
+import org.apache.helix.task.Workflow;
+
+import java.util.Collections;
+import java.util.Map;
+import java.util.TreeMap;
+
+
+/**
+ * Convenience class for generating various test workflows
+ *
+ * @author Chris Beavers <cb...@linkedin.com>
+ */
+public class WorkflowGenerator
+{
+ public static final String DEFAULT_TGT_DB = "TestDB";
+ private static final String TASK_NAME_1 = "SomeTask1";
+ private static final String TASK_NAME_2 = "SomeTask2";
+
+ private static final Map<String, String> DEFAULT_TASK_CONFIG;
+ static {
+ Map<String, String> tmpMap = new TreeMap<String,String>();
+ tmpMap.put("TargetResource", DEFAULT_TGT_DB);
+ tmpMap.put("TargetPartitionStates", "MASTER");
+ tmpMap.put("Command", "Reindex");
+ tmpMap.put("CommandConfig", String.valueOf(2000));
+ tmpMap.put("TimeoutPerPartition", String.valueOf(10 * 1000));
+ DEFAULT_TASK_CONFIG = Collections.unmodifiableMap(tmpMap);
+ }
+
+ public static Workflow.Builder generateDefaultSingleTaskWorkflowBuilderWithExtraConfigs(String taskName,
+ String ... cfgs)
+ {
+ if(cfgs.length % 2 != 0)
+ {
+ throw new IllegalArgumentException("Additional configs should have even number of keys and values");
+ }
+ Workflow.Builder bldr = generateDefaultSingleTaskWorkflowBuilder(taskName);
+ for(int i=0; i<cfgs.length; i+=2)
+ {
+ bldr.addConfig(taskName, cfgs[i], cfgs[i+1]);
+ }
+
+ return bldr;
+ }
+
+ public static Workflow.Builder generateDefaultSingleTaskWorkflowBuilder(String taskName)
+ {
+ return generateSingleTaskWorkflowBuilder(taskName, DEFAULT_TASK_CONFIG);
+ }
+
+ public static Workflow.Builder generateSingleTaskWorkflowBuilder(String taskName, Map<String, String> config)
+ {
+ Workflow.Builder builder = new Workflow.Builder(taskName);
+ for(String key : config.keySet())
+ {
+ builder.addConfig(taskName, key, config.get(key));
+ }
+ return builder;
+ }
+
+ public static Workflow.Builder generateDefaultRepeatedTaskWorkflowBuilder(String workflowName)
+ {
+ Workflow.Builder builder = new Workflow.Builder(workflowName);
+ builder.addParentChildDependency(TASK_NAME_1, TASK_NAME_2);
+
+ for(String key : DEFAULT_TASK_CONFIG.keySet())
+ {
+ builder.addConfig(TASK_NAME_1, key, DEFAULT_TASK_CONFIG.get(key));
+ builder.addConfig(TASK_NAME_2, key, DEFAULT_TASK_CONFIG.get(key));
+ }
+
+ return builder;
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/README.md
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/README.md b/recipes/auto-scale/README.md
new file mode 100644
index 0000000..f553246
--- /dev/null
+++ b/recipes/auto-scale/README.md
@@ -0,0 +1,82 @@
+<!---
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied. See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+Auto-Scaling with Apache Helix and Apache YARN
+------------------------
+This recipe implements auto-scaling for Helix clusters using a managed cluster and a meta cluster. The managed cluster operates as usual, managing resources and instances via AUTO_REBALANCE. The meta cluster monitors the managed cluster and injects or removes instances based on demand.
+
+The meta cluster makes decisions about scaling up or down based on information obtained from a "ClusterStatusProvider". A custom "ProviderRebalancer" is invoked testing the health of existing participants in the managed cluster with the "ContainerStatusProvider". If participants need to be (re-)deployed the "ContainerProvider" is invoked to instantiate and inject participants in the managed cluster.
+
+ContainerProviders are the participants of the meta cluster and there are multiple different implementations of the "ContainerProvider". First, the "LocalContainerProvider" spawns VM-local participants, i.e. participants of the managed cluster are spawned in the same VM the container provider exists. This is mainly useful for testing. Second, the "ShellContainerProvider" spawns a separate VM process for each participant using shell commands. Third, the "YarnContainerProvider" creates processes as container on a YARN cluster and manages their status using an external meta-data service (Zookeeper in this implementation). This implementation is fairly complex and has a number of external dependencies on a working YARN cluster and running services.
+
+Even though there are different types of providers, the notion of a "ContainerProcess" abstracts implementation specifics. A process implementation inherits from "ContainerProcess" and can be instantiated by all three types of container providers. CAUTION: since separate VM processes might be used, a VM-external method for coordination is required (e.g. Zookeeper).
+
+Configuration settings are passed throughout the application using traditional Properties objects. The "ConfigTool" contains default paths and helps to inject dependencies in the ProviderRebalancer.
+
+The application can be run and tested in two ways. First, a comprehensive suite of unit and integration tests can be run using "mvn verify". Second, the "Bootstrapper" can deploy a live managed and meta cluster based on a specification (e.g. "2by2shell.properties").
+
+------------------------
+The IdealState of the meta cluster uses the ONLINE-OFFLINE model and maps as follows in the example below:
+
+Resource: type of container, e.g. database, webserver
+Partition: container id
+Instance: responsible container provider
+
+META:
+
+database
+ database_0
+ provider_0 : ONLINE
+ database_1
+ provider_1 : ONLINE
+webserver
+ webserver_0
+ provider_0 : ONLINE
+ webserver_1
+ provider_1 : ONLINE
+ webserver_2
+ provider_0 : ONLINE
+
+
+MANAGED:
+
+dbprod (tag=database)
+ dbprod_0
+ database_0 : MASTER
+ database_1 : SLAVE
+ dbprod_1
+ database_0 : SLAVE
+ database_1 : MASTER
+ dbprod_2
+ database_0 : MASTER
+ database_1 : SLAVE
+wsprod (tag=webserver)
+ wsprod_0
+ webserver_0 : ONLINE
+ wsprod_1
+ webserver_1 : ONLINE
+ wsprod_2
+ webserver_2 : ONLINE
+ wsprod_3
+ webserver_0 : ONLINE
+ wsprod_4
+ webserver_1 : ONLINE
+ wsprod_5
+ webserver_2 : ONLINE
+
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/pom.xml
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/pom.xml b/recipes/auto-scale/pom.xml
new file mode 100644
index 0000000..95331f4
--- /dev/null
+++ b/recipes/auto-scale/pom.xml
@@ -0,0 +1,210 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+
+ <parent>
+ <groupId>org.apache.helix.recipes</groupId>
+ <artifactId>recipes</artifactId>
+ <version>0.6.2-incubating-SNAPSHOT</version>
+ </parent>
+
+ <artifactId>auto-scale</artifactId>
+ <packaging>jar</packaging>
+ <name>Apache Helix :: Recipes :: Auto-Scale</name>
+
+ <properties>
+ <hadoop.version>0.23.9</hadoop.version>
+
+ <ut.groups>unit</ut.groups>
+ <it.groups>local, shell</it.groups>
+ </properties>
+
+ <dependencies>
+ <dependency>
+ <groupId>org.testng</groupId>
+ <artifactId>testng</artifactId>
+ <version>6.0.1</version>
+ <scope>test</scope>
+ <exclusions>
+ <exclusion>
+ <groupId>junit</groupId>
+ <artifactId>junit</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
+ <groupId>com.google.code.gson</groupId>
+ <artifactId>gson</artifactId>
+ <version>2.2.4</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.helix</groupId>
+ <artifactId>helix-core</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>log4j</groupId>
+ <artifactId>log4j</artifactId>
+ <exclusions>
+ <exclusion>
+ <groupId>javax.mail</groupId>
+ <artifactId>mail</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>javax.jms</groupId>
+ <artifactId>jms</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>com.sun.jdmk</groupId>
+ <artifactId>jmxtools</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>com.sun.jmx</groupId>
+ <artifactId>jmxri</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
+ <groupId>com.google.guava</groupId>
+ <artifactId>guava</artifactId>
+ <version>14.0.1</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-client</artifactId>
+ <version>${hadoop.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-yarn-api</artifactId>
+ <version>${hadoop.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-yarn-common</artifactId>
+ <version>${hadoop.version}</version>
+ </dependency>
+ </dependencies>
+
+ <build>
+ <pluginManagement>
+ <plugins>
+ <plugin>
+ <groupId>org.codehaus.mojo</groupId>
+ <artifactId>appassembler-maven-plugin</artifactId>
+ <configuration>
+ <configurationDirectory>conf</configurationDirectory>
+ <copyConfigurationDirectory>true</copyConfigurationDirectory>
+ <includeConfigurationDirectoryInClasspath>true</includeConfigurationDirectoryInClasspath>
+ <assembleDirectory>${project.build.directory}/metamanager-pkg</assembleDirectory>
+ <extraJvmArguments>-Xms512m -Xmx512m</extraJvmArguments>
+ <platforms>
+ <platform>unix</platform>
+ </platforms>
+ </configuration>
+ <executions>
+ <execution>
+ <phase>package</phase>
+ <goals>
+ <goal>assemble</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.rat</groupId>
+ <artifactId>apache-rat-plugin</artifactId>
+ <configuration>
+ <excludes combine.children="append">
+ </excludes>
+ </configuration>
+ </plugin>
+ </plugins>
+ </pluginManagement>
+
+ <plugins>
+ <plugin>
+ <groupId>org.codehaus.mojo</groupId>
+ <artifactId>appassembler-maven-plugin</artifactId>
+ <configuration>
+ <programs>
+ <program>
+ <mainClass>org.apache.helix.autoscale.bootstrapper.Boot</mainClass>
+ <name>boot</name>
+ </program>
+ <program>
+ <mainClass>org.apache.helix.autoscale.impl.shell.ShellContainerProcess</mainClass>
+ <name>shell-container-process</name>
+ </program>
+ <program>
+ <mainClass>org.apache.helix.autoscale.impl.yarn.YarnMasterProcess</mainClass>
+ <name>yarn-master-process</name>
+ </program>
+ <program>
+ <mainClass>org.apache.helix.autoscale.impl.yarn.YarnContainerProcess</mainClass>
+ <name>yarn-container-process</name>
+ </program>
+ <program>
+ <mainClass>org.apache.helix.autoscale.ZookeeperSetter</mainClass>
+ <name>zookeeper-setter</name>
+ </program>
+ </programs>
+ </configuration>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-assembly-plugin</artifactId>
+ <executions>
+ <execution>
+ <phase>package</phase>
+ <goals>
+ <goal>single</goal>
+ </goals>
+ </execution>
+ </executions>
+ <configuration>
+ <finalName>metamanager</finalName>
+ <descriptor>src/main/assembly/assembly.xml</descriptor>
+ </configuration>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-surefire-plugin</artifactId>
+ <configuration>
+ <groups>${ut.groups}</groups>
+ <excludedGroups>integration</excludedGroups>
+ <suiteXmlFiles>
+ <suiteXmlFile>src/test/config/testng.xml</suiteXmlFile>
+ </suiteXmlFiles>
+ </configuration>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-failsafe-plugin</artifactId>
+ <configuration>
+ <groups>${it.groups}</groups>
+ <excludedGroups>unit</excludedGroups>
+ <suiteXmlFiles>
+ <suiteXmlFile>src/test/config/testng.xml</suiteXmlFile>
+ </suiteXmlFiles>
+ </configuration>
+ <executions>
+ <execution>
+ <goals>
+ <goal>integration-test</goal>
+ <goal>verify</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
+ </plugins>
+ </build>
+
+ <profiles>
+ <profile>
+ <id>yarn</id>
+ <properties>
+ <it.groups>yarn</it.groups>
+ </properties>
+ </profile>
+ </profiles>
+</project>
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/assembly/assembly.xml
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/assembly/assembly.xml b/recipes/auto-scale/src/main/assembly/assembly.xml
new file mode 100644
index 0000000..03b2ca5
--- /dev/null
+++ b/recipes/auto-scale/src/main/assembly/assembly.xml
@@ -0,0 +1,32 @@
+<assembly
+ xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.2"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.2
+ http://maven.apache.org/xsd/assembly-1.1.2.xsd">
+
+ <id>assembly</id>
+ <formats>
+ <format>tar.gz</format>
+ </formats>
+ <baseDirectory>metamanager</baseDirectory>
+ <fileSets>
+ <fileSet>
+ <directory>target/metamanager-pkg/repo</directory>
+ <outputDirectory>repo</outputDirectory>
+ <excludes>
+ <exclude>**/maven-metadata-appassembler.xml</exclude>
+ </excludes>
+ <fileMode>0644</fileMode>
+ </fileSet>
+ <fileSet>
+ <directory>target/metamanager-pkg/bin</directory>
+ <outputDirectory>bin</outputDirectory>
+ <fileMode>0755</fileMode>
+ </fileSet>
+ <fileSet>
+ <directory>target/metamanager-pkg/conf</directory>
+ <outputDirectory>conf</outputDirectory>
+ <fileMode>0644</fileMode>
+ </fileSet>
+ </fileSets>
+</assembly>
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/config/log4j.properties
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/config/log4j.properties b/recipes/auto-scale/src/main/config/log4j.properties
new file mode 100644
index 0000000..7f29be2
--- /dev/null
+++ b/recipes/auto-scale/src/main/config/log4j.properties
@@ -0,0 +1,30 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# Set root logger level to ERROR and its only appender to C.
+log4j.rootLogger=ERROR, C
+
+# C is set to be a ConsoleAppender.
+log4j.appender.C=org.apache.log4j.ConsoleAppender
+log4j.appender.C.layout=org.apache.log4j.PatternLayout
+log4j.appender.C.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n
+
+log4j.logger.org.I0Itec=ERROR
+log4j.logger.org.apache=ERROR
+
+log4j.logger.org.apache.helix.autoscale=INFO
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/ClusterAdmin.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/ClusterAdmin.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/ClusterAdmin.java
new file mode 100644
index 0000000..f7808bd
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/ClusterAdmin.java
@@ -0,0 +1,30 @@
+package org.apache.helix.autoscale;
+
+/**
+ * Abstraction for instance config (container) injection into and removal from
+ * the managed cluster.
+ *
+ */
+public interface ClusterAdmin {
+
+ /**
+ * Add instance configuration to managed cluster.
+ *
+ * @param instanceId
+ * unique identifier of the (container) instance to register
+ * @param instanceTag
+ * tag linking the instance to tagged resources in the cluster
+ */
+ public void addInstance(String instanceId, String instanceTag);
+
+ /**
+ * Remove instance configuration from managed cluster.<br/>
+ * <b>INVARIANT:</b> idempotent
+ *
+ * @param instanceId
+ * unique identifier of the (container) instance to remove
+ */
+ public void removeInstance(String instanceId);
+
+ /**
+ * Trigger rebalance of any affected resource in the managed cluster.
+ */
+ public void rebalance();
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/ContainerProvider.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/ContainerProvider.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/ContainerProvider.java
new file mode 100644
index 0000000..8409ba3
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/ContainerProvider.java
@@ -0,0 +1,40 @@
+package org.apache.helix.autoscale;
+
+import org.apache.helix.autoscale.provider.ProviderStateModel;
+
+/**
+ * Abstraction for container deployment framework. Creates and destroys
+ * container instances. Is invoked by ProviderStateModel and must be blocking.
+ *
+ * @see ProviderStateModel
+ */
+public interface ContainerProvider {
+ /**
+ * Create container of given type.<br/>
+ * <b>INVARIANT:</b> synchronous invocation
+ *
+ * @param id
+ * unique user-defined container id
+ * @param containerType
+ * container type
+ * @throws Exception
+ * if the container could not be created
+ */
+ public void create(String id, String containerType) throws Exception;
+
+ /**
+ * Destroy container.<br/>
+ * <b>INVARIANT:</b> synchronous invocation
+ *
+ * @param id
+ * unique user-defined container id
+ * @throws Exception
+ * if the container could not be destroyed
+ */
+ public void destroy(String id) throws Exception;
+
+ /**
+ * Stops all running processes and destroys containers. Best-effort for
+ * cleanup.
+ *
+ */
+ public void destroyAll();
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/ContainerProviderService.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/ContainerProviderService.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/ContainerProviderService.java
new file mode 100644
index 0000000..1c39b7c
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/ContainerProviderService.java
@@ -0,0 +1,9 @@
+package org.apache.helix.autoscale;
+
+/**
+ * ContainerProvider as configurable service. Marker interface combining
+ * {@link ContainerProvider} with the {@link Service} life-cycle; adds no
+ * methods of its own.
+ *
+ */
+public interface ContainerProviderService extends ContainerProvider, Service {
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/HelixClusterAdmin.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/HelixClusterAdmin.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/HelixClusterAdmin.java
new file mode 100644
index 0000000..0eb1362
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/HelixClusterAdmin.java
@@ -0,0 +1,43 @@
+package org.apache.helix.autoscale;
+
+import org.apache.helix.HelixAdmin;
+import org.apache.helix.model.InstanceConfig;
+import org.apache.log4j.Logger;
+
+/**
+ * Implementation of ClusterAdmin based on Helix. Mutating operations are
+ * serialized on this instance's monitor.
+ *
+ */
+public class HelixClusterAdmin implements ClusterAdmin {
+
+    static final Logger log = Logger.getLogger(HelixClusterAdmin.class);
+
+    final String cluster;
+    final HelixAdmin admin;
+
+    /**
+     * @param clusterName
+     *            name of the managed Helix cluster
+     * @param admin
+     *            Helix admin handle used for all cluster operations
+     */
+    public HelixClusterAdmin(String clusterName, HelixAdmin admin) {
+        this.cluster = clusterName;
+        this.admin = admin;
+    }
+
+    @Override
+    public synchronized void addInstance(String instanceId, String instanceTag) {
+        log.debug(String.format("injecting instance %s (tag=%s) in cluster %s", instanceId, instanceTag, cluster));
+        admin.addInstance(cluster, new InstanceConfig(instanceId));
+        admin.addInstanceTag(cluster, instanceId, instanceTag);
+    }
+
+    @Override
+    public synchronized void removeInstance(String instanceId) {
+        // parameter renamed from "connection" to "instanceId" for consistency
+        // with the ClusterAdmin interface and addInstance()
+        log.debug(String.format("removing instance %s from cluster %s", instanceId, cluster));
+        admin.dropInstance(cluster, new InstanceConfig(instanceId));
+    }
+
+    @Override
+    public synchronized void rebalance() {
+        // synchronized for consistency with the other mutating operations
+        for (String resourceName : admin.getResourcesInCluster(cluster)) {
+            // NOTE(review): assumes the replica count is numeric; Helix also
+            // allows symbolic values (e.g. "ANY_LIVEINSTANCE") -- confirm
+            int replica = Integer.parseInt(admin.getResourceIdealState(cluster, resourceName).getReplicas());
+            admin.rebalance(cluster, resourceName, replica);
+        }
+    }
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/Service.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/Service.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/Service.java
new file mode 100644
index 0000000..a60d110
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/Service.java
@@ -0,0 +1,38 @@
+package org.apache.helix.autoscale;
+
+import java.util.Properties;
+
+/**
+ * Abstraction for configurable and runnable service. Light-weight dependency
+ * injection and life-cycle management. Life-cycle: configure() once, then
+ * start() once, then stop() (idempotent). Implementations are typically
+ * instantiated reflectively via a default constructor (see BootUtils).
+ *
+ */
+public interface Service {
+
+ /**
+ * Configure service internals<br/>
+ * <b>INVARIANT:</b> executed only once
+ *
+ * @param properties
+ * arbitrary key-value properties, parsed internally
+ * @throws Exception
+ * if the configuration is invalid or incomplete
+ */
+ void configure(Properties properties) throws Exception;
+
+ /**
+ * Start service.<br/>
+ * <b>PRECONDITION:</b> configure() was invoked<br/>
+ * <b>INVARIANT:</b> executed only once
+ *
+ * @throws Exception
+ * if the service could not be started
+ */
+ void start() throws Exception;
+
+ /**
+ * Stop service.<br/>
+ * <b>INVARIANT:</b> idempotent
+ *
+ * @throws Exception
+ * if an error occurred during shutdown
+ */
+ void stop() throws Exception;
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/StatusProvider.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/StatusProvider.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/StatusProvider.java
new file mode 100644
index 0000000..ce0c29d
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/StatusProvider.java
@@ -0,0 +1,35 @@
+package org.apache.helix.autoscale;
+
+import org.apache.helix.autoscale.provider.ProviderRebalancer;
+
+/**
+ * Abstraction for status reader of container deployment framework. Provides
+ * information on physical existence of container and activity or failure state.
+ * Is polled by ProviderRebalancer and should be light-weight and non-blocking.<br/>
+ * <b>NOTE:</b> This information is solely based on the low-level framework and
+ * may be different from the participant state in Helix. (The Helix participant
+ * may not even exist)
+ *
+ * @see ProviderRebalancer
+ */
+public interface StatusProvider {
+
+ /**
+ * Determine whether container physically exists.
+ *
+ * @param id
+ * unique container id
+ * @return true, if container is present
+ */
+ public boolean exists(String id);
+
+ /**
+ * Determine whether container is healthy as determined by the deployment
+ * framework.<br/>
+ * <b>NOTE(review):</b> presumably implies {@code exists(id)} — confirm
+ * against implementations.
+ *
+ * @param id
+ * unique container id
+ * @return true, if container is healthy
+ */
+ public boolean isHealthy(String id);
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/StatusProviderService.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/StatusProviderService.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/StatusProviderService.java
new file mode 100644
index 0000000..756fe4a
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/StatusProviderService.java
@@ -0,0 +1,9 @@
+package org.apache.helix.autoscale;
+
+/**
+ * StatusProvider as configurable service. Marker interface combining
+ * {@link StatusProvider} with the {@link Service} life-cycle; adds no methods
+ * of its own.
+ *
+ */
+public interface StatusProviderService extends StatusProvider, Service {
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/TargetProvider.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/TargetProvider.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/TargetProvider.java
new file mode 100644
index 0000000..243a977
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/TargetProvider.java
@@ -0,0 +1,25 @@
+package org.apache.helix.autoscale;
+
+import org.apache.helix.autoscale.provider.ProviderRebalancer;
+
+/**
+ * Abstraction for target computation and statistics collection. Provides target
+ * count of containers for ProviderRebalancer. Is polled by ProviderRebalancer
+ * and should be light-weight and non-blocking.<br/>
+ * <b>NOTE:</b> The target count is oblivious of failed containers and can be
+ * obtained in an arbitrary way. See implementations for examples.
+ *
+ * @see ProviderRebalancer
+ */
+public interface TargetProvider {
+
+ /**
+ * Return target count of containers of a specific type.
+ *
+ * @param containerType
+ * meta resource name
+ * @return container count >= 1
+ * @throws Exception
+ * if the target count cannot be determined
+ */
+ public int getTargetContainerCount(String containerType) throws Exception;
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/TargetProviderService.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/TargetProviderService.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/TargetProviderService.java
new file mode 100644
index 0000000..a9ce207
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/TargetProviderService.java
@@ -0,0 +1,9 @@
+package org.apache.helix.autoscale;
+
+/**
+ * TargetProvider as configurable service. Marker interface combining
+ * {@link TargetProvider} with the {@link Service} life-cycle; adds no methods
+ * of its own.
+ *
+ */
+public interface TargetProviderService extends TargetProvider, Service {
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/ZookeeperSetter.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/ZookeeperSetter.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/ZookeeperSetter.java
new file mode 100644
index 0000000..0c47999
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/ZookeeperSetter.java
@@ -0,0 +1,30 @@
+package org.apache.helix.autoscale;
+
+import org.I0Itec.zkclient.ZkClient;
+import org.apache.log4j.Logger;
+
+/**
+ * Utility for setting String values in the embedded zookeeper service.
+ * (Program entry point)
+ *
+ */
+public class ZookeeperSetter {
+
+ static Logger log = Logger.getLogger(ZookeeperSetter.class);
+
+ /**
+ * @param args
+ */
+ public static void main(String[] args) {
+ String address = args[0];
+ String path = args[1];
+ String value = args[2];
+
+ log.info(String.format("Setting %s:%s to '%s'", address, path, value));
+
+ ZkClient client = new ZkClient(address);
+ client.createPersistent(path, true);
+ client.writeData(path, value);
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/bootstrapper/Boot.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/bootstrapper/Boot.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/bootstrapper/Boot.java
new file mode 100644
index 0000000..a501c9a
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/bootstrapper/Boot.java
@@ -0,0 +1,132 @@
+package org.apache.helix.autoscale.bootstrapper;
+
+import java.io.InputStream;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+
+import org.apache.helix.autoscale.Service;
+import org.apache.log4j.Logger;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.Lists;
+
+/**
+ * Bootstrapper for elastic cluster deployment using *.properties configuration
+ * files. (Program entry point)
+ *
+ */
+public class Boot implements Service {
+
+    static final Logger log = Logger.getLogger(Boot.class);
+
+    // maps a configuration namespace prefix to the service implementing it
+    static final Map<String, Class<? extends Service>> classes = new HashMap<String, Class<? extends Service>>();
+    static {
+        classes.put("zookeeper", ZookeeperService.class);
+        classes.put("cluster", ClusterService.class);
+        classes.put("resource", ResourceService.class);
+        classes.put("controller", ControllerService.class);
+        classes.put("metacluster", MetaClusterService.class);
+        classes.put("metaresource", MetaResourceService.class);
+        classes.put("metaprovider", MetaProviderService.class);
+        classes.put("metacontroller", MetaControllerService.class);
+    }
+
+    // startup order of namespaces; shutdown happens in reverse order
+    static final List<String> serviceOrder = Arrays.asList("zookeeper", "cluster", "resource", "metacluster", "metaresource",
+            "metaprovider", "controller", "metacontroller");
+
+    Properties properties;
+    List<Service> services = Lists.newArrayList();
+
+    @Override
+    public void configure(Properties properties) throws Exception {
+        Preconditions.checkNotNull(properties);
+        this.properties = properties;
+    }
+
+    @Override
+    public void start() throws Exception {
+        log.info(String.format("bootstrapping started"));
+
+        for (String key : serviceOrder) {
+            // indexed namespaces ("key.0", "key.1", ...) take precedence over
+            // a single unindexed namespace ("key")
+            if (BootUtils.hasNamespace(properties, key + ".0")) {
+                processIndexedNamespace(key);
+            } else if (BootUtils.hasNamespace(properties, key)) {
+                processNamespace(key);
+            }
+        }
+
+        log.info(String.format("bootstrapping completed"));
+    }
+
+    /** Configures and starts one service per indexed namespace "key.i". */
+    private void processIndexedNamespace(String key) throws Exception {
+        int i = 0;
+        String indexedKey = key + "." + i;
+
+        while (BootUtils.hasNamespace(properties, indexedKey)) {
+            log.info(String.format("processing namespace '%s'", indexedKey));
+            Service service = BootUtils.createInstance(classes.get(key));
+            service.configure(BootUtils.getNamespace(properties, indexedKey));
+            service.start();
+
+            services.add(service);
+
+            i++;
+            indexedKey = key + "." + i;
+        }
+    }
+
+    /** Configures and starts the single service of namespace "key". */
+    private void processNamespace(String key) throws Exception {
+        log.info(String.format("processing namespace '%s'", key));
+        Service service = BootUtils.createInstance(classes.get(key));
+        service.configure(BootUtils.getNamespace(properties, key));
+        service.start();
+
+        services.add(service);
+    }
+
+    @Override
+    public void stop() throws Exception {
+        log.info(String.format("shutdown started"));
+
+        // iterate a reversed view instead of mutating the list, so repeated
+        // stop() calls keep a stable order (previously Collections.reverse
+        // flipped the list in place on every invocation)
+        for (Service service : Lists.reverse(services)) {
+            service.stop();
+        }
+
+        log.info(String.format("shutdown completed"));
+    }
+
+    /**
+     * @return services started so far, in startup order
+     */
+    public Collection<Service> getServices() {
+        return services;
+    }
+
+    /**
+     * @deprecated misspelled; use {@link #getServices()} instead
+     */
+    @Deprecated
+    public Collection<Service> getServcies() {
+        return getServices();
+    }
+
+    public static void main(String[] args) throws Exception {
+        if (args.length < 1) {
+            log.error(String.format("Usage: Boot properties_path"));
+            return;
+        }
+
+        String resourcePath = args[0];
+
+        log.info(String.format("reading definition from '%s'", resourcePath));
+        InputStream input = ClassLoader.getSystemResourceAsStream(resourcePath);
+        if (input == null) {
+            // fail fast with a clear message instead of an NPE from load()
+            log.error(String.format("could not find resource '%s'", resourcePath));
+            return;
+        }
+
+        Properties properties = new Properties();
+        try {
+            properties.load(input);
+        } finally {
+            // previously the stream was leaked
+            input.close();
+        }
+
+        final Boot boot = new Boot();
+        boot.configure(properties);
+        boot.start();
+
+        Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() {
+            @Override
+            public void run() {
+                log.debug("Running shutdown hook");
+                try {
+                    boot.stop();
+                } catch (Exception e) {
+                    // previously swallowed silently; at least record it
+                    log.warn("exception during shutdown", e);
+                }
+            }
+        }));
+    }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/bootstrapper/BootUtils.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/bootstrapper/BootUtils.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/bootstrapper/BootUtils.java
new file mode 100644
index 0000000..53498b9
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/bootstrapper/BootUtils.java
@@ -0,0 +1,104 @@
+package org.apache.helix.autoscale.bootstrapper;
+
+import java.util.Collection;
+import java.util.Map;
+import java.util.Properties;
+import java.util.Set;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.commons.lang.StringUtils;
+import org.apache.log4j.Logger;
+
+import com.google.common.collect.Lists;
+import com.google.common.collect.Sets;
+
+/**
+ * Utility for instantiating bootstrapping services and parsing hierarchical
+ * properties files.
+ *
+ */
+public class BootUtils {
+
+ public static final String CLASS_PROPERTY = "class";
+ static final Logger log = Logger.getLogger(BootUtils.class);
+
+ public static boolean hasNamespace(Properties properties, String namespace) {
+ String prefix = namespace + ".";
+ for (String key : properties.stringPropertyNames()) {
+ if (key.startsWith(prefix))
+ return true;
+ }
+ return false;
+ }
+
+ public static Set<String> getNamespaces(Properties properties) {
+ Pattern pattern = Pattern.compile("^([^\\.\\=]+)");
+
+ Set<String> namespaces = Sets.newHashSet();
+
+ for (Map.Entry<Object, Object> rawEntry : properties.entrySet()) {
+ String key = (String) rawEntry.getKey();
+
+ Matcher matcher = pattern.matcher(key);
+ if (matcher.find()) {
+ namespaces.add(matcher.group(1));
+ }
+ }
+
+ return namespaces;
+ }
+
+ public static Properties getNamespace(Properties source, String namespace) {
+ Properties dest = new Properties();
+ String prefix = namespace + ".";
+
+ for (Map.Entry<Object, Object> rawEntry : source.entrySet()) {
+ String key = (String) rawEntry.getKey();
+ String value = (String) rawEntry.getValue();
+
+ if (key.startsWith(prefix)) {
+ String newKey = key.substring(prefix.length());
+ dest.put(newKey, value);
+ }
+ }
+
+ return dest;
+ }
+
+ public static Collection<Properties> getContainerProps(Properties properties) {
+ Collection<Properties> containerProps = Lists.newArrayList();
+
+ String containers = properties.getProperty("containers");
+ String containerTypes[] = StringUtils.split(containers, ",");
+
+ for (String containerType : containerTypes) {
+ Properties containerProp = BootUtils.getNamespace(BootUtils.getNamespace(properties, "container"), containerType);
+ log.debug(String.format("adding container type (type='%s', properties='%s')", containerType, containerProp));
+ containerProps.add(containerProp);
+ }
+
+ return containerProps;
+ }
+
+ @SuppressWarnings("unchecked")
+ public static <T> T createInstance(Class<?> clazz) throws Exception {
+ try {
+ log.debug(String.format("checking for default constructor in class '%s'", clazz.getSimpleName()));
+ return (T) clazz.getConstructor().newInstance();
+ } catch (Exception e) {
+ log.debug("no default constructor found");
+ }
+
+ throw new Exception(String.format("no suitable constructor for class '%s'", clazz.getSimpleName()));
+ }
+
+ public static <T> T createInstance(String className) throws Exception {
+ return createInstance(Class.forName(className));
+ }
+
+ private BootUtils() {
+ // left blank
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/bootstrapper/ClusterService.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/bootstrapper/ClusterService.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/bootstrapper/ClusterService.java
new file mode 100644
index 0000000..730740f
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/bootstrapper/ClusterService.java
@@ -0,0 +1,46 @@
+package org.apache.helix.autoscale.bootstrapper;
+
+import java.util.Properties;
+
+import org.apache.helix.HelixAdmin;
+import org.apache.helix.autoscale.Service;
+import org.apache.helix.manager.zk.ZKHelixAdmin;
+import org.apache.helix.model.StateModelDefinition;
+import org.apache.helix.tools.StateModelConfigGenerator;
+import org.apache.log4j.Logger;
+
+/**
+ * Cluster bootstrapping. Create Helix data structures in zookeeper for the
+ * managed cluster.
+ *
+ */
+public class ClusterService implements Service {
+
+    static final Logger log = Logger.getLogger(ClusterService.class);
+
+    String name;    // managed cluster name
+    String address; // zookeeper connect string
+
+    @Override
+    public void configure(Properties properties) throws Exception {
+        name = properties.getProperty("name", "cluster");
+        address = properties.getProperty("address", "localhost:2199");
+    }
+
+    /**
+     * Creates the cluster and registers the OnlineOffline and MasterSlave
+     * state models.
+     */
+    @Override
+    public void start() throws Exception {
+        log.info(String.format("setting up '%s/%s'", address, name));
+        HelixAdmin admin = new ZKHelixAdmin(address);
+        try {
+            admin.addCluster(name, false);
+            admin.addStateModelDef(name, "OnlineOffline", new StateModelDefinition(StateModelConfigGenerator.generateConfigForOnlineOffline()));
+            admin.addStateModelDef(name, "MasterSlave", new StateModelDefinition(StateModelConfigGenerator.generateConfigForMasterSlave()));
+        } finally {
+            // previously the zookeeper connection leaked if setup failed
+            admin.close();
+        }
+        log.info("setup complete");
+    }
+
+    @Override
+    public void stop() throws Exception {
+        // left blank
+    }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/bootstrapper/ControllerService.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/bootstrapper/ControllerService.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/bootstrapper/ControllerService.java
new file mode 100644
index 0000000..5e659f2
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/bootstrapper/ControllerService.java
@@ -0,0 +1,50 @@
+package org.apache.helix.autoscale.bootstrapper;
+
+import java.util.Properties;
+import java.util.concurrent.ScheduledExecutorService;
+
+import org.apache.helix.HelixManager;
+import org.apache.helix.autoscale.Service;
+import org.apache.helix.controller.HelixControllerMain;
+import org.apache.log4j.Logger;
+
+/**
+ * Helix controller bootstrapping and management. Create standalone controller
+ * for managed Helix cluster.
+ *
+ */
+public class ControllerService implements Service {
+
+ static final Logger log = Logger.getLogger(ControllerService.class);
+
+ String name;
+ String cluster;
+ String address;
+
+ HelixManager manager;
+
+ ScheduledExecutorService executor;
+
+ @Override
+ public void configure(Properties properties) throws Exception {
+ name = properties.getProperty("name", "controller");
+ cluster = properties.getProperty("cluster", "cluster");
+ address = properties.getProperty("address", "localhost:2199");
+ }
+
+ @Override
+ public void start() throws Exception {
+ log.info(String.format("starting controller '%s' at '%s/%s'", name, address, cluster));
+ manager = HelixControllerMain.startHelixController(address, cluster, name, HelixControllerMain.STANDALONE);
+ }
+
+ @Override
+ public void stop() throws Exception {
+ if (manager != null) {
+ log.info(String.format("stopping controller '%s' at '%s/%s'", name, address, cluster));
+ manager.disconnect();
+ manager = null;
+ }
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/bootstrapper/MetaClusterService.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/bootstrapper/MetaClusterService.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/bootstrapper/MetaClusterService.java
new file mode 100644
index 0000000..30ecb4d
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/bootstrapper/MetaClusterService.java
@@ -0,0 +1,61 @@
+package org.apache.helix.autoscale.bootstrapper;
+
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Properties;
+
+import org.apache.helix.HelixAdmin;
+import org.apache.helix.autoscale.Service;
+import org.apache.helix.manager.zk.ZKHelixAdmin;
+import org.apache.helix.model.HelixConfigScope;
+import org.apache.helix.model.HelixConfigScope.ConfigScopeProperty;
+import org.apache.helix.model.StateModelDefinition;
+import org.apache.helix.model.builder.HelixConfigScopeBuilder;
+import org.apache.helix.tools.StateModelConfigGenerator;
+import org.apache.log4j.Logger;
+
+/**
+ * Meta cluster bootstrapping. Create Helix data structures in zookeeper for
+ * the meta cluster.
+ *
+ */
+public class MetaClusterService implements Service {
+
+ static final Logger log = Logger.getLogger(MetaClusterService.class);
+
+ String name;
+ String address;
+ String managedCluster;
+ String managedAddress;
+
+ @Override
+ public void configure(Properties properties) throws Exception {
+ name = properties.getProperty("name", "metacluster");
+ address = properties.getProperty("address", "localhost:2199");
+ managedCluster = properties.getProperty("managedcluster", "cluster");
+ managedAddress = properties.getProperty("managedaddress", "localhost:2199");
+ }
+
+ @Override
+ public void start() throws Exception {
+ log.info(String.format("setting up '%s/%s'", address, name));
+ HelixAdmin admin = new ZKHelixAdmin(address);
+ admin.addCluster(name, false);
+ admin.addStateModelDef(name, "OnlineOffline", new StateModelDefinition(StateModelConfigGenerator.generateConfigForOnlineOffline()));
+
+ HelixConfigScope scope = new HelixConfigScopeBuilder(ConfigScopeProperty.CLUSTER, name).build();
+ Map<String, String> properties = new HashMap<String, String>();
+ properties.put("cluster", managedCluster);
+ properties.put("address", managedAddress);
+ admin.setConfig(scope, properties);
+
+ admin.close();
+ log.info("setup complete");
+ }
+
+ @Override
+ public void stop() throws Exception {
+ // left blank
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/bootstrapper/MetaControllerService.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/bootstrapper/MetaControllerService.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/bootstrapper/MetaControllerService.java
new file mode 100644
index 0000000..7a0221c
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/bootstrapper/MetaControllerService.java
@@ -0,0 +1,114 @@
+package org.apache.helix.autoscale.bootstrapper;
+
+import java.util.Properties;
+import java.util.concurrent.Executors;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.helix.HelixAdmin;
+import org.apache.helix.HelixManager;
+import org.apache.helix.autoscale.Service;
+import org.apache.helix.autoscale.StatusProviderService;
+import org.apache.helix.autoscale.TargetProviderService;
+import org.apache.helix.autoscale.provider.ProviderRebalancerSingleton;
+import org.apache.helix.controller.HelixControllerMain;
+import org.apache.helix.model.IdealState;
+import org.apache.log4j.Logger;
+
+/**
+ * Meta cluster controller bootstrapping and management. Create standalone
+ * controller for Helix meta cluster. Spawn StatusProvider and TargetProvider
+ * and trigger periodic status refresh in meta cluster.
+ *
+ */
+public class MetaControllerService implements Service {
+
+ static final Logger log = Logger.getLogger(MetaControllerService.class);
+
+ String name;
+ String metacluster;
+ String metaaddress;
+ long autorefresh;
+
+ HelixManager manager;
+ StatusProviderService statusService;
+ TargetProviderService targetService;
+ ScheduledExecutorService executor;
+
+ @Override
+ public void configure(Properties properties) throws Exception {
+ name = properties.getProperty("name", "controller");
+ metacluster = properties.getProperty("metacluster", "metacluster");
+ metaaddress = properties.getProperty("metaaddress", "localhost:2199");
+ autorefresh = Long.valueOf(properties.getProperty("autorefresh", "0"));
+
+ Properties statusProperties = BootUtils.getNamespace(properties, "status");
+ statusService = BootUtils.createInstance(Class.forName(statusProperties.getProperty("class")));
+ statusService.configure(statusProperties);
+ ProviderRebalancerSingleton.setStatusProvider(statusService);
+
+ Properties targetProperties = BootUtils.getNamespace(properties, "target");
+ targetService = BootUtils.createInstance(Class.forName(targetProperties.getProperty("class")));
+ targetService.configure(targetProperties);
+ ProviderRebalancerSingleton.setTargetProvider(targetService);
+ }
+
+ @Override
+ public void start() throws Exception {
+ log.debug("Starting status service");
+ statusService.start();
+
+ log.debug("Starting target service");
+ targetService.start();
+
+ log.info(String.format("starting controller '%s' at '%s/%s'", name, metaaddress, metacluster));
+ manager = HelixControllerMain.startHelixController(metaaddress, metacluster, name, HelixControllerMain.STANDALONE);
+
+ if (autorefresh > 0) {
+ log.debug(String.format("installing autorefresh with interval %d ms", autorefresh));
+ executor = Executors.newSingleThreadScheduledExecutor();
+ executor.scheduleAtFixedRate(new RefreshRunnable(), autorefresh, autorefresh, TimeUnit.MILLISECONDS);
+ }
+ }
+
+ @Override
+ public void stop() throws Exception {
+ if (executor != null) {
+ executor.shutdownNow();
+ while (!executor.isTerminated()) {
+ Thread.sleep(100);
+ }
+ executor = null;
+ }
+ if (manager != null) {
+ log.info(String.format("Stopping controller '%s' at '%s/%s'", name, metaaddress, metacluster));
+ manager.disconnect();
+ manager = null;
+ }
+ if (targetService != null) {
+ log.debug("Stopping target service");
+ targetService.stop();
+ targetService = null;
+ }
+ if (statusService != null) {
+ log.debug("Stopping status service");
+ statusService.stop();
+ statusService = null;
+ }
+ }
+
+ private class RefreshRunnable implements Runnable {
+ @Override
+ public void run() {
+ log.debug("running status refresh");
+ HelixAdmin admin = manager.getClusterManagmentTool();
+
+ for (String metaResource : admin.getResourcesInCluster(metacluster)) {
+ log.debug(String.format("refreshing meta resource '%s'", metaResource));
+
+ IdealState poke = admin.getResourceIdealState(metacluster, metaResource);
+ admin.setResourceIdealState(metacluster, metaResource, poke);
+ }
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/bootstrapper/MetaProviderService.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/bootstrapper/MetaProviderService.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/bootstrapper/MetaProviderService.java
new file mode 100644
index 0000000..a8a9064
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/bootstrapper/MetaProviderService.java
@@ -0,0 +1,81 @@
+package org.apache.helix.autoscale.bootstrapper;
+
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+
+import org.apache.helix.HelixAdmin;
+import org.apache.helix.autoscale.Service;
+import org.apache.helix.autoscale.provider.ProviderProperties;
+import org.apache.helix.manager.zk.ZKHelixAdmin;
+import org.apache.helix.model.HelixConfigScope;
+import org.apache.helix.model.HelixConfigScope.ConfigScopeProperty;
+import org.apache.helix.model.builder.HelixConfigScopeBuilder;
+import org.apache.log4j.Logger;
+
+import com.google.common.collect.Lists;
+
+/**
+ * ContainerProvider bootstrapping and management. Create container provider
+ * participant, configure with container properties from meta resources and
+ * connect to meta cluster.
+ *
+ */
+public class MetaProviderService implements Service {
+
+ static final Logger log = Logger.getLogger(MetaProviderService.class);
+
+ Service service;
+
+ String clazz;
+ String metaAddress;
+ String metaCluster;
+
+ ProviderProperties config;
+
+ @Override
+ public void configure(Properties properties) throws Exception {
+ clazz = properties.getProperty("class");
+ metaAddress = properties.getProperty("metaaddress", "localhost:2199");
+ metaCluster = properties.getProperty("metacluster", "metacluster");
+
+ config = new ProviderProperties();
+ config.putAll(properties);
+ }
+
+ @Override
+ public void start() throws Exception {
+ log.info(String.format("starting service '%s' (config=%s)", clazz, config));
+
+ HelixAdmin admin = new ZKHelixAdmin(metaAddress);
+
+ HelixConfigScope managedScope = new HelixConfigScopeBuilder(ConfigScopeProperty.CLUSTER, metaCluster).build();
+ Map<String, String> managedProps = admin.getConfig(managedScope, Lists.newArrayList("cluster", "address"));
+ config.putAll(managedProps);
+
+ for (String resource : admin.getResourcesInCluster(metaCluster)) {
+ HelixConfigScope resScope = new HelixConfigScopeBuilder(ConfigScopeProperty.RESOURCE, metaCluster, resource).build();
+ List<String> resKeys = admin.getConfigKeys(resScope);
+ Map<String, String> resProps = admin.getConfig(resScope, resKeys);
+
+ Properties properties = new Properties();
+ properties.putAll(resProps);
+
+ config.addContainer(resource, properties);
+ }
+
+ service = BootUtils.createInstance(clazz);
+ service.configure(config);
+ service.start();
+ }
+
+ @Override
+ public void stop() throws Exception {
+ log.info(String.format("stopping service '%s' (config=%s)", clazz, config));
+ if (service != null) {
+ service.stop();
+ service = null;
+ }
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/bootstrapper/MetaResourceService.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/bootstrapper/MetaResourceService.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/bootstrapper/MetaResourceService.java
new file mode 100644
index 0000000..55478ad
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/bootstrapper/MetaResourceService.java
@@ -0,0 +1,87 @@
+package org.apache.helix.autoscale.bootstrapper;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+
+import org.apache.helix.HelixAdmin;
+import org.apache.helix.autoscale.Service;
+import org.apache.helix.autoscale.provider.ProviderRebalancer;
+import org.apache.helix.manager.zk.ZKHelixAdmin;
+import org.apache.helix.model.HelixConfigScope;
+import org.apache.helix.model.IdealState;
+import org.apache.helix.model.HelixConfigScope.ConfigScopeProperty;
+import org.apache.helix.model.IdealState.RebalanceMode;
+import org.apache.helix.model.builder.HelixConfigScopeBuilder;
+import org.apache.log4j.Logger;
+
+import com.google.common.collect.Maps;
+
+/**
+ * Bootstrapping meta resource. Create container type configuration in Helix
+ * zookeeper namespace.
+ *
+ */
+public class MetaResourceService implements Service {
+
+ static final Logger log = Logger.getLogger(MetaResourceService.class);
+
+ String metaCluster;
+ String metaAddress;
+ String name;
+ Map<String, String> config;
+
+ @Override
+ public void configure(Properties properties) throws Exception {
+ metaCluster = properties.getProperty("metacluster", "metacluster");
+ metaAddress = properties.getProperty("metaaddress", "localhost:2199");
+ name = properties.getProperty("name", "container");
+
+ this.config = new HashMap<String, String>();
+ for (Map.Entry<Object, Object> entry : properties.entrySet()) {
+ this.config.put((String) entry.getKey(), (String) entry.getValue());
+ }
+ }
+
+ @Override
+ public void start() throws Exception {
+ log.info(String.format("setting up meta resource '%s' at '%s/%s'", name, metaAddress, metaCluster));
+ HelixAdmin admin = new ZKHelixAdmin(metaAddress);
+
+ log.info(String.format("setting up container '%s' (config='%s')", name, config));
+
+ admin.addResource(metaCluster, name, 1, "OnlineOffline", RebalanceMode.USER_DEFINED.toString());
+ IdealState idealState = admin.getResourceIdealState(metaCluster, name);
+ idealState.setRebalancerClassName(ProviderRebalancer.class.getName());
+ idealState.setReplicas("1");
+
+ // BEGIN workaround
+ // FIXME workaround for HELIX-226
+ Map<String, List<String>> listFields = Maps.newHashMap();
+ Map<String, Map<String, String>> mapFields = Maps.newHashMap();
+ for (int i = 0; i < 256; i++) {
+ String partitionName = name + "_" + i;
+ listFields.put(partitionName, new ArrayList<String>());
+ mapFields.put(partitionName, new HashMap<String, String>());
+ }
+ idealState.getRecord().setListFields(listFields);
+ idealState.getRecord().setMapFields(mapFields);
+ // END workaround
+
+ admin.setResourceIdealState(metaCluster, name, idealState);
+
+ HelixConfigScope scope = new HelixConfigScopeBuilder(ConfigScopeProperty.RESOURCE, metaCluster, name).build();
+ admin.setConfig(scope, this.config);
+
+ admin.close();
+ log.info("setup complete");
+ }
+
+ @Override
+ public void stop() throws Exception {
+ // left blank
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/bootstrapper/ResourceService.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/bootstrapper/ResourceService.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/bootstrapper/ResourceService.java
new file mode 100644
index 0000000..0952b36
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/bootstrapper/ResourceService.java
@@ -0,0 +1,61 @@
+package org.apache.helix.autoscale.bootstrapper;
+
+import java.util.Properties;
+
+import org.apache.helix.HelixAdmin;
+import org.apache.helix.autoscale.Service;
+import org.apache.helix.manager.zk.ZKHelixAdmin;
+import org.apache.helix.model.IdealState;
+import org.apache.helix.model.IdealState.RebalanceMode;
+import org.apache.log4j.Logger;
+
+/**
+ * Bootstrapping Helix resource. Create resource in Helix and configure
+ * properties.
+ *
+ */
+public class ResourceService implements Service {
+
+ static final Logger log = Logger.getLogger(ResourceService.class);
+
+ String cluster;
+ String address;
+ String container;
+ String name;
+ String model;
+ int partitions;
+ int replica;
+
+ @Override
+ public void configure(Properties properties) throws Exception {
+ cluster = properties.getProperty("cluster", "cluster");
+ address = properties.getProperty("address", "localhost:2199");
+ name = properties.getProperty("name", "resource");
+ container = properties.getProperty("container", "container");
+ model = properties.getProperty("model", "OnlineOffline");
+ partitions = Integer.parseInt(properties.getProperty("partitions", "1"));
+ replica = Integer.parseInt(properties.getProperty("replica", "1"));
+ }
+
+ @Override
+ public void start() throws Exception {
+ log.info(String.format("setting up resource '%s' at '%s/%s'", name, address, cluster));
+ HelixAdmin admin = new ZKHelixAdmin(address);
+
+ log.info(String.format("setting up resource '%s' (container='%s', model='%s', partitions=%d, replica=%d)", name, container, model, partitions, replica));
+
+ admin.addResource(cluster, name, partitions, model, RebalanceMode.FULL_AUTO.toString());
+ IdealState idealState = admin.getResourceIdealState(cluster, name);
+ idealState.setInstanceGroupTag(container);
+ idealState.setReplicas(String.valueOf(replica));
+ admin.setResourceIdealState(cluster, name, idealState);
+ admin.close();
+ log.info("setup complete");
+ }
+
+ @Override
+ public void stop() throws Exception {
+ // left blank
+ }
+
+}
[14/15] Adding Helix-task-framework and Yarn integration modules
Posted by ki...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/helix-core/src/main/java/org/apache/helix/task/TaskRebalancer.java
----------------------------------------------------------------------
diff --git a/helix-core/src/main/java/org/apache/helix/task/TaskRebalancer.java b/helix-core/src/main/java/org/apache/helix/task/TaskRebalancer.java
new file mode 100644
index 0000000..5664713
--- /dev/null
+++ b/helix-core/src/main/java/org/apache/helix/task/TaskRebalancer.java
@@ -0,0 +1,736 @@
+/*
+ * $Id$
+ */
+package org.apache.helix.task;
+
+
+import com.google.common.base.Joiner;
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.Sets;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.SortedSet;
+import java.util.TreeMap;
+import java.util.TreeSet;
+import org.apache.helix.AccessOption;
+import org.apache.helix.HelixDataAccessor;
+import org.apache.helix.HelixManager;
+import org.apache.helix.PropertyKey;
+import org.apache.helix.ZNRecord;
+import org.apache.helix.controller.rebalancer.Rebalancer;
+import org.apache.helix.controller.stages.ClusterDataCache;
+import org.apache.helix.controller.stages.CurrentStateOutput;
+import org.apache.helix.model.IdealState;
+import org.apache.helix.model.Partition;
+import org.apache.helix.model.Resource;
+import org.apache.helix.model.ResourceAssignment;
+import org.apache.log4j.Logger;
+
+
+/**
+ * Custom rebalancer implementation for the {@code Task} state model.
+ *
+ * @author Abe <as...@linkedin.com>
+ * @version $Revision$
+ */
+public class TaskRebalancer implements Rebalancer
+{
+ private static final Logger LOG = Logger.getLogger(TaskRebalancer.class);
+ private HelixManager _manager;
+
  @Override
  public void init(HelixManager manager)
  {
    // Keep a handle to the controller's Helix manager; used by
    // computeResourceMapping to read/write task and workflow state.
    _manager = manager;
  }
+
  /**
   * Computes the partition-to-instance assignment for one task resource.
   *
   * Loads the task, workflow and context records from the property store,
   * short-circuits to an empty assignment when the task cannot or should not
   * run (unmet parent dependencies, deletion, expiry, already final), and
   * otherwise delegates to the static overload to compute the new mapping.
   * Persists the updated contexts and the new assignment back before
   * returning.
   */
  @Override
  public ResourceAssignment computeResourceMapping(Resource resource,
                                                   IdealState taskIs,
                                                   CurrentStateOutput currStateOutput,
                                                   ClusterDataCache clusterData)
  {
    final String resourceName = resource.getResourceName();

    // Fetch task configuration
    TaskConfig taskCfg = TaskUtil.getTaskCfg(_manager, resourceName);
    String workflowResource = taskCfg.getWorkflow();

    // Fetch workflow configuration and context
    WorkflowConfig workflowCfg = TaskUtil.getWorkflowCfg(_manager, workflowResource);
    WorkflowContext workflowCtx = TaskUtil.getWorkflowContext(_manager, workflowResource);

    // Initialize workflow context if needed (first rebalance of this workflow)
    if (workflowCtx == null)
    {
      workflowCtx = new WorkflowContext(new ZNRecord("WorkflowContext"));
      workflowCtx.setStartTime(System.currentTimeMillis());
    }

    // Check parent dependencies: do not schedule until every direct parent
    // task in the DAG has completed
    for (String parent : workflowCfg.getTaskDag().getDirectParents(resourceName))
    {
      if (workflowCtx.getTaskState(parent) == null || !workflowCtx.getTaskState(parent).equals(TaskState.COMPLETED))
      {
        return emptyAssignment(resourceName);
      }
    }

    // Clean up if workflow marked for deletion
    TargetState targetState = workflowCfg.getTargetState();
    if (targetState == TargetState.DELETE)
    {
      cleanup(_manager, resourceName, workflowCfg, workflowResource);
      return emptyAssignment(resourceName);
    }

    // Check if this workflow has been finished past its expiry.
    if (workflowCtx.getFinishTime() != WorkflowContext.UNFINISHED
        && workflowCtx.getFinishTime() + workflowCfg.getExpiry() <= System.currentTimeMillis())
    {
      markForDeletion(_manager, workflowResource);
      cleanup(_manager, resourceName, workflowCfg, workflowResource);
      return emptyAssignment(resourceName);
    }

    // Fetch any existing context information from the property store.
    TaskContext taskCtx = TaskUtil.getTaskContext(_manager, resourceName);
    if (taskCtx == null)
    {
      taskCtx = new TaskContext(new ZNRecord("TaskContext"));
      taskCtx.setStartTime(System.currentTimeMillis());
    }

    // The task is already in a final state (completed/failed).
    if (workflowCtx.getTaskState(resourceName) == TaskState.FAILED
        || workflowCtx.getTaskState(resourceName) == TaskState.COMPLETED)
    {
      return emptyAssignment(resourceName);
    }

    ResourceAssignment prevAssignment = TaskUtil.getPrevResourceAssignment(_manager, resourceName);
    if (prevAssignment == null)
    {
      prevAssignment = new ResourceAssignment(resourceName);
    }

    // Will contain the list of partitions that must be explicitly dropped from the ideal state that is stored in zk.
    // Fetch the previous resource assignment from the property store. This is required because of HELIX-230.
    Set<Integer> partitionsToDrop = new TreeSet<Integer>();

    ResourceAssignment newAssignment = computeResourceMapping(resourceName,
                                                              workflowCfg,
                                                              taskCfg,
                                                              prevAssignment,
                                                              clusterData.getIdealState(taskCfg.getTargetResource()),
                                                              clusterData.getLiveInstances().keySet(),
                                                              currStateOutput,
                                                              workflowCtx,
                                                              taskCtx,
                                                              partitionsToDrop);

    // Write back the ideal state with the finished partitions removed so
    // they are not re-scheduled (see HELIX-230 note above)
    if (!partitionsToDrop.isEmpty())
    {
      for (Integer pId : partitionsToDrop)
      {
        taskIs.getRecord().getMapFields().remove(pName(resourceName, pId));
      }
      HelixDataAccessor accessor = _manager.getHelixDataAccessor();
      PropertyKey propertyKey = accessor.keyBuilder().idealStates(resourceName);
      accessor.setProperty(propertyKey, taskIs);
    }

    // Update rebalancer context, previous ideal state.
    TaskUtil.setTaskContext(_manager, resourceName, taskCtx);
    TaskUtil.setWorkflowContext(_manager, workflowResource, workflowCtx);
    TaskUtil.setPrevResourceAssignment(_manager, resourceName, newAssignment);

    return newAssignment;
  }
+
  /**
   * Core assignment logic. Walks each live instance's previously-assigned
   * task partitions, advances their states (honoring pending and requested
   * transitions), records completed/errored partitions in the task context,
   * then hands out new partitions up to the per-instance concurrency limit.
   *
   * Side effects: mutates {@code workflowCtx}, {@code taskCtx} and
   * {@code partitionsToDropFromIs}; the caller persists them.
   *
   * @return the new partition-to-(instance, state) assignment, or an empty
   *         assignment if the task is failed outright.
   */
  private static ResourceAssignment computeResourceMapping(String taskResource,
                                                           WorkflowConfig workflowConfig,
                                                           TaskConfig taskCfg,
                                                           ResourceAssignment prevAssignment,
                                                           IdealState tgtResourceIs,
                                                           Iterable<String> liveInstances,
                                                           CurrentStateOutput currStateOutput,
                                                           WorkflowContext workflowCtx,
                                                           TaskContext taskCtx,
                                                           Set<Integer> partitionsToDropFromIs)
  {
    TargetState taskTgtState = workflowConfig.getTargetState();

    // Update running status in workflow context
    if (taskTgtState == TargetState.STOP)
    {
      workflowCtx.setTaskState(taskResource, TaskState.STOPPED);
      // Workflow has been stopped if all tasks are stopped
      if (isWorkflowStopped(workflowCtx, workflowConfig))
      {
        workflowCtx.setWorkflowState(TaskState.STOPPED);
      }
    }
    else
    {
      workflowCtx.setTaskState(taskResource, TaskState.IN_PROGRESS);
      // Workflow is in progress if any task is in progress
      workflowCtx.setWorkflowState(TaskState.IN_PROGRESS);
    }

    // Used to keep track of task partitions that have already been assigned to instances.
    Set<Integer> assignedPartitions = new HashSet<Integer>();

    // Keeps a mapping of (partition) -> (instance, state)
    Map<Integer, PartitionAssignment> paMap = new TreeMap<Integer, PartitionAssignment>();

    // Process all the current assignments of task partitions.
    Set<Integer> allPartitions = getAllTaskPartitions(tgtResourceIs, taskCfg);
    Map<String, SortedSet<Integer>> taskAssignments = getTaskPartitionAssignments(liveInstances,
                                                                                  prevAssignment,
                                                                                  allPartitions);
    for (String instance : taskAssignments.keySet())
    {
      Set<Integer> pSet = taskAssignments.get(instance);
      // Used to keep track of partitions that are in one of the final states: COMPLETED, TIMED_OUT, TASK_ERROR, ERROR.
      Set<Integer> donePartitions = new TreeSet<Integer>();
      for (int pId : pSet)
      {
        final String pName = pName(taskResource, pId);

        // Check for pending state transitions on this (partition, instance).
        String pendingState = currStateOutput.getPendingState(taskResource,
                                                              new Partition(pName),
                                                              instance);
        if (pendingState != null)
        {
          // There is a pending state transition for this (partition, instance). Just copy forward the state
          // assignment from the previous ideal state.
          Map<String, String> stateMap = prevAssignment.getReplicaMap(new Partition(pName));
          if (stateMap != null)
          {
            String prevState = stateMap.get(instance);
            paMap.put(pId, new PartitionAssignment(instance, prevState));
            assignedPartitions.add(pId);
            LOG.debug(String.format("Task partition %s has a pending state transition on instance %s. Using the previous ideal state which was %s.",
                                    pName,
                                    instance,
                                    prevState));
          }

          continue;
        }

        // NOTE(review): valueOf throws if the current state string is not a
        // TaskPartitionState name — presumably guaranteed by the Task state
        // model; confirm.
        TaskPartitionState currState = TaskPartitionState.valueOf(currStateOutput.getCurrentState(taskResource,
                                                                                                  new Partition(pName),
                                                                                                  instance));

        // Process any requested state transitions.
        String requestedStateStr = currStateOutput.getRequestedState(taskResource,
                                                                     new Partition(pName),
                                                                     instance);
        if (requestedStateStr != null && !requestedStateStr.isEmpty())
        {
          TaskPartitionState requestedState = TaskPartitionState.valueOf(requestedStateStr);
          if (requestedState.equals(currState))
          {
            LOG.warn(String.format("Requested state %s is the same as the current state for instance %s.",
                                   requestedState,
                                   instance));
          }

          // Honor the participant's request as-is.
          paMap.put(pId, new PartitionAssignment(instance, requestedState.name()));
          assignedPartitions.add(pId);
          LOG.debug(String.format("Instance %s requested a state transition to %s for partition %s.",
                                  instance,
                                  requestedState,
                                  pName));
          continue;
        }

        switch (currState)
        {
        case RUNNING:
        case STOPPED:
        {
          // Keep running or stop, depending on the workflow's target state.
          TaskPartitionState nextState;
          if (taskTgtState == TargetState.START)
          {
            nextState = TaskPartitionState.RUNNING;
          }
          else
          {
            nextState = TaskPartitionState.STOPPED;
          }

          paMap.put(pId, new PartitionAssignment(instance, nextState.name()));
          assignedPartitions.add(pId);
          LOG.debug(String.format("Setting task partition %s state to %s on instance %s.",
                                  pName,
                                  nextState,
                                  instance));
        }
          break;
        case COMPLETED:
        {
          // The task has completed on this partition. Mark as such in the context object.
          donePartitions.add(pId);
          LOG.debug(String.format("Task partition %s has completed with state %s. Marking as such in rebalancer context.",
                                  pName,
                                  currState));
          partitionsToDropFromIs.add(pId);
          markPartitionCompleted(taskCtx, pId);
        }
          break;
        case TIMED_OUT:
        case TASK_ERROR:
        case ERROR:
        {
          donePartitions.add(pId); // The task may be rescheduled on a different instance.
          LOG.debug(String.format("Task partition %s has error state %s. Marking as such in rebalancer context.",
                                  pName,
                                  currState));
          markPartitionError(taskCtx, pId, currState);
          // The error policy is to fail the task as soon a single partition fails for a specified maximum number of
          // attempts.
          if (taskCtx.getPartitionNumAttempts(pId) >= taskCfg.getMaxAttemptsPerPartition())
          {
            workflowCtx.setTaskState(taskResource, TaskState.FAILED);
            workflowCtx.setWorkflowState(TaskState.FAILED);
            addAllPartitions(tgtResourceIs.getPartitionSet(), partitionsToDropFromIs);
            return emptyAssignment(taskResource);
          }
        }
          break;
        case INIT:
        case DROPPED:
        {
          // currState in [INIT, DROPPED]. Do nothing, the partition is eligible to be reassigned.
          donePartitions.add(pId);
          LOG.debug(String.format("Task partition %s has state %s. It will be dropped from the current ideal state.",
                                  pName,
                                  currState));
        }
          break;
        default:
          throw new AssertionError("Unknown enum symbol: " + currState);
        }
      }

      // Remove the set of task partitions that are completed or in one of the error states.
      pSet.removeAll(donePartitions);
    }

    // Mark the task (and possibly the whole workflow) completed.
    if (isTaskComplete(taskCtx, allPartitions))
    {
      workflowCtx.setTaskState(taskResource, TaskState.COMPLETED);
      if (isWorkflowComplete(workflowCtx, workflowConfig))
      {
        workflowCtx.setWorkflowState(TaskState.COMPLETED);
        workflowCtx.setFinishTime(System.currentTimeMillis());
      }
    }

    // Make additional task assignments if needed.
    if (taskTgtState == TargetState.START)
    {
      // Contains the set of task partitions that must be excluded from consideration when making any new assignments.
      // This includes all completed, failed, already assigned partitions.
      Set<Integer> excludeSet = Sets.newTreeSet(assignedPartitions);
      addCompletedPartitions(excludeSet, taskCtx, allPartitions);
      // Get instance->[partition, ...] mappings for the target resource.
      Map<String, SortedSet<Integer>> tgtPartitionAssignments = getTgtPartitionAssignment(currStateOutput,
                                                                                          liveInstances,
                                                                                          tgtResourceIs,
                                                                                          taskCfg.getTargetPartitionStates(),
                                                                                          allPartitions);
      for (Map.Entry<String, SortedSet<Integer>> entry : taskAssignments.entrySet())
      {
        String instance = entry.getKey();
        // Contains the set of task partitions currently assigned to the instance.
        Set<Integer> pSet = entry.getValue();
        // Fill up to the per-instance concurrency limit.
        int numToAssign = taskCfg.getNumConcurrentTasksPerInstance() - pSet.size();
        if (numToAssign > 0)
        {
          List<Integer> nextPartitions = getNextPartitions(tgtPartitionAssignments.get(instance),
                                                           excludeSet,
                                                           numToAssign);
          for (Integer pId : nextPartitions)
          {
            String pName = pName(taskResource, pId);
            paMap.put(pId, new PartitionAssignment(instance, TaskPartitionState.RUNNING.name()));
            excludeSet.add(pId);
            LOG.debug(String.format("Setting task partition %s state to %s on instance %s.",
                                    pName,
                                    TaskPartitionState.RUNNING,
                                    instance));
          }
        }
      }
    }

    // Construct a ResourceAssignment object from the map of partition assignments.
    ResourceAssignment ra = new ResourceAssignment(taskResource);
    for (Map.Entry<Integer, PartitionAssignment> e : paMap.entrySet())
    {
      PartitionAssignment pa = e.getValue();
      ra.addReplicaMap(new Partition(pName(taskResource, e.getKey())), ImmutableMap.of(pa._instance, pa._state));
    }

    return ra;
  }
+
+ /**
+ * Checks if the task has completed.
+ *
+ * @param ctx The rebalancer context.
+ * @param allPartitions The set of partitions to check.
+ *
+ * @return true if all task partitions have been marked with status {@link TaskPartitionState#COMPLETED} in the rebalancer
+ * context, false otherwise.
+ */
+ private static boolean isTaskComplete(TaskContext ctx, Set<Integer> allPartitions)
+ {
+ for (Integer pId : allPartitions)
+ {
+ TaskPartitionState state = ctx.getPartitionState(pId);
+ // A null state (partition never started) also fails this check and counts as incomplete.
+ if (state != TaskPartitionState.COMPLETED)
+ {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ /**
+ * Checks if the workflow has completed.
+ *
+ * @param ctx Workflow context containing task states
+ * @param cfg Workflow config containing set of tasks
+ *
+ * @return returns true if all tasks are {@link TaskState#COMPLETED}, false otherwise.
+ */
+ private static boolean isWorkflowComplete(WorkflowContext ctx, WorkflowConfig cfg)
+ {
+ // Every node of the workflow DAG must have reached COMPLETED; a null state fails the check.
+ for (String task : cfg.getTaskDag().getAllNodes())
+ {
+ if(ctx.getTaskState(task) != TaskState.COMPLETED)
+ {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ /**
+ * Checks if the workflow has been stopped.
+ *
+ * @param ctx Workflow context containing task states
+ * @param cfg Workflow config containing set of tasks
+ *
+ * @return returns true if all tasks are {@link TaskState#STOPPED}, false otherwise.
+ */
+ private static boolean isWorkflowStopped(WorkflowContext ctx, WorkflowConfig cfg)
+ {
+ for (String task : cfg.getTaskDag().getAllNodes())
+ {
+ // NOTE(review): a null state (task never scheduled) is also accepted as "stopped" here,
+ // which is broader than the javadoc's "all tasks are STOPPED" — confirm this is intended.
+ if(ctx.getTaskState(task) != TaskState.STOPPED && ctx.getTaskState(task) != null)
+ {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ /**
+ * Flags a resource for deletion by setting its workflow target state to {@link TargetState#DELETE}
+ * in the resource-scoped Helix config; actual removal happens elsewhere (see cleanup).
+ */
+ private static void markForDeletion(HelixManager mgr, String resourceName)
+ {
+ mgr.getConfigAccessor().set(TaskUtil.getResourceConfigScope(mgr.getClusterName(), resourceName),
+ WorkflowConfig.TARGET_STATE,
+ TargetState.DELETE.name());
+ }
+
+ /**
+ * Cleans up all Helix state associated with this task, wiping workflow-level information if this is the last
+ * remaining task in its workflow.
+ *
+ * NOTE(review): each failed removal throws, aborting the remaining steps — a mid-way failure leaves the
+ * earlier deletions applied and the task state partially removed. Confirm the retry story for this path.
+ */
+ private static void cleanup(HelixManager mgr, String resourceName, WorkflowConfig cfg, String workflowResource)
+ {
+ HelixDataAccessor accessor = mgr.getHelixDataAccessor();
+ // Delete resource configs.
+ PropertyKey cfgKey = getConfigPropertyKey(accessor, resourceName);
+ if (!accessor.removeProperty(cfgKey))
+ {
+ throw new RuntimeException(String.format(
+ "Error occurred while trying to clean up task %s. Failed to remove node %s from Helix. Aborting further clean up steps.",
+ resourceName,
+ cfgKey));
+ }
+ // Delete property store information for this resource.
+ String propStoreKey = getRebalancerPropStoreKey(resourceName);
+ if (!mgr.getHelixPropertyStore().remove(propStoreKey, AccessOption.PERSISTENT))
+ {
+ throw new RuntimeException(String.format(
+ "Error occurred while trying to clean up task %s. Failed to remove node %s from Helix. Aborting further clean up steps.",
+ resourceName,
+ propStoreKey));
+ }
+ // Finally, delete the ideal state itself.
+ PropertyKey isKey = getISPropertyKey(accessor, resourceName);
+ if (!accessor.removeProperty(isKey))
+ {
+ throw new RuntimeException(String.format(
+ "Error occurred while trying to clean up task %s. Failed to remove node %s from Helix.",
+ resourceName,
+ isKey));
+ }
+ LOG.info(String.format("Successfully cleaned up task resource %s.", resourceName));
+
+ // If any sibling task in the workflow DAG still has state in Helix, this was not the last task.
+ boolean lastInWorkflow = true;
+ for(String task : cfg.getTaskDag().getAllNodes())
+ {
+ // check if property store information or resource configs exist for this task
+ if(mgr.getHelixPropertyStore().exists(getRebalancerPropStoreKey(task), AccessOption.PERSISTENT)
+ || accessor.getProperty(getConfigPropertyKey(accessor, task)) != null
+ || accessor.getProperty(getISPropertyKey(accessor, task)) != null)
+ {
+ lastInWorkflow = false;
+ }
+ }
+
+ // Clean up workflow-level info if this was the last remaining task in the workflow.
+ if(lastInWorkflow)
+ {
+ // delete workflow config
+ PropertyKey workflowCfgKey = getConfigPropertyKey(accessor, workflowResource);
+ if (!accessor.removeProperty(workflowCfgKey))
+ {
+ throw new RuntimeException(String.format(
+ "Error occurred while trying to clean up workflow %s. Failed to remove node %s from Helix. Aborting further clean up steps.",
+ workflowResource,
+ workflowCfgKey));
+ }
+ // Delete property store information for this workflow
+ String workflowPropStoreKey = getRebalancerPropStoreKey(workflowResource);
+ if (!mgr.getHelixPropertyStore().remove(workflowPropStoreKey, AccessOption.PERSISTENT))
+ {
+ throw new RuntimeException(String.format(
+ "Error occurred while trying to clean up workflow %s. Failed to remove node %s from Helix. Aborting further clean up steps.",
+ workflowResource,
+ workflowPropStoreKey));
+ }
+ }
+
+ }
+
+ /** Returns the property-store path for a resource's rebalancer data, under the rebalancer context root. */
+ private static String getRebalancerPropStoreKey(String resource)
+ {
+ return Joiner.on("/").join(TaskConstants.REBALANCER_CONTEXT_ROOT, resource);
+ }
+
+ /** Returns the Helix {@link PropertyKey} for the resource's ideal state. */
+ private static PropertyKey getISPropertyKey(HelixDataAccessor accessor, String resource)
+ {
+ return accessor.keyBuilder().idealStates(resource);
+ }
+
+ /** Returns the Helix {@link PropertyKey} for the resource's configuration. */
+ private static PropertyKey getConfigPropertyKey(HelixDataAccessor accessor, String resource)
+ {
+ return accessor.keyBuilder().resourceConfig(resource);
+ }
+
+ /**
+ * Parses each partition name in {@code pNames} into its numeric id and adds it to {@code pIds}.
+ */
+ private static void addAllPartitions(Set<String> pNames, Set<Integer> pIds)
+ {
+ for (String pName : pNames)
+ {
+ pIds.add(pId(pName));
+ }
+ }
+
+ /** Returns a {@link ResourceAssignment} with no partitions mapped, for the given resource name. */
+ private static ResourceAssignment emptyAssignment(String name)
+ {
+ return new ResourceAssignment(name);
+ }
+
+ /**
+ * Adds to {@code set} every partition id in {@code pIds} whose state in the task context is
+ * {@link TaskPartitionState#COMPLETED}.
+ */
+ private static void addCompletedPartitions(Set<Integer> set, TaskContext ctx, Iterable<Integer> pIds)
+ {
+ for (Integer pId : pIds)
+ {
+ TaskPartitionState state = ctx.getPartitionState(pId);
+ if (state == TaskPartitionState.COMPLETED)
+ {
+ set.add(pId);
+ }
+ }
+ }
+
+ /**
+ * Returns the set of all partition ids for a task.
+ * <p/>
+ * If a set of partition ids was explicitly specified in the config, that is used. Otherwise, we use the list of all
+ * partition ids from the target resource.
+ */
+ private static Set<Integer> getAllTaskPartitions(IdealState tgtResourceIs, TaskConfig taskCfg)
+ {
+ Set<Integer> taskPartitions = new HashSet<Integer>();
+ if (taskCfg.getTargetPartitions() != null)
+ {
+ // Explicit partition list wins over the target resource's partition set.
+ for (Integer pId : taskCfg.getTargetPartitions())
+ {
+ taskPartitions.add(pId);
+ }
+ }
+ else
+ {
+ // Fall back to every partition of the target resource, parsed from its partition names.
+ for (String pName : tgtResourceIs.getPartitionSet())
+ {
+ taskPartitions.add(pId(pName));
+ }
+ }
+
+ return taskPartitions;
+ }
+
+ /**
+ * Selects up to {@code n} partitions from {@code candidatePartitions}, in their sorted order,
+ * skipping any id present in {@code excluded}.
+ *
+ * @param candidatePartitions Sorted candidate partition ids; may contain excluded entries.
+ * @param excluded Ids that must not be chosen (completed/failed/already assigned).
+ * @param n Maximum number of partitions to return.
+ *
+ * @return At most {@code n} assignable partition ids, in ascending order.
+ */
+ private static List<Integer> getNextPartitions(SortedSet<Integer> candidatePartitions, Set<Integer> excluded, int n)
+ {
+ List<Integer> result = new ArrayList<Integer>(n);
+ for (Integer pId : candidatePartitions)
+ {
+ if (result.size() >= n)
+ {
+ break;
+ }
+
+ if (!excluded.contains(pId))
+ {
+ result.add(pId);
+ }
+ }
+
+ return result;
+ }
+
+ /**
+ * Records a successful partition run in the task context: state COMPLETED, finish time now,
+ * and one more attempt counted.
+ */
+ private static void markPartitionCompleted(TaskContext ctx, int pId)
+ {
+ ctx.setPartitionState(pId, TaskPartitionState.COMPLETED);
+ ctx.setPartitionFinishTime(pId, System.currentTimeMillis());
+ ctx.incrementNumAttempts(pId);
+ }
+
+ /**
+ * Records a failed partition run in the task context: the given error {@code state}, finish time now,
+ * and one more attempt counted.
+ */
+ private static void markPartitionError(TaskContext ctx, int pId, TaskPartitionState state)
+ {
+ ctx.setPartitionState(pId, state);
+ ctx.setPartitionFinishTime(pId, System.currentTimeMillis());
+ ctx.incrementNumAttempts(pId);
+ }
+
+ /**
+ * Get partition assignments for the target resource, but only for the partitions of interest.
+ *
+ * @param currStateOutput The current state of the instances in the cluster.
+ * @param instanceList The set of instances.
+ * @param tgtIs The ideal state of the target resource.
+ * @param tgtStates Only partitions in this set of states will be considered. If null, partitions do not need to
+ * be in any specific state to be considered.
+ * @param includeSet The set of partitions to consider.
+ *
+ * @return A map of instance vs set of partition ids assigned to that instance.
+ */
+ private static Map<String, SortedSet<Integer>> getTgtPartitionAssignment(CurrentStateOutput currStateOutput,
+ Iterable<String> instanceList,
+ IdealState tgtIs,
+ Set<String> tgtStates,
+ Set<Integer> includeSet)
+ {
+ // Seed every instance with an empty set so callers never see a missing key for a known instance.
+ Map<String, SortedSet<Integer>> result = new HashMap<String, SortedSet<Integer>>();
+ for (String instance : instanceList)
+ {
+ result.put(instance, new TreeSet<Integer>());
+ }
+
+ for (String pName : tgtIs.getPartitionSet())
+ {
+ int pId = pId(pName);
+ if (includeSet.contains(pId))
+ {
+ for (String instance : instanceList)
+ {
+ String state = currStateOutput.getCurrentState(tgtIs.getResourceName(), new Partition(pName), instance);
+ if (tgtStates == null || tgtStates.contains(state))
+ {
+ result.get(instance).add(pId);
+ }
+ }
+ }
+ }
+
+ return result;
+ }
+
+ /**
+ * Return the assignment of task partitions per instance.
+ *
+ * @param instanceList Instances to report on; instances absent from this list are ignored even if
+ * the previous assignment mapped partitions to them.
+ * @param assignment The previous resource assignment to read mappings from.
+ * @param includeSet Only partition ids in this set are included in the result.
+ *
+ * @return A map of instance to the sorted set of task partition ids it holds.
+ */
+ private static Map<String, SortedSet<Integer>> getTaskPartitionAssignments(Iterable<String> instanceList,
+ ResourceAssignment assignment,
+ Set<Integer> includeSet)
+ {
+ Map<String, SortedSet<Integer>> result = new HashMap<String, SortedSet<Integer>>();
+ for (String instance : instanceList)
+ {
+ result.put(instance, new TreeSet<Integer>());
+ }
+
+ for (Partition partition : assignment.getMappedPartitions())
+ {
+ int pId = pId(partition.getPartitionName());
+ if (includeSet.contains(pId))
+ {
+ Map<String, String> replicaMap = assignment.getReplicaMap(partition);
+ for (String instance : replicaMap.keySet())
+ {
+ // Null check skips instances that were assigned previously but are not in instanceList now.
+ SortedSet<Integer> pList = result.get(instance);
+ if (pList != null)
+ {
+ pList.add(pId);
+ }
+ }
+ }
+ }
+
+ return result;
+ }
+
+ /**
+ * Computes the partition name given the resource name and partition id.
+ * Inverse of {@link #pId(String)}: the name is {@code resource + "_" + pId}.
+ */
+ private static String pName(String resource, int pId)
+ {
+ return resource + "_" + pId;
+ }
+
+ /**
+ * Extracts the partition id from the given partition name.
+ * The id is the token after the final underscore; a non-numeric suffix throws NumberFormatException.
+ */
+ private static int pId(String pName)
+ {
+ String[] tokens = pName.split("_");
+ return Integer.valueOf(tokens[tokens.length - 1]);
+ }
+
+ /**
+ * An (instance, state) pair.
+ * Immutable value holder used while building the {@link ResourceAssignment}.
+ */
+ private static class PartitionAssignment
+ {
+ // Name of the instance the partition is assigned to.
+ private final String _instance;
+ // Helix state the partition should take on that instance.
+ private final String _state;
+
+ private PartitionAssignment(String instance, String state)
+ {
+ _instance = instance;
+ _state = state;
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/helix-core/src/main/java/org/apache/helix/task/TaskResult.java
----------------------------------------------------------------------
diff --git a/helix-core/src/main/java/org/apache/helix/task/TaskResult.java b/helix-core/src/main/java/org/apache/helix/task/TaskResult.java
new file mode 100644
index 0000000..d54e170
--- /dev/null
+++ b/helix-core/src/main/java/org/apache/helix/task/TaskResult.java
@@ -0,0 +1,63 @@
+/*
+ * $Id$
+ */
+package org.apache.helix.task;
+
+
+/**
+ * The result of a task execution.
+ * Instances are immutable (both fields are final) and therefore safe to share across threads.
+ *
+ * @author Abe <as...@linkedin.com>
+ * @version $Revision$
+ */
+public class TaskResult
+{
+ /**
+ * An enumeration of status codes.
+ */
+ public enum Status
+ {
+ /** The task completed normally. */
+ COMPLETED,
+ /** The task was cancelled externally, i.e. {@link org.apache.helix.task.Task#cancel()} was called. */
+ CANCELED,
+ /** The task encountered an error from which it could not recover. */
+ ERROR
+ }
+
+ private final Status _status;
+ private final String _info;
+
+ /**
+ * Constructs a new {@link TaskResult}.
+ *
+ * @param status The status code.
+ * @param info Information that can be interpreted by the {@link Task} implementation that constructed this object.
+ * May encode progress or check point information that can be used by the task to resume from where it
+ * left off in a previous execution.
+ */
+ public TaskResult(Status status, String info)
+ {
+ _status = status;
+ _info = info;
+ }
+
+ /** Returns the status code of the finished task. */
+ public Status getStatus()
+ {
+ return _status;
+ }
+
+ /** Returns the opaque info string supplied by the task implementation; may encode progress/checkpoint data. */
+ public String getInfo()
+ {
+ return _info;
+ }
+
+ @Override
+ public String toString()
+ {
+ return "TaskResult{" +
+ "_status=" + _status +
+ ", _info='" + _info + '\'' +
+ '}';
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/helix-core/src/main/java/org/apache/helix/task/TaskRunner.java
----------------------------------------------------------------------
diff --git a/helix-core/src/main/java/org/apache/helix/task/TaskRunner.java b/helix-core/src/main/java/org/apache/helix/task/TaskRunner.java
new file mode 100644
index 0000000..f071b1c
--- /dev/null
+++ b/helix-core/src/main/java/org/apache/helix/task/TaskRunner.java
@@ -0,0 +1,190 @@
+/*
+ * $Id$
+ */
+package org.apache.helix.task;
+
+
+import org.apache.helix.HelixManager;
+import org.apache.log4j.Logger;
+
+
+/**
+ * A wrapping {@link Runnable} used to manage the life-cycle of a user-defined {@link Task} implementation.
+ * Runs the task once, translates its {@link TaskResult} into a requested Helix state transition, and lets
+ * other threads block until the task has started ({@link #waitTillStarted()}) or finished
+ * ({@link #waitTillDone()}).
+ *
+ * @author Abe <as...@linkedin.com>
+ * @version $Revision$
+ */
+public class TaskRunner implements Runnable
+{
+ private static final Logger LOG = Logger.getLogger(TaskRunner.class);
+ private final HelixManager _manager;
+ private final String _taskName;
+ private final String _taskPartition;
+ private final String _sessionId;
+ private final String _instance;
+ // Synchronization object used to signal that the task has been scheduled on a thread.
+ private final Object _startedSync = new Object();
+ // Synchronization object used to signal that the task has finished.
+ private final Object _doneSync = new Object();
+ private final Task _task;
+ // Stores the result of the task once it has finished.
+ private volatile TaskResult _result = null;
+ // If true, indicates that the task has started.
+ private volatile boolean _started = false;
+ // If true, indicates that the task was canceled due to a task timeout.
+ private volatile boolean _timeout = false;
+ // If true, indicates that the task has finished.
+ private volatile boolean _done = false;
+
+ /**
+ * @param task The user-defined task to execute.
+ * @param taskName Name of the task resource (used when requesting state transitions).
+ * @param taskPartition Name of the task partition this runner executes.
+ * @param instance Name of the participant instance hosting the task.
+ * @param manager Helix connection used to write the requested state.
+ * @param sessionId Helix session id of the participant.
+ */
+ public TaskRunner(Task task,
+ String taskName,
+ String taskPartition,
+ String instance,
+ HelixManager manager,
+ String sessionId)
+ {
+ _task = task;
+ _taskName = taskName;
+ _taskPartition = taskPartition;
+ _instance = instance;
+ _manager = manager;
+ _sessionId = sessionId;
+ }
+
+ @Override
+ public void run()
+ {
+ try
+ {
+ signalStarted();
+ _result = _task.run();
+
+ // Map the task's result status onto the state transition to request from the controller.
+ switch (_result.getStatus())
+ {
+ case COMPLETED:
+ requestStateTransition(TaskPartitionState.COMPLETED);
+ break;
+ case CANCELED:
+ if (_timeout)
+ {
+ requestStateTransition(TaskPartitionState.TIMED_OUT);
+ }
+ // Else the state transition to CANCELED was initiated by the controller.
+ break;
+ case ERROR:
+ requestStateTransition(TaskPartitionState.TASK_ERROR);
+ break;
+ default:
+ throw new AssertionError("Unknown result type.");
+ }
+ }
+ catch (Exception e)
+ {
+ // NOTE(review): the caught exception is discarded without being logged, so the root cause of
+ // the TASK_ERROR is lost — consider logging `e` here.
+ requestStateTransition(TaskPartitionState.TASK_ERROR);
+ }
+ finally
+ {
+ // Always wake up waiters in waitTillDone(), even if the task or the transition request threw.
+ synchronized (_doneSync)
+ {
+ _done = true;
+ _doneSync.notifyAll();
+ }
+ }
+ }
+
+ /**
+ * Signals the task to cancel itself because its timeout expired. The task's resulting CANCELED
+ * status is then reported to the controller as {@link TaskPartitionState#TIMED_OUT} (see {@link #run()}).
+ */
+ public void timeout()
+ {
+ _timeout = true;
+ cancel();
+ }
+
+ /**
+ * Signals the task to cancel itself.
+ */
+ public void cancel()
+ {
+ _task.cancel();
+ }
+
+ /**
+ * Waits uninterruptibly until the task has started.
+ */
+ public void waitTillStarted()
+ {
+ synchronized (_startedSync)
+ {
+ while (!_started)
+ {
+ try
+ {
+ _startedSync.wait();
+ }
+ catch (InterruptedException e)
+ {
+ // Uninterruptible by design: log and keep waiting until the task really starts.
+ LOG.warn(String.format("Interrupted while waiting for task %s to start.", _taskPartition), e);
+ }
+ }
+ }
+ }
+
+ /**
+ * Waits uninterruptibly until the task has finished, either normally or due to an error/cancellation.
+ *
+ * @return The result of the task execution.
+ */
+ public TaskResult waitTillDone()
+ {
+ synchronized (_doneSync)
+ {
+ while (!_done)
+ {
+ try
+ {
+ _doneSync.wait();
+ }
+ catch (InterruptedException e)
+ {
+ LOG.warn(String.format("Interrupted while waiting for task %s to complete.", _taskPartition), e);
+ }
+ }
+ }
+ return _result;
+ }
+
+ /**
+ * Signals any threads waiting for this task to start.
+ */
+ private void signalStarted()
+ {
+ synchronized (_startedSync)
+ {
+ _started = true;
+ _startedSync.notifyAll();
+ }
+ }
+
+ /**
+ * Requests the controller for a state transition.
+ *
+ * @param state The state transition that is being requested.
+ */
+ private void requestStateTransition(TaskPartitionState state)
+ {
+ boolean success = TaskUtil.setRequestedState(_manager.getHelixDataAccessor(),
+ _instance,
+ _sessionId,
+ _taskName,
+ _taskPartition,
+ state);
+ if (!success)
+ {
+ LOG.error(String.format(
+ "Failed to set the requested state to %s for instance %s, session id %s, task partition %s.",
+ state,
+ _instance,
+ _sessionId,
+ _taskPartition));
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/helix-core/src/main/java/org/apache/helix/task/TaskState.java
----------------------------------------------------------------------
diff --git a/helix-core/src/main/java/org/apache/helix/task/TaskState.java b/helix-core/src/main/java/org/apache/helix/task/TaskState.java
new file mode 100644
index 0000000..cf78109
--- /dev/null
+++ b/helix-core/src/main/java/org/apache/helix/task/TaskState.java
@@ -0,0 +1,31 @@
+/*
+ * $Id$
+ */
+package org.apache.helix.task;
+
+
+/**
+ * Enumeration of current task states. This value is stored in the rebalancer context.
+ * These are task-level states, distinct from the per-partition {@link TaskPartitionState}.
+ *
+ * @author Abe <as...@linkedin.com>
+ * @version $Revision$
+ */
+public enum TaskState
+{
+ /**
+ * The task is in progress.
+ */
+ IN_PROGRESS,
+ /**
+ * The task has been stopped. It may be resumed later.
+ */
+ STOPPED,
+ /**
+ * The task has failed. It cannot be resumed.
+ */
+ FAILED,
+ /**
+ * All the task partitions have completed normally.
+ */
+ COMPLETED
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/helix-core/src/main/java/org/apache/helix/task/TaskStateModel.java
----------------------------------------------------------------------
diff --git a/helix-core/src/main/java/org/apache/helix/task/TaskStateModel.java b/helix-core/src/main/java/org/apache/helix/task/TaskStateModel.java
new file mode 100644
index 0000000..fa35c63
--- /dev/null
+++ b/helix-core/src/main/java/org/apache/helix/task/TaskStateModel.java
@@ -0,0 +1,266 @@
+/*
+ * $Id$
+ */
+package org.apache.helix.task;
+
+
+import java.util.Map;
+import java.util.Timer;
+import java.util.TimerTask;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.ThreadFactory;
+import org.apache.helix.HelixManager;
+import org.apache.helix.NotificationContext;
+import org.apache.helix.model.Message;
+import org.apache.helix.participant.statemachine.StateModel;
+import org.apache.helix.participant.statemachine.StateModelInfo;
+import org.apache.helix.participant.statemachine.Transition;
+import org.apache.log4j.Logger;
+
+
+/**
+ * Helix participant state model that executes a user-defined {@link Task} for a single task partition.
+ * Transitions into RUNNING start the task via a {@link TaskRunner}; the other transitions join on,
+ * cancel, or discard the runner.
+ *
+ * @author Abe <as...@linkedin.com>
+ * @version $Revision$
+ */
+@StateModelInfo(states = "{'NOT USED BY HELIX'}", initialState = "INIT")
+public class TaskStateModel extends StateModel
+{
+ private static final Logger LOG = Logger.getLogger(TaskStateModel.class);
+ private final HelixManager _manager;
+ private final ExecutorService _taskExecutor;
+ private final Map<String, TaskFactory> _taskFactoryRegistry;
+ // Daemon timer used to enforce per-partition task timeouts.
+ private final Timer _timer = new Timer("TaskStateModel time out daemon", true);
+ // Runner for the currently executing task, if any.
+ // NOTE(review): read and written by Helix transition threads and by the timeout Timer thread
+ // without synchronization or volatile — confirm the visibility guarantees are acceptable.
+ private TaskRunner _taskRunner;
+
+ public TaskStateModel(HelixManager manager, Map<String, TaskFactory> taskFactoryRegistry)
+ {
+ _manager = manager;
+ _taskFactoryRegistry = taskFactoryRegistry;
+ // Fixed-size pool shared by tasks started through this state model instance.
+ _taskExecutor = Executors.newFixedThreadPool(40, new ThreadFactory()
+ {
+ @Override
+ public Thread newThread(Runnable r)
+ {
+ return new Thread(r, "TaskStateModel-thread-pool");
+ }
+ });
+ }
+
+ @Transition(to = "RUNNING", from = "INIT")
+ public void onBecomeRunningFromInit(Message msg, NotificationContext context)
+ {
+ startTask(msg, msg.getPartitionName());
+ }
+
+ @Transition(to = "STOPPED", from = "RUNNING")
+ public String onBecomeStoppedFromRunning(Message msg, NotificationContext context)
+ {
+ String taskPartition = msg.getPartitionName();
+ if (_taskRunner == null)
+ {
+ throw new IllegalStateException(String.format("Invalid state transition. There is no running task for partition %s.",
+ taskPartition));
+ }
+
+ // Cancel the task and block until it finishes; its info string is returned to Helix.
+ _taskRunner.cancel();
+ TaskResult r = _taskRunner.waitTillDone();
+ LOG.info(String.format("Task %s completed with result %s.", msg.getPartitionName(), r));
+
+ return r.getInfo();
+ }
+
+ @Transition(to = "COMPLETED", from = "RUNNING")
+ public void onBecomeCompletedFromRunning(Message msg, NotificationContext context)
+ {
+ String taskPartition = msg.getPartitionName();
+ if (_taskRunner == null)
+ {
+ throw new IllegalStateException(String.format("Invalid state transition. There is no running task for partition %s.",
+ taskPartition));
+ }
+
+ // Sanity check: a COMPLETED transition must match a COMPLETED task result.
+ TaskResult r = _taskRunner.waitTillDone();
+ if (r.getStatus() != TaskResult.Status.COMPLETED)
+ {
+ throw new IllegalStateException(String.format("Partition %s received a state transition to %s but the result status code is %s.",
+ msg.getPartitionName(),
+ msg.getToState(),
+ r.getStatus()));
+ }
+ }
+
+ @Transition(to = "TIMED_OUT", from = "RUNNING")
+ public String onBecomeTimedOutFromRunning(Message msg, NotificationContext context)
+ {
+ String taskPartition = msg.getPartitionName();
+ if (_taskRunner == null)
+ {
+ throw new IllegalStateException(String.format("Invalid state transition. There is no running task for partition %s.",
+ taskPartition));
+ }
+
+ // A timed-out task reports CANCELED from its own run; anything else is a protocol violation.
+ TaskResult r = _taskRunner.waitTillDone();
+ if (r.getStatus() != TaskResult.Status.CANCELED)
+ {
+ throw new IllegalStateException(String.format("Partition %s received a state transition to %s but the result status code is %s.",
+ msg.getPartitionName(),
+ msg.getToState(),
+ r.getStatus()));
+ }
+
+ return r.getInfo();
+ }
+
+ @Transition(to = "TASK_ERROR", from = "RUNNING")
+ public String onBecomeTaskErrorFromRunning(Message msg, NotificationContext context)
+ {
+ String taskPartition = msg.getPartitionName();
+ if (_taskRunner == null)
+ {
+ throw new IllegalStateException(String.format("Invalid state transition. There is no running task for partition %s.",
+ taskPartition));
+ }
+
+ TaskResult r = _taskRunner.waitTillDone();
+ if (r.getStatus() != TaskResult.Status.ERROR)
+ {
+ throw new IllegalStateException(String.format("Partition %s received a state transition to %s but the result status code is %s.",
+ msg.getPartitionName(),
+ msg.getToState(),
+ r.getStatus()));
+ }
+
+ return r.getInfo();
+ }
+
+ @Transition(to = "RUNNING", from = "STOPPED")
+ public void onBecomeRunningFromStopped(Message msg, NotificationContext context)
+ {
+ // Resuming a stopped task starts a fresh TaskRunner for the same partition.
+ startTask(msg, msg.getPartitionName());
+ }
+
+ @Transition(to = "DROPPED", from = "INIT")
+ public void onBecomeDroppedFromInit(Message msg, NotificationContext context)
+ {
+ _taskRunner = null;
+ }
+
+ @Transition(to = "DROPPED", from = "RUNNING")
+ public void onBecomeDroppedFromRunning(Message msg, NotificationContext context)
+ {
+ String taskPartition = msg.getPartitionName();
+ if (_taskRunner == null)
+ {
+ throw new IllegalStateException(String.format("Invalid state transition. There is no running task for partition %s.",
+ taskPartition));
+ }
+
+ _taskRunner.cancel();
+ TaskResult r = _taskRunner.waitTillDone();
+ LOG.info(String.format("Task partition %s returned result %s.", msg.getPartitionName(), r));
+ _taskRunner = null;
+ }
+
+ @Transition(to = "DROPPED", from = "COMPLETED")
+ public void onBecomeDroppedFromCompleted(Message msg, NotificationContext context)
+ {
+ _taskRunner = null;
+ }
+
+ @Transition(to = "DROPPED", from = "STOPPED")
+ public void onBecomeDroppedFromStopped(Message msg, NotificationContext context)
+ {
+ _taskRunner = null;
+ }
+
+ @Transition(to = "DROPPED", from = "TIMED_OUT")
+ public void onBecomeDroppedFromTimedOut(Message msg, NotificationContext context)
+ {
+ _taskRunner = null;
+ }
+
+ @Transition(to = "DROPPED", from = "TASK_ERROR")
+ public void onBecomeDroppedFromTaskError(Message msg, NotificationContext context)
+ {
+ _taskRunner = null;
+ }
+
+ @Transition(to = "INIT", from = "RUNNING")
+ public void onBecomeInitFromRunning(Message msg, NotificationContext context)
+ {
+ String taskPartition = msg.getPartitionName();
+ if (_taskRunner == null)
+ {
+ throw new IllegalStateException(String.format("Invalid state transition. There is no running task for partition %s.",
+ taskPartition));
+ }
+
+ _taskRunner.cancel();
+ TaskResult r = _taskRunner.waitTillDone();
+ LOG.info(String.format("Task partition %s returned result %s.", msg.getPartitionName(), r));
+ _taskRunner = null;
+ }
+
+ @Transition(to = "INIT", from = "COMPLETED")
+ public void onBecomeInitFromCompleted(Message msg, NotificationContext context)
+ {
+ _taskRunner = null;
+ }
+
+ @Transition(to = "INIT", from = "STOPPED")
+ public void onBecomeInitFromStopped(Message msg, NotificationContext context)
+ {
+ _taskRunner = null;
+ }
+
+ @Transition(to = "INIT", from = "TIMED_OUT")
+ public void onBecomeInitFromTimedOut(Message msg, NotificationContext context)
+ {
+ _taskRunner = null;
+ }
+
+ @Transition(to = "INIT", from = "TASK_ERROR")
+ public void onBecomeInitFromTaskError(Message msg, NotificationContext context)
+ {
+ _taskRunner = null;
+ }
+
+ @Override
+ public void reset()
+ {
+ if (_taskRunner != null)
+ {
+ _taskRunner.cancel();
+ }
+ }
+
+ /**
+ * Creates the task via its registered factory, submits it to the executor, blocks until it has
+ * started, and schedules its timeout.
+ *
+ * NOTE(review): `_taskFactoryRegistry.get(cfg.getCommand())` has no null check — an unregistered
+ * command would NPE here; consider failing with a clearer error.
+ */
+ private void startTask(Message msg, String taskPartition)
+ {
+ TaskConfig cfg = TaskUtil.getTaskCfg(_manager, msg.getResourceName());
+ TaskFactory taskFactory = _taskFactoryRegistry.get(cfg.getCommand());
+ Task task = taskFactory.createNewTask(cfg.getCommandConfig());
+
+ _taskRunner = new TaskRunner(task,
+ msg.getResourceName(),
+ taskPartition,
+ msg.getTgtName(),
+ _manager,
+ msg.getTgtSessionId());
+ _taskExecutor.submit(_taskRunner);
+ _taskRunner.waitTillStarted();
+
+ // Set up a timer to cancel the task when its timeout expires.
+ // NOTE(review): the TimerTask reads the _taskRunner field at fire time, so if this task finishes
+ // and a different task starts before the timeout fires, the later task could be timed out instead
+ // — confirm whether the timer should capture the runner it was scheduled for.
+ _timer.schedule(new TimerTask()
+ {
+ @Override
+ public void run()
+ {
+ if (_taskRunner != null)
+ {
+ _taskRunner.timeout();
+ }
+ }
+ }, cfg.getTimeoutPerPartition());
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/helix-core/src/main/java/org/apache/helix/task/TaskStateModelFactory.java
----------------------------------------------------------------------
diff --git a/helix-core/src/main/java/org/apache/helix/task/TaskStateModelFactory.java b/helix-core/src/main/java/org/apache/helix/task/TaskStateModelFactory.java
new file mode 100644
index 0000000..8aa3868
--- /dev/null
+++ b/helix-core/src/main/java/org/apache/helix/task/TaskStateModelFactory.java
@@ -0,0 +1,34 @@
+/*
+ * $Id$
+ */
+package org.apache.helix.task;
+
+
+import java.util.Map;
+import org.apache.helix.HelixManager;
+import org.apache.helix.participant.statemachine.StateModelFactory;
+
+
+/**
+ * Factory class for {@link TaskStateModel}.
+ * Helix calls {@link #createNewStateModel(String)} once per task partition, so each partition gets
+ * its own {@link TaskStateModel} (and thus its own TaskRunner) sharing this manager and registry.
+ *
+ * @author Abe <as...@linkedin.com>
+ * @version $Revision$
+ */
+public class TaskStateModelFactory extends StateModelFactory<TaskStateModel>
+{
+ private final HelixManager _manager;
+ private final Map<String, TaskFactory> _taskFactoryRegistry;
+
+ public TaskStateModelFactory(HelixManager manager, Map<String, TaskFactory> taskFactoryRegistry)
+ {
+ _manager = manager;
+ _taskFactoryRegistry = taskFactoryRegistry;
+ }
+
+ @Override
+ public TaskStateModel createNewStateModel(String partitionName)
+ {
+ return new TaskStateModel(_manager, _taskFactoryRegistry);
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/helix-core/src/main/java/org/apache/helix/task/TaskUtil.java
----------------------------------------------------------------------
diff --git a/helix-core/src/main/java/org/apache/helix/task/TaskUtil.java b/helix-core/src/main/java/org/apache/helix/task/TaskUtil.java
new file mode 100644
index 0000000..d7b235e
--- /dev/null
+++ b/helix-core/src/main/java/org/apache/helix/task/TaskUtil.java
@@ -0,0 +1,161 @@
+/*
+ * $Id$
+ */
+package org.apache.helix.task;
+
+
+import com.google.common.base.Joiner;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import org.apache.helix.AccessOption;
+import org.apache.helix.ConfigAccessor;
+import org.apache.helix.HelixDataAccessor;
+import org.apache.helix.HelixManager;
+import org.apache.helix.PropertyKey;
+import org.apache.helix.ZNRecord;
+import org.apache.helix.model.CurrentState;
+import org.apache.helix.model.HelixConfigScope;
+import org.apache.helix.model.ResourceAssignment;
+import org.apache.helix.model.builder.HelixConfigScopeBuilder;
+import org.apache.log4j.Logger;
+
+
+/**
+ * Static utility methods.
+ * Helpers for reading/writing task and workflow configuration and context in Helix's config store
+ * and property store.
+ *
+ * @author Abe <as...@linkedin.com>
+ * @version $Revision$
+ */
+public class TaskUtil
+{
+ private static final Logger LOG = Logger.getLogger(TaskUtil.class);
+ // Property-store child node holding a task's/workflow's runtime context.
+ private static final String CONTEXT_NODE = "Context";
+ // Property-store child node holding the previous resource assignment.
+ private static final String PREV_RA_NODE = "PreviousResourceAssignment";
+
+ /**
+ * Parses task resource configurations in Helix into a {@link TaskConfig} object.
+ *
+ * @param manager HelixManager object used to connect to Helix.
+ * @param taskResource The name of the task resource.
+ *
+ * @return A {@link TaskConfig} object if Helix contains valid configurations for the task, null otherwise.
+ */
+ public static TaskConfig getTaskCfg(HelixManager manager, String taskResource)
+ {
+ // NOTE(review): getResourceConfigMap may return null when no config keys exist; this relies on
+ // TaskConfig.Builder.fromMap handling a null map (not visible here) — confirm, else this NPEs
+ // instead of returning null as documented.
+ Map<String, String> taskCfg = getResourceConfigMap(manager, taskResource);
+ TaskConfig.Builder b = TaskConfig.Builder.fromMap(taskCfg);
+
+ return b.build();
+ }
+
+ /** Parses workflow resource configurations in Helix into a {@link WorkflowConfig} object. */
+ public static WorkflowConfig getWorkflowCfg(HelixManager manager, String workflowResource)
+ {
+ Map<String, String> workflowCfg = getResourceConfigMap(manager, workflowResource);
+ WorkflowConfig.Builder b = WorkflowConfig.Builder.fromMap(workflowCfg);
+
+ return b.build();
+ }
+ /**
+ * Writes a requested state for a task partition into the instance's current-state node.
+ *
+ * @return true if the update was applied, false if it failed (the failure is logged, not thrown).
+ */
+ public static boolean setRequestedState(HelixDataAccessor accessor,
+ String instance,
+ String sessionId,
+ String resource,
+ String partition,
+ TaskPartitionState state)
+ {
+ LOG.debug(String.format("Requesting a state transition to %s for partition %s.", state, partition));
+ try
+ {
+ PropertyKey.Builder keyBuilder = accessor.keyBuilder();
+ PropertyKey key = keyBuilder.currentState(instance, sessionId, resource);
+ CurrentState currStateDelta = new CurrentState(resource);
+ currStateDelta.setRequestedState(partition, state.name());
+
+ return accessor.updateProperty(key, currStateDelta);
+ }
+ catch (Exception e)
+ {
+ LOG.error(String.format("Error when requesting a state transition to %s for partition %s.", state, partition), e);
+ return false;
+ }
+ }
+
+ /** Builds the RESOURCE-scoped Helix config scope for the given cluster and resource. */
+ public static HelixConfigScope getResourceConfigScope(String clusterName, String resource)
+ {
+ return new HelixConfigScopeBuilder(HelixConfigScope.ConfigScopeProperty.RESOURCE)
+ .forCluster(clusterName).forResource(resource).build();
+ }
+
+ /** Reads the previous resource assignment from the property store, or null if none is stored. */
+ public static ResourceAssignment getPrevResourceAssignment(HelixManager manager, String resourceName)
+ {
+ ZNRecord r = manager.getHelixPropertyStore().get(Joiner.on("/").join(TaskConstants.REBALANCER_CONTEXT_ROOT,
+ resourceName, PREV_RA_NODE), null, AccessOption.PERSISTENT);
+ return r != null ? new ResourceAssignment(r) : null;
+ }
+
+ /** Persists the resource assignment so the next rebalance can diff against it. */
+ public static void setPrevResourceAssignment(HelixManager manager, String resourceName, ResourceAssignment ra)
+ {
+ manager.getHelixPropertyStore().set(Joiner.on("/").join(TaskConstants.REBALANCER_CONTEXT_ROOT,
+ resourceName, PREV_RA_NODE), ra.getRecord(), AccessOption.PERSISTENT);
+ }
+
+ /** Reads the task's runtime context from the property store, or null if none is stored. */
+ public static TaskContext getTaskContext(HelixManager manager, String taskResource)
+ {
+ ZNRecord r = manager.getHelixPropertyStore().get(Joiner.on("/").join(TaskConstants.REBALANCER_CONTEXT_ROOT,
+ taskResource,
+ CONTEXT_NODE), null, AccessOption.PERSISTENT);
+ return r != null ? new TaskContext(r) : null;
+ }
+
+ /** Persists the task's runtime context to the property store. */
+ public static void setTaskContext(HelixManager manager, String taskResource, TaskContext ctx)
+ {
+ manager.getHelixPropertyStore().set(Joiner.on("/").join(TaskConstants.REBALANCER_CONTEXT_ROOT,
+ taskResource,
+ CONTEXT_NODE), ctx.getRecord(), AccessOption.PERSISTENT);
+ }
+
+ /** Reads the workflow's runtime context from the property store, or null if none is stored. */
+ public static WorkflowContext getWorkflowContext(HelixManager manager, String workflowResource)
+ {
+ ZNRecord r = manager.getHelixPropertyStore().get(Joiner.on("/").join(TaskConstants.REBALANCER_CONTEXT_ROOT,
+ workflowResource,
+ CONTEXT_NODE), null, AccessOption.PERSISTENT);
+ return r != null ? new WorkflowContext(r) : null;
+ }
+
+ /** Persists the workflow's runtime context to the property store. */
+ public static void setWorkflowContext(HelixManager manager, String workflowResource, WorkflowContext ctx)
+ {
+ manager.getHelixPropertyStore().set(Joiner.on("/").join(TaskConstants.REBALANCER_CONTEXT_ROOT,
+ workflowResource,
+ CONTEXT_NODE), ctx.getRecord(), AccessOption.PERSISTENT);
+ }
+
+ /** Namespaces a single-task workflow's task under the workflow of the same name. */
+ public static String getNamespacedTaskName(String singleTaskWorkflow)
+ {
+ return getNamespacedTaskName(singleTaskWorkflow, singleTaskWorkflow);
+ }
+
+ /** Returns the task name prefixed by its workflow resource: {@code workflowResource + "_" + taskName}. */
+ public static String getNamespacedTaskName(String workflowResource, String taskName)
+ {
+ return workflowResource + "_" + taskName;
+ }
+
+ /**
+ * Reads every config key/value in the resource's scope into a map.
+ *
+ * @return The key/value map, or null when the scope has no keys.
+ */
+ private static Map<String, String> getResourceConfigMap(HelixManager manager, String resource)
+ {
+ HelixConfigScope scope = getResourceConfigScope(manager.getClusterName(), resource);
+ ConfigAccessor configAccessor = manager.getConfigAccessor();
+
+ Map<String, String> taskCfg = new HashMap<String, String>();
+ List<String> cfgKeys = configAccessor.getKeys(scope);
+ if (cfgKeys == null || cfgKeys.isEmpty())
+ {
+ return null;
+ }
+
+ // One read per key; each value is fetched individually from the config store.
+ for (String cfgKey : cfgKeys)
+ {
+ taskCfg.put(cfgKey, configAccessor.get(scope, cfgKey));
+ }
+
+ return taskCfg;
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/helix-core/src/main/java/org/apache/helix/task/Workflow.java
----------------------------------------------------------------------
diff --git a/helix-core/src/main/java/org/apache/helix/task/Workflow.java b/helix-core/src/main/java/org/apache/helix/task/Workflow.java
new file mode 100644
index 0000000..0e73e3f
--- /dev/null
+++ b/helix-core/src/main/java/org/apache/helix/task/Workflow.java
@@ -0,0 +1,261 @@
+package org.apache.helix.task;
+
+
+import com.google.common.base.Joiner;
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.Reader;
+import java.io.StringReader;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.TreeMap;
+import org.apache.helix.task.beans.TaskBean;
+import org.apache.helix.task.beans.WorkflowBean;
+import org.yaml.snakeyaml.Yaml;
+import org.yaml.snakeyaml.constructor.Constructor;
+
+
+/**
+ * Houses a task dag and config set to fully describe a task workflow
+ *
+ * @author Chris Beavers <cb...@linkedin.com>
+ */
+public class Workflow
+{
+ /** Default workflow name, useful constant for single-node workflows */
+ public static final String UNSPECIFIED = "UNSPECIFIED";
+
+ /** Workflow name */
+ private String _name;
+
+ /** Holds workflow-level configurations */
+ private WorkflowConfig _workflowConfig;
+
+ /** Contains the per-task configurations for all tasks specified in the provided dag */
+ private Map<String, Map<String, String>> _taskConfigs;
+
+ /** Constructs and validates a workflow against a provided dag and config set */
+ private Workflow(String name, WorkflowConfig workflowConfig, Map<String, Map<String, String>> taskConfigs)
+ {
+ _name = name;
+ _workflowConfig = workflowConfig;
+ _taskConfigs = taskConfigs;
+
+ validate();
+ }
+
+ public String getName()
+ {
+ return _name;
+ }
+
+ public Map<String, Map<String, String>> getTaskConfigs()
+ {
+ return _taskConfigs;
+ }
+
+ public Map<String, String> getResourceConfigMap() throws Exception
+ {
+ Map<String, String> cfgMap = new HashMap<String,String>();
+ cfgMap.put(WorkflowConfig.DAG, _workflowConfig.getTaskDag().toJson());
+ cfgMap.put(WorkflowConfig.EXPIRY, String.valueOf(_workflowConfig.getExpiry()));
+ cfgMap.put(WorkflowConfig.TARGET_STATE, _workflowConfig.getTargetState().name());
+
+ return cfgMap;
+ }
+
+ /**
+ * Parses the YAML description from a file into a {@link Workflow} object.
+ *
+ * @param file An abstract path name to the file containing the workflow description.
+ *
+ * @return A {@link Workflow} object.
+ *
+ * @throws Exception
+ */
+ public static Workflow parse(File file)
+ throws Exception
+ {
+ BufferedReader br = new BufferedReader(new FileReader(file));
+ return parse(br);
+ }
+
+ /**
+ * Parses a YAML description of the workflow into a {@link Workflow} object. The YAML string is of the following
+ * form:
+ * <p/>
+ * <pre>
+ * name: MyFlow
+ * tasks:
+ * - name : TaskA
+ * command : SomeTask
+ * ...
+ * - name : TaskB
+ * parents : [TaskA]
+ * command : SomeOtherTask
+ * ...
+ * - name : TaskC
+ * command : AnotherTask
+ * ...
+ * - name : TaskD
+ * parents : [TaskB, TaskC]
+ * command : AnotherTask
+ * ...
+ * </pre>
+ *
+ * @param yaml A YAML string of the above form
+ *
+ * @return A {@link Workflow} object.
+ */
+ public static Workflow parse(String yaml)
+ throws Exception
+ {
+ return parse(new StringReader(yaml));
+ }
+
+ /** Helper function to parse workflow from a generic {@link Reader} */
+ private static Workflow parse(Reader reader) throws Exception
+ {
+ Yaml yaml = new Yaml(new Constructor(WorkflowBean.class));
+ WorkflowBean wf = (WorkflowBean) yaml.load(reader);
+ Builder builder = new Builder(wf.name);
+
+ for (TaskBean task : wf.tasks)
+ {
+ if (task.name == null)
+ {
+ throw new IllegalArgumentException("A task must have a name.");
+ }
+
+ if (task.parents != null)
+ {
+ for (String parent : task.parents)
+ {
+ builder.addParentChildDependency(parent, task.name);
+ }
+ }
+
+ builder.addConfig(task.name, TaskConfig.WORKFLOW_ID, wf.name);
+ builder.addConfig(task.name, TaskConfig.COMMAND, task.command);
+ if (task.commandConfig != null)
+ {
+ builder.addConfig(task.name, TaskConfig.COMMAND_CONFIG, task.commandConfig.toString());
+ }
+ builder.addConfig(task.name, TaskConfig.TARGET_RESOURCE, task.targetResource);
+ if (task.targetPartitionStates != null)
+ {
+ builder.addConfig(task.name, TaskConfig.TARGET_PARTITION_STATES, Joiner.on(",").join(task.targetPartitionStates));
+ }
+ if (task.targetPartitions != null)
+ {
+ builder.addConfig(task.name, TaskConfig.TARGET_PARTITIONS, Joiner.on(",").join(task.targetPartitions));
+ }
+ builder.addConfig(task.name, TaskConfig.MAX_ATTEMPTS_PER_PARTITION, String.valueOf(task.maxAttemptsPerPartition));
+ builder.addConfig(task.name, TaskConfig.NUM_CONCURRENT_TASKS_PER_INSTANCE, String.valueOf(task.numConcurrentTasksPerInstance));
+ builder.addConfig(task.name, TaskConfig.TIMEOUT_PER_PARTITION, String.valueOf(task.timeoutPerPartition));
+ }
+
+ return builder.build();
+ }
+
+ /**
+ * Verifies that all nodes in provided dag have accompanying config and vice-versa.
+ * Also checks dag for cycles and unreachable nodes, and ensures configs are valid.
+ * */
+ public void validate()
+ {
+ // validate dag and configs
+ if(!_taskConfigs.keySet().containsAll(_workflowConfig.getTaskDag().getAllNodes()))
+ {
+ throw new IllegalArgumentException("Nodes specified in DAG missing from config");
+ }
+ else if(!_workflowConfig.getTaskDag().getAllNodes().containsAll(_taskConfigs.keySet()))
+ {
+ throw new IllegalArgumentException("Given DAG lacks nodes with supplied configs");
+ }
+
+ _workflowConfig.getTaskDag().validate();
+
+ for(String node : _taskConfigs.keySet())
+ {
+ buildConfig(node);
+ }
+ }
+
+ /** Builds a TaskConfig from config map. Useful for validating configs */
+ private TaskConfig buildConfig(String task)
+ {
+ return TaskConfig.Builder.fromMap(_taskConfigs.get(task)).build();
+ }
+
+ /** Build a workflow incrementally from dependencies and single configs, validate at build time */
+ public static class Builder
+ {
+ private String _name;
+ private TaskDag _dag;
+ private Map<String, Map<String, String>> _taskConfigs;
+ private long _expiry;
+
+ public Builder(String name)
+ {
+ _name = name;
+ _dag = new TaskDag();
+ _taskConfigs = new TreeMap<String, Map<String, String>>();
+ _expiry = -1;
+ }
+
+ public Builder addConfig(String node, String key, String val)
+ {
+ node = namespacify(node);
+ _dag.addNode(node);
+
+ if(!_taskConfigs.containsKey(node))
+ {
+ _taskConfigs.put(node, new TreeMap<String, String>());
+ }
+ _taskConfigs.get(node).put(key, val);
+
+ return this;
+ }
+
+ public Builder addParentChildDependency(String parent, String child)
+ {
+ parent = namespacify(parent);
+ child = namespacify(child);
+ _dag.addParentToChild(parent, child);
+
+ return this;
+ }
+
+ public Builder setExpiry(long expiry)
+ {
+ _expiry = expiry;
+ return this;
+ }
+
+ public String namespacify(String task)
+ {
+ return TaskUtil.getNamespacedTaskName(_name, task);
+ }
+
+ public Workflow build()
+ {
+ for(String task : _taskConfigs.keySet())
+ {
+ //addConfig(task, TaskConfig.WORKFLOW_ID, _name);
+ _taskConfigs.get(task).put(TaskConfig.WORKFLOW_ID, _name);
+ }
+
+ WorkflowConfig.Builder builder = new WorkflowConfig.Builder();
+ builder.setTaskDag(_dag);
+ builder.setTargetState(TargetState.START);
+ if(_expiry > 0)
+ {
+ builder.setExpiry(_expiry);
+ }
+
+ return new Workflow(_name, builder.build(), _taskConfigs); // calls validate internally
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/helix-core/src/main/java/org/apache/helix/task/WorkflowConfig.java
----------------------------------------------------------------------
diff --git a/helix-core/src/main/java/org/apache/helix/task/WorkflowConfig.java b/helix-core/src/main/java/org/apache/helix/task/WorkflowConfig.java
new file mode 100644
index 0000000..547a291
--- /dev/null
+++ b/helix-core/src/main/java/org/apache/helix/task/WorkflowConfig.java
@@ -0,0 +1,116 @@
+package org.apache.helix.task;
+
+import java.util.Map;
+
+/**
+ * Provides a typed interface to workflow level configurations. Validates the configurations.
+ *
+ * @author Chris Beavers <cb...@linkedin.com>
+ */
+public class WorkflowConfig
+{
+ /* Config fields */
+ public static final String DAG = "Dag";
+ public static final String TARGET_STATE = "TargetState";
+ public static final String EXPIRY = "Expiry";
+
+ /* Default values */
+ public static final long DEFAULT_EXPIRY = 24 * 60 * 60 * 1000;
+
+ /* Member variables */
+ private TaskDag _taskDag;
+ private TargetState _targetState;
+ private long _expiry;
+
+ private WorkflowConfig(TaskDag taskDag,
+ TargetState targetState,
+ long expiry)
+ {
+ _taskDag = taskDag;
+ _targetState = targetState;
+ _expiry = expiry;
+ }
+
+ public TaskDag getTaskDag()
+ {
+ return _taskDag;
+ }
+
+ public TargetState getTargetState()
+ {
+ return _targetState;
+ }
+
+ public long getExpiry()
+ {
+ return _expiry;
+ }
+
+ public static class Builder
+ {
+ private TaskDag _taskDag = TaskDag.EMPTY_DAG;
+ private TargetState _targetState = TargetState.START;
+ private long _expiry = DEFAULT_EXPIRY;
+
+ public Builder()
+ {
+ // Nothing to do
+ }
+
+ public WorkflowConfig build()
+ {
+ validate();
+
+ return new WorkflowConfig(_taskDag,
+ _targetState,
+ _expiry);
+ }
+
+ public Builder setTaskDag(TaskDag v)
+ {
+ _taskDag = v;
+ return this;
+ }
+
+ public Builder setExpiry(long v)
+ {
+ _expiry = v;
+ return this;
+ }
+
+ public Builder setTargetState(TargetState v)
+ {
+ _targetState = v;
+ return this;
+ }
+
+ public static Builder fromMap(Map<String, String> cfg)
+ {
+ Builder b = new Builder();
+
+ if (cfg.containsKey(EXPIRY))
+ {
+ b.setExpiry(Long.parseLong(cfg.get(EXPIRY)));
+ }
+ if (cfg.containsKey(DAG))
+ {
+ b.setTaskDag(TaskDag.fromJson(cfg.get(DAG)));
+ }
+ if (cfg.containsKey(TARGET_STATE))
+ {
+ b.setTargetState(TargetState.valueOf(cfg.get(TARGET_STATE)));
+ }
+
+ return b;
+ }
+
+ private void validate()
+ {
+ if (_expiry < 0)
+ {
+ throw new IllegalArgumentException(String.format("%s has invalid value %s", EXPIRY, _expiry));
+ }
+ }
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/helix-core/src/main/java/org/apache/helix/task/WorkflowContext.java
----------------------------------------------------------------------
diff --git a/helix-core/src/main/java/org/apache/helix/task/WorkflowContext.java b/helix-core/src/main/java/org/apache/helix/task/WorkflowContext.java
new file mode 100644
index 0000000..6840a5a
--- /dev/null
+++ b/helix-core/src/main/java/org/apache/helix/task/WorkflowContext.java
@@ -0,0 +1,110 @@
+package org.apache.helix.task;
+
+import org.apache.helix.HelixProperty;
+import org.apache.helix.ZNRecord;
+
+import java.util.Map;
+import java.util.TreeMap;
+
+/**
+ * Typed interface to the workflow context information stored by {@link TaskRebalancer} in the Helix property store
+ *
+ * @author Chris Beavers <cb...@linkedin.com>
+ */
+public class WorkflowContext extends HelixProperty
+{
+ public static final String WORKFLOW_STATE = "STATE";
+ public static final String START_TIME = "START_TIME";
+ public static final String FINISH_TIME = "FINISH_TIME";
+ public static final String TASK_STATES = "TASK_STATES";
+ public static final int UNFINISHED = -1;
+
+ public WorkflowContext(ZNRecord record)
+ {
+ super(record);
+ }
+
+ public void setWorkflowState(TaskState s)
+ {
+ if(_record.getSimpleField(WORKFLOW_STATE) == null)
+ {
+ _record.setSimpleField(WORKFLOW_STATE, s.name());
+ }
+ else if(!_record.getSimpleField(WORKFLOW_STATE).equals(TaskState.FAILED.name())
+ && !_record.getSimpleField(WORKFLOW_STATE).equals(TaskState.COMPLETED.name()))
+ {
+ _record.setSimpleField(WORKFLOW_STATE, s.name());
+ }
+ }
+
+ public TaskState getWorkflowState()
+ {
+ String s = _record.getSimpleField(WORKFLOW_STATE);
+ if(s == null)
+ {
+ return null;
+ }
+
+ return TaskState.valueOf(s);
+ }
+
+ public void setTaskState(String taskResource, TaskState s)
+ {
+ Map<String, String> states = _record.getMapField(TASK_STATES);
+ if(states == null)
+ {
+ states = new TreeMap<String, String>();
+ _record.setMapField(TASK_STATES, states);
+ }
+ states.put(taskResource, s.name());
+ }
+
+ public TaskState getTaskState(String taskResource)
+ {
+ Map<String, String> states = _record.getMapField(TASK_STATES);
+ if(states == null)
+ {
+ return null;
+ }
+
+ String s = states.get(taskResource);
+ if (s == null)
+ {
+ return null;
+ }
+
+ return TaskState.valueOf(s);
+ }
+
+ public void setStartTime(long t)
+ {
+ _record.setSimpleField(START_TIME, String.valueOf(t));
+ }
+
+ public long getStartTime()
+ {
+ String tStr = _record.getSimpleField(START_TIME);
+ if (tStr == null)
+ {
+ return -1;
+ }
+
+ return Long.parseLong(tStr);
+ }
+
+ public void setFinishTime(long t)
+ {
+ _record.setSimpleField(FINISH_TIME, String.valueOf(t));
+ }
+
+ public long getFinishTime()
+ {
+ String tStr = _record.getSimpleField(FINISH_TIME);
+ if (tStr == null)
+ {
+ return UNFINISHED;
+ }
+
+ return Long.parseLong(tStr);
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/helix-core/src/main/java/org/apache/helix/task/beans/TaskBean.java
----------------------------------------------------------------------
diff --git a/helix-core/src/main/java/org/apache/helix/task/beans/TaskBean.java b/helix-core/src/main/java/org/apache/helix/task/beans/TaskBean.java
new file mode 100644
index 0000000..2fe2f6f
--- /dev/null
+++ b/helix-core/src/main/java/org/apache/helix/task/beans/TaskBean.java
@@ -0,0 +1,30 @@
+/*
+ * $Id$
+ */
+package org.apache.helix.task.beans;
+
+
+import java.util.List;
+import java.util.Map;
+import org.apache.helix.task.TaskConfig;
+
+
+/**
+ * Bean class used for parsing task definitions from YAML.
+ *
+ * @author Abe <as...@linkedin.com>
+ * @version $Revision$
+ */
/**
 * Bean class used for parsing task definitions from YAML.
 * Fields are public and mutable because SnakeYAML populates them directly during parsing;
 * numeric fields carry the TaskConfig defaults so omitted YAML keys fall back sensibly.
 *
 * @author Abe <as...@linkedin.com>
 * @version $Revision$
 */
public class TaskBean
{
  // Unique task name within the workflow; required (parsing rejects a null name).
  public String name;
  // Names of tasks that must complete before this one; optional.
  public List<String> parents;
  // Name of the Helix resource this task targets.
  public String targetResource;
  // Partition states to target; joined into a comma-separated config value when present.
  public List<String> targetPartitionStates;
  // Specific partition ids to target; joined into a comma-separated config value when present.
  public List<Integer> targetPartitions;
  // Identifier of the command to execute for this task.
  public String command;
  // Free-form command configuration; stored via toString() into the task config.
  public Map<String, Object> commandConfig;
  public long timeoutPerPartition = TaskConfig.DEFAULT_TIMEOUT_PER_PARTITION;
  public int numConcurrentTasksPerInstance = TaskConfig.DEFAULT_NUM_CONCURRENT_TASKS_PER_INSTANCE;
  public int maxAttemptsPerPartition = TaskConfig.DEFAULT_MAX_ATTEMPTS_PER_PARTITION;
}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/helix-core/src/main/java/org/apache/helix/task/beans/WorkflowBean.java
----------------------------------------------------------------------
diff --git a/helix-core/src/main/java/org/apache/helix/task/beans/WorkflowBean.java b/helix-core/src/main/java/org/apache/helix/task/beans/WorkflowBean.java
new file mode 100644
index 0000000..e8fcd88
--- /dev/null
+++ b/helix-core/src/main/java/org/apache/helix/task/beans/WorkflowBean.java
@@ -0,0 +1,21 @@
+/*
+ * $Id$
+ */
+package org.apache.helix.task.beans;
+
+
+import java.util.List;
+
+
+/**
+ * Bean class used for parsing workflow definitions from YAML.
+ *
+ * @author Abe <as...@linkedin.com>
+ * @version $Revision$
+ */
/**
 * Bean class used for parsing workflow definitions from YAML.
 * Fields are public and mutable because SnakeYAML populates them directly during parsing.
 *
 * @author Abe <as...@linkedin.com>
 * @version $Revision$
 */
public class WorkflowBean
{
  // Workflow name; used to namespace all task names.
  public String name;
  // Expiry as a string; presumably milliseconds — TODO confirm against consumers.
  public String expiry;
  // Task definitions making up the workflow dag.
  public List<TaskBean> tasks;
}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/helix-core/src/main/java/org/apache/helix/tools/ClusterSetup.java
----------------------------------------------------------------------
diff --git a/helix-core/src/main/java/org/apache/helix/tools/ClusterSetup.java b/helix-core/src/main/java/org/apache/helix/tools/ClusterSetup.java
index a39e571..2131c3c 100644
--- a/helix-core/src/main/java/org/apache/helix/tools/ClusterSetup.java
+++ b/helix-core/src/main/java/org/apache/helix/tools/ClusterSetup.java
@@ -164,6 +164,8 @@ public class ClusterSetup {
StateModelConfigGenerator.generateConfigForOnlineOffline()));
addStateModelDef(clusterName, "ScheduledTask", new StateModelDefinition(
StateModelConfigGenerator.generateConfigForScheduledTaskQueue()));
+ addStateModelDef(clusterName, "Task",
+ new StateModelDefinition(StateModelConfigGenerator.generateConfigForTaskStateModel()));
}
public void activateCluster(String clusterName, String grandCluster, boolean enable) {
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/helix-core/src/main/java/org/apache/helix/tools/StateModelConfigGenerator.java
----------------------------------------------------------------------
diff --git a/helix-core/src/main/java/org/apache/helix/tools/StateModelConfigGenerator.java b/helix-core/src/main/java/org/apache/helix/tools/StateModelConfigGenerator.java
index 508e447..b8b3aeb 100644
--- a/helix-core/src/main/java/org/apache/helix/tools/StateModelConfigGenerator.java
+++ b/helix-core/src/main/java/org/apache/helix/tools/StateModelConfigGenerator.java
@@ -23,13 +23,15 @@ import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
-
import org.apache.helix.ZNRecord;
import org.apache.helix.manager.zk.DefaultSchedulerMessageHandlerFactory;
import org.apache.helix.manager.zk.ZNRecordSerializer;
-import org.apache.helix.model.Transition;
import org.apache.helix.model.StateModelDefinition.StateModelDefinitionProperty;
+import org.apache.helix.model.Transition;
import org.apache.helix.model.builder.StateTransitionTableBuilder;
+import org.apache.helix.task.TaskPartitionState;
+import org.apache.helix.task.TaskConstants;
+
// TODO refactor to use StateModelDefinition.Builder
public class StateModelConfigGenerator {
@@ -348,4 +350,94 @@ public class StateModelConfigGenerator {
stateTransitionPriorityList);
return record;
}
+
+ public static ZNRecord generateConfigForTaskStateModel()
+ {
+ ZNRecord record = new ZNRecord(TaskConstants.STATE_MODEL_NAME);
+
+ record.setSimpleField(StateModelDefinitionProperty.INITIAL_STATE.toString(), TaskPartitionState.INIT.name());
+ List<String> statePriorityList = new ArrayList<String>();
+ statePriorityList.add(TaskPartitionState.INIT.name());
+ statePriorityList.add(TaskPartitionState.RUNNING.name());
+ statePriorityList.add(TaskPartitionState.STOPPED.name());
+ statePriorityList.add(TaskPartitionState.COMPLETED.name());
+ statePriorityList.add(TaskPartitionState.TIMED_OUT.name());
+ statePriorityList.add(TaskPartitionState.TASK_ERROR.name());
+ statePriorityList.add(TaskPartitionState.DROPPED.name());
+ record.setListField(StateModelDefinitionProperty.STATE_PRIORITY_LIST.toString(), statePriorityList);
+ for (String state : statePriorityList)
+ {
+ String key = state + ".meta";
+ Map<String, String> metadata = new HashMap<String, String>();
+ metadata.put("count", "-1");
+ record.setMapField(key, metadata);
+ }
+
+ List<String> states = new ArrayList<String>();
+ states.add(TaskPartitionState.INIT.name());
+ states.add(TaskPartitionState.RUNNING.name());
+ states.add(TaskPartitionState.STOPPED.name());
+ states.add(TaskPartitionState.COMPLETED.name());
+ states.add(TaskPartitionState.TIMED_OUT.name());
+ states.add(TaskPartitionState.TASK_ERROR.name());
+ states.add(TaskPartitionState.DROPPED.name());
+
+ List<Transition> transitions = new ArrayList<Transition>();
+ transitions.add(new Transition(TaskPartitionState.INIT.name(), TaskPartitionState.RUNNING.name()));
+ transitions.add(new Transition(TaskPartitionState.RUNNING.name(), TaskPartitionState.STOPPED.name()));
+ transitions.add(new Transition(TaskPartitionState.RUNNING.name(), TaskPartitionState.COMPLETED.name()));
+ transitions.add(new Transition(TaskPartitionState.RUNNING.name(), TaskPartitionState.TIMED_OUT.name()));
+ transitions.add(new Transition(TaskPartitionState.RUNNING.name(), TaskPartitionState.TASK_ERROR.name()));
+ transitions.add(new Transition(TaskPartitionState.STOPPED.name(), TaskPartitionState.RUNNING.name()));
+
+ // All states have a transition to DROPPED.
+ transitions.add(new Transition(TaskPartitionState.INIT.name(), TaskPartitionState.DROPPED.name()));
+ transitions.add(new Transition(TaskPartitionState.RUNNING.name(), TaskPartitionState.DROPPED.name()));
+ transitions.add(new Transition(TaskPartitionState.COMPLETED.name(), TaskPartitionState.DROPPED.name()));
+ transitions.add(new Transition(TaskPartitionState.STOPPED.name(), TaskPartitionState.DROPPED.name()));
+ transitions.add(new Transition(TaskPartitionState.TIMED_OUT.name(), TaskPartitionState.DROPPED.name()));
+ transitions.add(new Transition(TaskPartitionState.TASK_ERROR.name(), TaskPartitionState.DROPPED.name()));
+
+ // All states, except DROPPED, have a transition to INIT.
+ transitions.add(new Transition(TaskPartitionState.RUNNING.name(), TaskPartitionState.INIT.name()));
+ transitions.add(new Transition(TaskPartitionState.COMPLETED.name(), TaskPartitionState.INIT.name()));
+ transitions.add(new Transition(TaskPartitionState.STOPPED.name(), TaskPartitionState.INIT.name()));
+ transitions.add(new Transition(TaskPartitionState.TIMED_OUT.name(), TaskPartitionState.INIT.name()));
+ transitions.add(new Transition(TaskPartitionState.TASK_ERROR.name(), TaskPartitionState.INIT.name()));
+
+ StateTransitionTableBuilder builder = new StateTransitionTableBuilder();
+ Map<String, Map<String, String>> next = builder.buildTransitionTable(states, transitions);
+
+ for (String state : statePriorityList)
+ {
+ String key = state + ".next";
+ record.setMapField(key, next.get(state));
+ }
+
+ List<String> stateTransitionPriorityList = new ArrayList<String>();
+ stateTransitionPriorityList.add(String.format("%s-%s", TaskPartitionState.INIT.name(), TaskPartitionState.RUNNING.name()));
+ stateTransitionPriorityList.add(String.format("%s-%s", TaskPartitionState.RUNNING.name(), TaskPartitionState.STOPPED.name()));
+ stateTransitionPriorityList.add(String.format("%s-%s", TaskPartitionState.RUNNING.name(), TaskPartitionState.COMPLETED.name()));
+ stateTransitionPriorityList.add(String.format("%s-%s", TaskPartitionState.RUNNING.name(), TaskPartitionState.TIMED_OUT.name()));
+ stateTransitionPriorityList.add(String.format("%s-%s", TaskPartitionState.RUNNING.name(), TaskPartitionState.TASK_ERROR.name()));
+ stateTransitionPriorityList.add(String.format("%s-%s", TaskPartitionState.STOPPED.name(), TaskPartitionState.RUNNING.name()));
+
+ stateTransitionPriorityList.add(String.format("%s-%s", TaskPartitionState.INIT.name(), TaskPartitionState.DROPPED.name()));
+ stateTransitionPriorityList.add(String.format("%s-%s", TaskPartitionState.RUNNING.name(), TaskPartitionState.DROPPED.name()));
+ stateTransitionPriorityList.add(String.format("%s-%s", TaskPartitionState.COMPLETED.name(), TaskPartitionState.DROPPED.name()));
+ stateTransitionPriorityList.add(String.format("%s-%s", TaskPartitionState.STOPPED.name(), TaskPartitionState.DROPPED.name()));
+ stateTransitionPriorityList.add(String.format("%s-%s", TaskPartitionState.TIMED_OUT.name(), TaskPartitionState.DROPPED.name()));
+ stateTransitionPriorityList.add(String.format("%s-%s", TaskPartitionState.TASK_ERROR.name(), TaskPartitionState.DROPPED.name()));
+
+ stateTransitionPriorityList.add(String.format("%s-%s", TaskPartitionState.RUNNING.name(), TaskPartitionState.INIT.name()));
+ stateTransitionPriorityList.add(String.format("%s-%s", TaskPartitionState.COMPLETED.name(), TaskPartitionState.INIT.name()));
+ stateTransitionPriorityList.add(String.format("%s-%s", TaskPartitionState.STOPPED.name(), TaskPartitionState.INIT.name()));
+ stateTransitionPriorityList.add(String.format("%s-%s", TaskPartitionState.TIMED_OUT.name(), TaskPartitionState.INIT.name()));
+ stateTransitionPriorityList.add(String.format("%s-%s", TaskPartitionState.TASK_ERROR.name(), TaskPartitionState.INIT.name()));
+
+ record.setListField(StateModelDefinitionProperty.STATE_TRANSITION_PRIORITYLIST.toString(),
+ stateTransitionPriorityList);
+
+ return record;
+ }
}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/helix-core/src/test/java/org/apache/helix/DummyProcessThread.java
----------------------------------------------------------------------
diff --git a/helix-core/src/test/java/org/apache/helix/DummyProcessThread.java b/helix-core/src/test/java/org/apache/helix/DummyProcessThread.java
index f51aa1d..fbe20d5 100644
--- a/helix-core/src/test/java/org/apache/helix/DummyProcessThread.java
+++ b/helix-core/src/test/java/org/apache/helix/DummyProcessThread.java
@@ -27,9 +27,8 @@ import org.apache.log4j.Logger;
public class DummyProcessThread implements Runnable {
private static final Logger LOG = Logger.getLogger(DummyProcessThread.class);
-
- HelixManager _manager;
- String _instanceName;
+ private final HelixManager _manager;
+ private final String _instanceName;
public DummyProcessThread(HelixManager manager, String instanceName) {
_manager = manager;
@@ -40,8 +39,6 @@ public class DummyProcessThread implements Runnable {
public void run() {
try {
DummyStateModelFactory stateModelFactory = new DummyStateModelFactory(0);
- // StateMachineEngine genericStateMachineHandler =
- // new StateMachineEngine();
StateMachineEngine stateMach = _manager.getStateMachineEngine();
stateMach.registerStateModelFactory("MasterSlave", stateModelFactory);
@@ -51,9 +48,6 @@ public class DummyProcessThread implements Runnable {
new DummyOnlineOfflineStateModelFactory(10);
stateMach.registerStateModelFactory("LeaderStandby", stateModelFactory1);
stateMach.registerStateModelFactory("OnlineOffline", stateModelFactory2);
- // _manager.getMessagingService()
- // .registerMessageHandlerFactory(MessageType.STATE_TRANSITION.toString(),
- // genericStateMachineHandler);
_manager.connect();
Thread.currentThread().join();
@@ -61,9 +55,7 @@ public class DummyProcessThread implements Runnable {
String msg =
"participant:" + _instanceName + ", " + Thread.currentThread().getName() + " interrupted";
LOG.info(msg);
- // System.err.println(msg);
} catch (Exception e) {
- // TODO Auto-generated catch block
e.printStackTrace();
}
}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/helix-core/src/test/java/org/apache/helix/integration/ZkIntegrationTestBase.java
----------------------------------------------------------------------
diff --git a/helix-core/src/test/java/org/apache/helix/integration/ZkIntegrationTestBase.java b/helix-core/src/test/java/org/apache/helix/integration/ZkIntegrationTestBase.java
index 2ab0aaf..fbf0601 100644
--- a/helix-core/src/test/java/org/apache/helix/integration/ZkIntegrationTestBase.java
+++ b/helix-core/src/test/java/org/apache/helix/integration/ZkIntegrationTestBase.java
@@ -79,8 +79,7 @@ public class ZkIntegrationTestBase {
}
protected String getShortClassName() {
- String className = this.getClass().getName();
- return className.substring(className.lastIndexOf('.') + 1);
+ return this.getClass().getSimpleName();
}
protected String getCurrentLeader(ZkClient zkClient, String clusterName) {
[04/15] Adding Helix-task-framework and Yarn integration modules
Posted by ki...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/ProviderProcess.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/ProviderProcess.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/ProviderProcess.java
new file mode 100644
index 0000000..9ea713c
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/ProviderProcess.java
@@ -0,0 +1,82 @@
+package org.apache.helix.metamanager.provider;
+
+import java.util.Properties;
+
+import org.apache.helix.HelixAdmin;
+import org.apache.helix.HelixManager;
+import org.apache.helix.HelixManagerFactory;
+import org.apache.helix.InstanceType;
+import org.apache.helix.manager.zk.ZKHelixAdmin;
+import org.apache.helix.metamanager.ClusterAdmin;
+import org.apache.helix.metamanager.ContainerProvider;
+import org.apache.helix.metamanager.HelixClusterAdmin;
+import org.apache.helix.metamanager.Service;
+import org.apache.helix.model.InstanceConfig;
+import org.apache.log4j.Logger;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * Helix participant for ContainerProvider. Configurable via ProviderProperties
+ * and runnable service.
+ *
+ */
+public class ProviderProcess implements Service {
+ static final Logger log = Logger.getLogger(ProviderProcess.class);
+
+ ClusterAdmin admin;
+
+ ProviderProperties properties;
+ ContainerProvider provider;
+ HelixAdmin helixAdmin;
+ HelixManager participantManager;
+
+ @Override
+ public void configure(Properties properties) throws Exception {
+ Preconditions.checkNotNull(properties);
+ ProviderProperties providerProperties = new ProviderProperties();
+ providerProperties.putAll(properties);
+ Preconditions.checkArgument(providerProperties.isValid());
+
+ this.properties = providerProperties;
+
+ }
+
+ public void setConteinerProvider(ContainerProvider provider) {
+ this.provider = provider;
+ }
+
+ @Override
+ public void start() throws Exception {
+ Preconditions.checkNotNull(provider);
+
+ log.info(String.format("Registering provider '%s' at '%s/%s'", properties.getName(), properties.getMetaAddress(), properties.getMetaCluster()));
+ HelixAdmin metaHelixAdmin = new ZKHelixAdmin(properties.getMetaAddress());
+ metaHelixAdmin.addInstance(properties.getMetaCluster(), new InstanceConfig(properties.getName()));
+ metaHelixAdmin.close();
+
+ log.info(String.format("Starting provider '%s'", properties.getName()));
+ helixAdmin = new ZKHelixAdmin(properties.getAddress());
+ admin = new HelixClusterAdmin(properties.getCluster(), helixAdmin);
+
+ participantManager = HelixManagerFactory.getZKHelixManager(properties.getMetaCluster(), properties.getName(), InstanceType.PARTICIPANT,
+ properties.getMetaAddress());
+ participantManager.getStateMachineEngine().registerStateModelFactory("OnlineOffline", new ProviderStateModelFactory(provider, admin));
+ participantManager.connect();
+
+ log.info(String.format("Successfully started provider '%s'", properties.getName()));
+ }
+
+ @Override
+ public void stop() {
+ log.info(String.format("Stopping provider '%s'", properties.getName()));
+ if (participantManager != null) {
+ participantManager.disconnect();
+ participantManager = null;
+ }
+ if (helixAdmin != null) {
+ helixAdmin.close();
+ helixAdmin = null;
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/ProviderProperties.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/ProviderProperties.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/ProviderProperties.java
new file mode 100644
index 0000000..098592a
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/ProviderProperties.java
@@ -0,0 +1,97 @@
+package org.apache.helix.metamanager.provider;
+
+import java.util.Collections;
+import java.util.Map;
+import java.util.Properties;
+import java.util.Set;
+
+import org.apache.helix.metamanager.bootstrapper.BootUtils;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * Base configuration for {@link ProviderProcess}.
+ *
+ */
+public class ProviderProperties extends Properties {
+ /**
+ *
+ */
+ private static final long serialVersionUID = -2209509977839674160L;
+
+ public final static String ADDRESS = "address";
+ public final static String CLUSTER = "cluster";
+ public final static String METAADDRESS = "metaaddress";
+ public final static String METACLUSTER = "metacluster";
+ public final static String NAME = "name";
+
+ public final static String CONTAINER_NAMESPACE = "containers";
+
+ public boolean isValid() {
+ return(containsKey(ADDRESS) &&
+ containsKey(CLUSTER) &&
+ containsKey(METAADDRESS) &&
+ containsKey(METACLUSTER) &&
+ containsKey(NAME));
+ }
+
+ public String getAddress() {
+ return getProperty(ADDRESS);
+ }
+
+ public String getCluster() {
+ return getProperty(CLUSTER);
+ }
+
+ public String getMetaAddress() {
+ return getProperty(METAADDRESS);
+ }
+
+ public String getMetaCluster() {
+ return getProperty(METACLUSTER);
+ }
+
+ public String getName() {
+ return getProperty(NAME);
+ }
+
+ public Set<String> getContainers() {
+ if(!BootUtils.hasNamespace(this, CONTAINER_NAMESPACE))
+ return Collections.emptySet();
+ return BootUtils.getNamespaces(BootUtils.getNamespace(this, CONTAINER_NAMESPACE));
+ }
+
+ public boolean hasContainer(String id) {
+ if(!BootUtils.hasNamespace(this, CONTAINER_NAMESPACE)) return false;
+ if(!BootUtils.hasNamespace(BootUtils.getNamespace(this, CONTAINER_NAMESPACE), id)) return false;
+ return true;
+ }
+
+ public Properties getContainer(String id) {
+ Preconditions.checkArgument(BootUtils.hasNamespace(this, CONTAINER_NAMESPACE), "no container namespace");
+ Preconditions.checkArgument(BootUtils.hasNamespace(BootUtils.getNamespace(this, CONTAINER_NAMESPACE), id), "container %s not configured", id);
+ return BootUtils.getNamespace(BootUtils.getNamespace(this, CONTAINER_NAMESPACE), id);
+ }
+
+ public void addContainer(String id, Properties properties) {
+ Preconditions.checkArgument(!getContainers().contains(id), "Already contains container type %s", id);
+
+ // add container config
+ for(Map.Entry<Object, Object> entry : properties.entrySet()) {
+ this.put(CONTAINER_NAMESPACE + "." + id + "." + entry.getKey(), entry.getValue());
+ }
+ }
+
+ @Override
+ public Object get(Object key) {
+ Preconditions.checkState(containsKey(key));
+ return super.get(key);
+ }
+
+ @Override
+ public String getProperty(String key) {
+ Preconditions.checkState(containsKey(key));
+ return super.getProperty(key);
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/ProviderRebalancer.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/ProviderRebalancer.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/ProviderRebalancer.java
new file mode 100644
index 0000000..4be1a05
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/ProviderRebalancer.java
@@ -0,0 +1,352 @@
+package org.apache.helix.metamanager.provider;
+
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.SortedSet;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.helix.HelixManager;
+import org.apache.helix.controller.rebalancer.Rebalancer;
+import org.apache.helix.controller.stages.ClusterDataCache;
+import org.apache.helix.controller.stages.CurrentStateOutput;
+import org.apache.helix.metamanager.StatusProvider;
+import org.apache.helix.metamanager.TargetProvider;
+import org.apache.helix.model.IdealState;
+import org.apache.helix.model.LiveInstance;
+import org.apache.helix.model.Partition;
+import org.apache.helix.model.Resource;
+import org.apache.helix.model.ResourceAssignment;
+import org.apache.log4j.Logger;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.Sets;
+
+/**
+ * Rebalancer for meta cluster. Polls {@link TargetProvider} and
+ * {@link StatusProvider} and reads and sets IdealState of meta cluster participants (
+ * {@link ProviderProcess}). The number of active container is set to the target
+ * count. Failed containers are shut down and restarted on any available
+ * provider. Also, container counts are balanced across multiple providers.<br/>
+ * <b>NOTE:</b> status and target provider are injected via
+ * {@link ProviderRebalancerSingleton}<br/>
+ * <br/>
+ * <b>IdealState mapping:</b><br/>
+ * resource = container type<br/>
+ * partition = logical container instance<br/>
+ * instance = container provider<br/>
+ * status = physical container instance presence<br/>
+ */
+public class ProviderRebalancer implements Rebalancer {
+
+ static final Logger log = Logger.getLogger(ProviderRebalancer.class);
+
+ static final long UPDATE_INTERVAL_MIN = 1500;
+
+ static final Object lock = new Object();
+ static long nextUpdate = 0;
+
+ TargetProvider targetProvider;
+ StatusProvider statusProvider;
+ HelixManager manager;
+
+ @Override
+ public void init(HelixManager manager) {
+ this.targetProvider = ProviderRebalancerSingleton.getTargetProvider();
+ this.statusProvider = ProviderRebalancerSingleton.getStatusProvider();
+ this.manager = manager;
+ }
+
+ @Override
+ public ResourceAssignment computeResourceMapping(Resource resource, IdealState idealState, CurrentStateOutput currentStateOutput,
+ ClusterDataCache clusterData) {
+
+ final String resourceName = resource.getResourceName();
+ final String containerType = resourceName;
+
+ final SortedSet<String> allContainers = Sets.newTreeSet(new IndexedNameComparator());
+ allContainers.addAll(idealState.getPartitionSet());
+
+ final SortedSet<String> allProviders = Sets.newTreeSet(new IndexedNameComparator());
+ for (LiveInstance instance : clusterData.getLiveInstances().values()) {
+ allProviders.add(instance.getId());
+ }
+
+ final ResourceState currentState = new ResourceState(resourceName, currentStateOutput);
+
+ // target container count
+ log.debug(String.format("Retrieving target container count for type '%s'", containerType));
+ int targetCount = -1;
+ try {
+ targetCount = targetProvider.getTargetContainerCount(containerType);
+ } catch (Exception e) {
+ log.error(String.format("Could not retrieve target count for '%s'", containerType), e);
+ return new ResourceAssignment(resourceName);
+ }
+
+ // provider sanity check
+ if (allProviders.isEmpty()) {
+ log.warn(String.format("Could not find any providers"));
+ return new ResourceAssignment(resourceName);
+ }
+
+ // all containers
+ SortedSet<String> assignedContainers = getAssignedContainers(currentState, allContainers);
+ SortedSet<String> failedContainers = getFailedContainers(currentState, allContainers);
+
+ log.info(String.format("Rebalancing '%s' (target=%d, active=%d, failures=%d)", resourceName, targetCount, assignedContainers.size(),
+ failedContainers.size()));
+
+ if (log.isDebugEnabled()) {
+ log.debug(String.format("%s: assigned containers %s", resourceName, assignedContainers));
+ log.debug(String.format("%s: failed containers %s", resourceName, failedContainers));
+ }
+
+ // assignment
+ int maxCountPerProvider = (int) Math.ceil(targetCount / (float) allProviders.size());
+
+ ResourceAssignment assignment = new ResourceAssignment(resourceName);
+ CountMap counts = new CountMap(allProviders);
+ int assignmentCount = 0;
+
+ // currently assigned
+ for (String containerName : assignedContainers) {
+ String providerName = getProvider(currentState, containerName);
+ Partition partition = new Partition(containerName);
+
+ if (failedContainers.contains(containerName)) {
+ log.warn(String.format("Container '%s:%s' failed, going offline", providerName, containerName));
+ assignment.addReplicaMap(partition, Collections.singletonMap(providerName, "OFFLINE"));
+
+ } else if (counts.get(providerName) >= maxCountPerProvider) {
+ log.warn(String.format("Container '%s:%s' misassigned, going offline", providerName, containerName));
+ assignment.addReplicaMap(partition, Collections.singletonMap(providerName, "OFFLINE"));
+
+ } else {
+ assignment.addReplicaMap(partition, Collections.singletonMap(providerName, "ONLINE"));
+ }
+
+ counts.increment(providerName);
+ assignmentCount++;
+ }
+
+ // currently unassigned
+ SortedSet<String> unassignedContainers = Sets.newTreeSet(new IndexedNameComparator());
+ unassignedContainers.addAll(allContainers);
+ unassignedContainers.removeAll(assignedContainers);
+
+ for (String containerName : unassignedContainers) {
+ if (assignmentCount >= targetCount)
+ break;
+
+ String providerName = counts.getMinKey();
+ Partition partition = new Partition(containerName);
+
+ if (failedContainers.contains(containerName)) {
+ log.warn(String.format("Container '%s:%s' failed and unassigned, going offline", providerName, containerName));
+ assignment.addReplicaMap(partition, Collections.singletonMap(providerName, "OFFLINE"));
+
+ } else {
+ assignment.addReplicaMap(partition, Collections.singletonMap(providerName, "ONLINE"));
+ }
+
+ counts.increment(providerName);
+ assignmentCount++;
+ }
+
+ if (log.isDebugEnabled()) {
+ log.debug(String.format("assignment counts: %s", counts));
+ log.debug(String.format("assignment: %s", assignment));
+ }
+
+ return assignment;
+ }
+
+ boolean hasProvider(ResourceState state, String containerName) {
+ Map<String, String> currentStateMap = state.getCurrentStateMap(containerName);
+ Map<String, String> pendingStateMap = state.getPendingStateMap(containerName);
+ return hasInstance(currentStateMap, "ONLINE") || hasInstance(pendingStateMap, "ONLINE");
+ }
+
+ String getProvider(ResourceState state, String containerName) {
+ Map<String, String> currentStateMap = state.getCurrentStateMap(containerName);
+ if (hasInstance(currentStateMap, "ONLINE"))
+ return getInstance(currentStateMap, "ONLINE");
+
+ Map<String, String> pendingStateMap = state.getPendingStateMap(containerName);
+ return getInstance(pendingStateMap, "ONLINE");
+ }
+
+ SortedSet<String> getFailedContainers(ResourceState state, Collection<String> containers) {
+ SortedSet<String> failedContainers = Sets.newTreeSet(new IndexedNameComparator());
+ for (String containerName : containers) {
+ Map<String, String> currentStateMap = state.getCurrentStateMap(containerName);
+ Map<String, String> pendingStateMap = state.getPendingStateMap(containerName);
+
+ if (hasInstance(currentStateMap, "ERROR")) {
+ failedContainers.add(containerName);
+ continue;
+ }
+
+ if (!hasInstance(currentStateMap, "ONLINE") || hasInstance(pendingStateMap, "OFFLINE"))
+ continue;
+
+ // container listed online and not in transition, but not active
+ if (!statusProvider.isHealthy(containerName)) {
+ log.warn(String.format("Container '%s' designated ONLINE, but is not active", containerName));
+ failedContainers.add(containerName);
+ }
+ }
+ return failedContainers;
+ }
+
+ SortedSet<String> getAssignedContainers(ResourceState state, Collection<String> containers) {
+ SortedSet<String> assignedContainers = Sets.newTreeSet(new IndexedNameComparator());
+ for (String containerName : containers) {
+
+ if (!hasProvider(state, containerName))
+ continue;
+
+ assignedContainers.add(containerName);
+ }
+ return assignedContainers;
+ }
+
+ boolean hasInstance(Map<String, String> stateMap, String state) {
+ if (!stateMap.isEmpty()) {
+ for (Map.Entry<String, String> entry : stateMap.entrySet()) {
+ if (entry.getValue().equals(state)) {
+ return true;
+ }
+ }
+ }
+ return false;
+ }
+
+ String getInstance(Map<String, String> stateMap, String state) {
+ if (!stateMap.isEmpty()) {
+ for (Map.Entry<String, String> entry : stateMap.entrySet()) {
+ if (entry.getValue().equals(state)) {
+ return entry.getKey();
+ }
+ }
+ }
+ throw new IllegalArgumentException(String.format("Could not find instance with state '%s'", state));
+ }
+
+ class IndexedNameComparator implements Comparator<String> {
+ Pattern pattern = Pattern.compile("^(.*)([0-9]+)$");
+
+ @Override
+ public int compare(String o1, String o2) {
+ Matcher m1 = pattern.matcher(o1);
+ Matcher m2 = pattern.matcher(o2);
+
+ boolean find1 = m1.find();
+ boolean find2 = m2.find();
+
+ if (!find1 && !find2)
+ return o1.compareTo(o2);
+
+ if (!find1 && find2)
+ return -1;
+
+ if (find1 && !find2)
+ return 1;
+
+ String name1 = m1.group(1);
+ String name2 = m2.group(1);
+
+ int name_comp = name1.compareTo(name2);
+ if (name_comp != 0)
+ return name_comp;
+
+ int index1 = Integer.valueOf(m1.group(2));
+ int index2 = Integer.valueOf(m2.group(2));
+
+ return (int) Math.signum(index1 - index2);
+ }
+ }
+
+ class CountMap extends HashMap<String, Integer> {
+ /**
+ *
+ */
+ private static final long serialVersionUID = 3954138748385337978L;
+
+ public CountMap(Collection<String> keys) {
+ super();
+ for (String key : keys) {
+ put(key, 0);
+ }
+ }
+
+ @Override
+ public Integer get(Object key) {
+ Preconditions.checkArgument(containsKey(key), "Key %s not found", key);
+ return super.get(key);
+ }
+
+ public int increment(String key) {
+ int newValue = get(key) + 1;
+ Preconditions.checkArgument(containsKey(key), "Key %s not found", key);
+ put(key, newValue);
+ return newValue;
+ }
+
+ public String getMinKey() {
+ Preconditions.checkState(size() > 0, "Must contain at least one item");
+
+ String minKey = null;
+ int minValue = Integer.MAX_VALUE;
+
+ for (String key : keySet()) {
+ int value = get(key);
+ if (value < minValue) {
+ minValue = value;
+ minKey = key;
+ }
+ }
+
+ return minKey;
+ }
+
+ public String getMaxKey() {
+ Preconditions.checkState(size() > 0, "Must contain at least one item");
+
+ String maxKey = null;
+ int maxValue = Integer.MIN_VALUE;
+
+ for (String key : keySet()) {
+ int value = get(key);
+ if (value > maxValue) {
+ maxValue = value;
+ maxKey = key;
+ }
+ }
+
+ return maxKey;
+ }
+ }
+
+ class ResourceState {
+ final String resourceName;
+ final CurrentStateOutput state;
+
+ public ResourceState(String resourceName, CurrentStateOutput state) {
+ this.resourceName = resourceName;
+ this.state = state;
+ }
+
+ Map<String, String> getCurrentStateMap(String partitionName) {
+ return state.getCurrentStateMap(resourceName, new Partition(partitionName));
+ }
+
+ Map<String, String> getPendingStateMap(String partitionName) {
+ return state.getPendingStateMap(resourceName, new Partition(partitionName));
+ }
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/ProviderRebalancerSingleton.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/ProviderRebalancerSingleton.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/ProviderRebalancerSingleton.java
new file mode 100644
index 0000000..c46f5f5
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/ProviderRebalancerSingleton.java
@@ -0,0 +1,38 @@
+package org.apache.helix.metamanager.provider;
+
+import org.apache.helix.metamanager.StatusProvider;
+import org.apache.helix.metamanager.TargetProvider;
+import org.apache.log4j.Logger;
+
+/**
+ * Utility for dependency injection into ProviderRebalancer.
+ *
+ */
+public class ProviderRebalancerSingleton {
+
+ static final Logger log = Logger.getLogger(ProviderRebalancerSingleton.class);
+
+ static TargetProvider targetProvider;
+ static StatusProvider statusProvider;
+
+ private ProviderRebalancerSingleton() {
+ // left blank
+ }
+
+ public static TargetProvider getTargetProvider() {
+ return targetProvider;
+ }
+
+ public static void setTargetProvider(TargetProvider targetProvider) {
+ ProviderRebalancerSingleton.targetProvider = targetProvider;
+ }
+
+ public static StatusProvider getStatusProvider() {
+ return statusProvider;
+ }
+
+ public static void setStatusProvider(StatusProvider statusProvider) {
+ ProviderRebalancerSingleton.statusProvider = statusProvider;
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/ProviderStateModel.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/ProviderStateModel.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/ProviderStateModel.java
new file mode 100644
index 0000000..090f807
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/ProviderStateModel.java
@@ -0,0 +1,114 @@
+package org.apache.helix.metamanager.provider;
+
+import org.apache.helix.NotificationContext;
+import org.apache.helix.metamanager.ClusterAdmin;
+import org.apache.helix.metamanager.ContainerProvider;
+import org.apache.helix.model.Message;
+import org.apache.helix.participant.statemachine.StateModel;
+import org.apache.helix.participant.statemachine.StateModelInfo;
+import org.apache.helix.participant.statemachine.Transition;
+import org.apache.log4j.Logger;
+
/**
 * Helix state model implementation for {@link ContainerProvider}s. Updates
 * configuration of the managed Helix cluster and spawns and destroys container
 * instances. The order of operations within each transition is significant:
 * stale state is removed before new state is registered.
 */
@StateModelInfo(initialState = "OFFLINE", states = { "OFFLINE", "ONLINE" })
public class ProviderStateModel extends StateModel {

    static final Logger log = Logger.getLogger(ProviderStateModel.class);

    // spawns and destroys physical container instances
    ContainerProvider provider;
    // admin handle for the managed (not the meta) cluster
    ClusterAdmin admin;

    public ProviderStateModel(ContainerProvider provider, ClusterAdmin admin) {
        this.provider = provider;
        this.admin = admin;
    }

    /**
     * OFFLINE -&gt; ONLINE: this provider takes ownership of the logical
     * container. Any stale instance or container left behind by a previous
     * owner is removed first, then the instance is registered with the managed
     * cluster and the physical container is created.
     *
     * @param m       transition message; resource = container type, partition = container id
     * @param context supplies the participant (provider) instance name
     * @throws Exception if instance registration or container creation fails
     */
    @Transition(from = "OFFLINE", to = "ONLINE")
    public void acquire(Message m, NotificationContext context) throws Exception {
        String containerType = m.getResourceName();
        String containerId = m.getPartitionName();
        String instanceId = context.getManager().getInstanceName();

        log.trace(String.format("%s:%s transitioning from OFFLINE to ONLINE", containerId, instanceId));

        // clear any stale container/instance left by a previous owner
        bestEffortRemove(containerId);

        // add instance to cluster
        admin.addInstance(containerId, containerType);

        // create container
        provider.create(containerId, containerType);

        try {
            admin.rebalance();
        } catch (Exception e) {
            // ignore: best effort -- presumably rebalancing is re-triggered by
            // a later transition or controller pass; TODO confirm
            log.warn(String.format("rebalancing cluster failed (error='%s')", e.getMessage()));
        }

        log.info(String.format("%s acquired container '%s' (type='%s')", instanceId, containerId, containerType));
    }

    /**
     * ONLINE -&gt; OFFLINE: destroys the physical container and deregisters the
     * instance from the managed cluster (both best effort), then tries to
     * rebalance the managed cluster.
     */
    @Transition(from = "ONLINE", to = "OFFLINE")
    public void release(Message m, NotificationContext context) {
        String containerId = m.getPartitionName();
        String instanceId = context.getManager().getInstanceName();

        log.trace(String.format("%s:%s transitioning from ONLINE to OFFLINE", containerId, instanceId));

        bestEffortRemove(containerId);

        try {
            admin.rebalance();
        } catch (Exception e) {
            // ignore: best effort, see acquire()
            log.warn(String.format("rebalancing cluster failed (error='%s')", e.getMessage()));
        }

        log.info(String.format("%s destroyed container '%s'", instanceId, containerId));

    }

    /**
     * ERROR -&gt; OFFLINE: recovery is handled by delegating to the regular
     * release transition.
     */
    @Transition(from = "ERROR", to = "OFFLINE")
    public void recover(Message m, NotificationContext context) {
        String containerId = m.getPartitionName();
        String instanceId = context.getManager().getInstanceName();

        log.trace(String.format("%s:%s transitioning from ERROR to OFFLINE", containerId, instanceId));

        release(m, context);
    }

    /**
     * OFFLINE -&gt; DROPPED: only logs the transition; no cleanup is performed
     * in this handler.
     */
    @Transition(from = "OFFLINE", to = "DROPPED")
    public void drop(Message m, NotificationContext context) {
        String containerId = m.getPartitionName();
        String instanceId = context.getManager().getInstanceName();

        log.trace(String.format("%s:%s transitioning from OFFLINE to DROPPED", containerId, instanceId));
    }

    /**
     * Destroys the container and removes the instance registration, logging
     * rather than propagating failures -- either may legitimately not exist.
     */
    private void bestEffortRemove(String containerId) {
        log.debug(String.format("Best effort removal of container '%s'", containerId));

        try {
            provider.destroy(containerId);
            log.debug(String.format("Container '%s' destroyed", containerId));
        } catch (Exception e) {
            log.debug(String.format("Container '%s' does not exist", containerId));
        }

        try {
            admin.removeInstance(containerId);
            log.debug(String.format("Instance '%s' removed", containerId));
        } catch (Exception e) {
            log.debug(String.format("Instance '%s' does not exist", containerId));
        }

    }

}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/ProviderStateModelFactory.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/ProviderStateModelFactory.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/ProviderStateModelFactory.java
new file mode 100644
index 0000000..36a071a
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/ProviderStateModelFactory.java
@@ -0,0 +1,27 @@
+package org.apache.helix.metamanager.provider;
+
+import org.apache.helix.metamanager.ClusterAdmin;
+import org.apache.helix.metamanager.ContainerProvider;
+import org.apache.helix.participant.statemachine.StateModelFactory;
+
+/**
+ * Factory for {@link ProviderStateModel}. Injects {@link ClusterAdmin} for
+ * managed cluster and {@link ContainerProvider}.
+ *
+ */
+class ProviderStateModelFactory extends StateModelFactory<ProviderStateModel> {
+
+ final ContainerProvider provider;
+ final ClusterAdmin admin;
+
+ public ProviderStateModelFactory(ContainerProvider provider, ClusterAdmin admin) {
+ super();
+ this.provider = provider;
+ this.admin = admin;
+ }
+
+ @Override
+ public ProviderStateModel createNewStateModel(String partitionName) {
+ return new ProviderStateModel(provider, admin);
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/local/LocalContainerProvider.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/local/LocalContainerProvider.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/local/LocalContainerProvider.java
new file mode 100644
index 0000000..b63d760
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/local/LocalContainerProvider.java
@@ -0,0 +1,75 @@
+package org.apache.helix.metamanager.provider.local;
+
+import java.util.HashSet;
+import java.util.Map;
+
+import org.apache.helix.metamanager.ClusterContainerProvider;
+import org.apache.helix.metamanager.managed.ContainerProcess;
+import org.apache.helix.metamanager.provider.local.LocalContainerSingleton.LocalProcess;
+import org.apache.log4j.Logger;
+
+public class LocalContainerProvider implements ClusterContainerProvider {
+
+ static final Logger log = Logger.getLogger(LocalContainerProvider.class);
+
+ static final String REQUIRED_TYPE = "container";
+
+ final String zkAddress;
+ final String clusterName;
+ final String providerName;
+
+ public LocalContainerProvider(String zkAddress, String clusterName, String providerName) {
+ this.zkAddress = zkAddress;
+ this.clusterName = clusterName;
+ this.providerName = providerName;
+ }
+
+ @Override
+ public void create(String id, String type) throws Exception {
+ Map<String, LocalProcess> processes = LocalContainerSingleton.getProcesses();
+
+ synchronized (processes) {
+ if(processes.containsKey(id))
+ throw new IllegalArgumentException(String.format("Process '%s' already exists", id));
+
+ if(!type.equals(REQUIRED_TYPE))
+ throw new IllegalArgumentException(String.format("Type '%s' not supported", type));
+
+ log.info(String.format("Running container '%s' (zkAddress='%s', clusterName='%s')", id, zkAddress, clusterName));
+
+ ContainerProcess process = new ContainerProcess(clusterName, zkAddress, id);
+ process.start();
+
+ processes.put(id, new LocalProcess(id, providerName, process));
+
+ }
+ }
+
+ @Override
+ public void destroy(String id) throws Exception {
+ Map<String, LocalProcess> processes = LocalContainerSingleton.getProcesses();
+
+ synchronized (processes) {
+ if(!processes.containsKey(id))
+ throw new IllegalArgumentException(String.format("Process '%s' does not exists", id));
+
+ log.info(String.format("Destroying container '%s'", id));
+
+ LocalProcess local = processes.remove(id);
+
+ local.process.stop();
+ }
+ }
+
+ @Override
+ public void destroyAll() {
+ Map<String, LocalProcess> processes = LocalContainerSingleton.getProcesses();
+
+ synchronized (processes) {
+ log.info("Destroying all processes");
+ for(String id : new HashSet<String>(processes.keySet())) {
+ try { destroy(id); } catch (Exception ignore) {}
+ }
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/local/LocalContainerSingleton.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/local/LocalContainerSingleton.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/local/LocalContainerSingleton.java
new file mode 100644
index 0000000..d25d3ba
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/local/LocalContainerSingleton.java
@@ -0,0 +1,40 @@
+package org.apache.helix.metamanager.provider.local;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.helix.metamanager.managed.ContainerProcess;
+
+public class LocalContainerSingleton {
+ final static Map<String, LocalProcess> processes = new HashMap<String, LocalProcess>();
+
+ private LocalContainerSingleton() {
+ // left blank
+ }
+
+ public static Map<String, LocalProcess> getProcesses() {
+ return processes;
+ }
+
+ public static void reset() {
+ synchronized (processes) {
+ for(LocalProcess local : processes.values()) {
+ local.process.stop();
+ }
+ processes.clear();
+ }
+ }
+
+ static class LocalProcess {
+ final String id;
+ final String owner;
+ final ContainerProcess process;
+
+ public LocalProcess(String id, String owner, ContainerProcess process) {
+ this.id = id;
+ this.owner = owner;
+ this.process = process;
+ }
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/local/LocalContainerStatusProvider.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/local/LocalContainerStatusProvider.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/local/LocalContainerStatusProvider.java
new file mode 100644
index 0000000..383a0d7
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/local/LocalContainerStatusProvider.java
@@ -0,0 +1,37 @@
+package org.apache.helix.metamanager.provider.local;
+
+import java.util.Map;
+
+import org.apache.helix.metamanager.ClusterContainerStatusProvider;
+import org.apache.helix.metamanager.provider.local.LocalContainerSingleton.LocalProcess;
+
+public class LocalContainerStatusProvider implements ClusterContainerStatusProvider {
+
+ @Override
+ public boolean exists(String id) {
+ Map<String, LocalProcess> processes = LocalContainerSingleton.getProcesses();
+
+ synchronized (processes) {
+ return processes.containsKey(id);
+ }
+ }
+
+ @Override
+ public boolean isActive(String id) {
+ Map<String, LocalProcess> processes = LocalContainerSingleton.getProcesses();
+
+ synchronized (processes) {
+ return processes.get(id).process != null;
+ }
+ }
+
+ @Override
+ public boolean isFailed(String id) {
+ Map<String, LocalProcess> processes = LocalContainerSingleton.getProcesses();
+
+ synchronized (processes) {
+ return processes.get(id).process == null;
+ }
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/shell/ShellContainerProvider.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/shell/ShellContainerProvider.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/shell/ShellContainerProvider.java
new file mode 100644
index 0000000..eef730a
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/shell/ShellContainerProvider.java
@@ -0,0 +1,81 @@
+package org.apache.helix.metamanager.provider.shell;
+
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+
+import org.apache.helix.metamanager.ClusterContainerProvider;
+import org.apache.helix.metamanager.provider.shell.ShellContainerSingleton.ShellProcess;
+import org.apache.log4j.Logger;
+
+public class ShellContainerProvider implements ClusterContainerProvider {
+
+ static final Logger log = Logger.getLogger(ShellContainerProvider.class);
+
+ static final String REQUIRED_TYPE = "container";
+ static final String RUN_COMMAND = "/bin/sh";
+
+ // global view of processes required
+ static final Object staticLock = new Object();
+ static final Map<String, ShellProcess> processes = new HashMap<String, ShellProcess>();
+
+ final String zkAddress;
+ final String clusterName;
+ final String command;
+ final String providerName;
+
+ public ShellContainerProvider(String zkAddress, String clusterName, String providerName, String command) {
+ this.zkAddress = zkAddress;
+ this.clusterName = clusterName;
+ this.command = command;
+ this.providerName = providerName;
+ }
+
+ @Override
+ public void create(String id, String type) throws Exception {
+ Map<String, ShellProcess> processes = ShellContainerSingleton.getProcesses();
+
+ synchronized (processes) {
+ if(processes.containsKey(id))
+ throw new IllegalArgumentException(String.format("Process '%s' already exists", id));
+
+ if(!type.equals(REQUIRED_TYPE))
+ throw new IllegalArgumentException(String.format("Type '%s' not supported", type));
+
+ log.info(String.format("Running container '%s' (zkAddress='%s', clusterName='%s', command='%s')", id, zkAddress, clusterName, command));
+
+ ProcessBuilder builder = new ProcessBuilder(RUN_COMMAND, command, zkAddress, clusterName, id);
+ Process process = builder.start();
+
+ processes.put(id, new ShellProcess(id, providerName, process));
+ }
+ }
+
+ @Override
+ public void destroy(String id) throws Exception {
+ Map<String, ShellProcess> processes = ShellContainerSingleton.getProcesses();
+
+ synchronized (processes) {
+ if(!processes.containsKey(id))
+ throw new IllegalArgumentException(String.format("Process '%s' does not exists", id));
+
+ log.info(String.format("Destroying container '%s'", id));
+
+ ShellProcess shell = processes.remove(id);
+ shell.process.destroy();
+ shell.process.waitFor();
+ }
+ }
+
+ @Override
+ public void destroyAll() {
+ Map<String, ShellProcess> processes = ShellContainerSingleton.getProcesses();
+
+ synchronized (processes) {
+ log.info("Destroying all processes");
+ for(ShellProcess process : new HashSet<ShellProcess>(processes.values())) {
+ try { destroy(process.id); } catch (Exception ignore) {}
+ }
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/shell/ShellContainerSingleton.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/shell/ShellContainerSingleton.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/shell/ShellContainerSingleton.java
new file mode 100644
index 0000000..ae7f3c1
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/shell/ShellContainerSingleton.java
@@ -0,0 +1,38 @@
+package org.apache.helix.metamanager.provider.shell;
+
+import java.util.HashMap;
+import java.util.Map;
+
/**
 * VM-global registry of spawned shell processes, shared between provider and
 * status-provider instances. Callers must synchronize on the returned map.
 */
public class ShellContainerSingleton {
    static final Map<String, ShellProcess> processes = new HashMap<String, ShellProcess>();

    private ShellContainerSingleton() {
        // static holder, not instantiable
    }

    public static Map<String, ShellProcess> getProcesses() {
        return processes;
    }

    /** Destroys every registered process, waits for termination, and clears the registry. */
    public static void reset() {
        synchronized (processes) {
            for (ShellProcess shell : processes.values()) {
                shell.process.destroy();
                try {
                    shell.process.waitFor();
                } catch (InterruptedException e) {
                    // FIX: restore the interrupt status instead of silently
                    // swallowing it (the original caught and ignored Exception)
                    Thread.currentThread().interrupt();
                }
            }
            processes.clear();
        }
    }

    /** Record of a spawned shell process and its owning provider. */
    static class ShellProcess {
        final String id;
        final String owner;
        final Process process;

        public ShellProcess(String id, String owner, Process process) {
            this.id = id;
            this.owner = owner;
            this.process = process;
        }
    }
}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/shell/ShellContainerStatusProvider.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/shell/ShellContainerStatusProvider.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/shell/ShellContainerStatusProvider.java
new file mode 100644
index 0000000..0030c2d
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/shell/ShellContainerStatusProvider.java
@@ -0,0 +1,52 @@
+package org.apache.helix.metamanager.provider.shell;
+
+import java.util.Map;
+
+import org.apache.helix.metamanager.ClusterContainerStatusProvider;
+import org.apache.helix.metamanager.provider.shell.ShellContainerSingleton.ShellProcess;
+
+public class ShellContainerStatusProvider implements ClusterContainerStatusProvider {
+
+ @Override
+ public boolean exists(String id) {
+ Map<String, ShellProcess> processes = ShellContainerSingleton.getProcesses();
+
+ synchronized (processes) {
+ return processes.containsKey(id);
+ }
+ }
+
+ @Override
+ public boolean isActive(String id) {
+ Map<String, ShellProcess> processes = ShellContainerSingleton.getProcesses();
+
+ synchronized (processes) {
+ ShellProcess shell = processes.get(id);
+
+ try {
+ shell.process.exitValue();
+ return false;
+ } catch (IllegalThreadStateException e) {
+ // still running
+ return true;
+ }
+ }
+ }
+
+ @Override
+ public boolean isFailed(String id) {
+ Map<String, ShellProcess> processes = ShellContainerSingleton.getProcesses();
+
+ synchronized (processes) {
+ ShellProcess shell = processes.get(id);
+
+ try {
+ return (shell.process.exitValue() != 0);
+ } catch (IllegalThreadStateException e) {
+ // still running
+ return false;
+ }
+ }
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/ApplicationConfig.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/ApplicationConfig.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/ApplicationConfig.java
new file mode 100644
index 0000000..4c8f303
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/ApplicationConfig.java
@@ -0,0 +1,32 @@
+package org.apache.helix.metamanager.provider.yarn;
+
+/**
+ * Immutable bundle of the addresses and names a YARN-managed application
+ * needs: the Helix cluster (address and name), the metadata store address,
+ * and the owning provider's name. Fields are package-visible and read
+ * directly by the other classes in this package.
+ */
+public class ApplicationConfig {
+    final String clusterAddress;
+    final String clusterName;
+    final String metadataAddress;
+    final String providerName;
+
+    public ApplicationConfig(String clusterAddress, String clusterName,
+            String metadataAddress, String providerName) {
+        this.clusterAddress = clusterAddress;
+        this.clusterName = clusterName;
+        this.metadataAddress = metadataAddress;
+        this.providerName = providerName;
+    }
+
+    public String getClusterAddress() {
+        return clusterAddress;
+    }
+
+    public String getClusterName() {
+        return clusterName;
+    }
+
+    public String getMetadataAddress() {
+        return metadataAddress;
+    }
+
+    public String getProviderName() {
+        return providerName;
+    }
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/ContainerMetadata.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/ContainerMetadata.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/ContainerMetadata.java
new file mode 100644
index 0000000..73d1a1b
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/ContainerMetadata.java
@@ -0,0 +1,50 @@
+package org.apache.helix.metamanager.provider.yarn;
+
+
+/**
+ * Metadata record describing a single managed container and its position in
+ * the container lifecycle. Instances are serialized to/from JSON by
+ * {@link Utils}, so field names are part of the wire format — do not rename.
+ */
+class ContainerMetadata {
+
+    // Lifecycle states. Transitions observed elsewhere in this package:
+    // the provider creates containers in ACQUIRE, the container service moves
+    // CONNECTING -> ACTIVE (or FAILED on startup error) and TEARDOWN -> HALTED;
+    // the provider waits for FINALIZE before deleting the record.
+    static enum ContainerState {
+        ACQUIRE,
+        CONNECTING,
+        ACTIVE,
+        TEARDOWN,
+        FAILED,
+        HALTED,
+        FINALIZE
+    }
+
+    String id;              // logical container id
+    ContainerState state;   // current lifecycle state
+    int yarnId;             // YARN container id, -1 until allocated
+    String command;         // shell command the container runs
+    String owner;           // name of the provider owning this container
+
+    // no-arg constructor required for JSON deserialization
+    public ContainerMetadata() {
+        // left blank
+    }
+
+    /** Creates a fresh record in the initial ACQUIRE state with no YARN id yet. */
+    public ContainerMetadata(String id, String command, String owner) {
+        this.id = id;
+        this.state = ContainerState.ACQUIRE;
+        this.yarnId = -1;
+        this.command = command;
+        this.owner = owner;
+    }
+
+    /** Copy constructor that moves an existing record to a new state. */
+    public ContainerMetadata(ContainerMetadata node, ContainerState state) {
+        this.id = node.id;
+        this.state = state;
+        this.yarnId = node.yarnId;
+        this.command = node.command;
+        this.owner = node.owner;
+    }
+
+    /** Copy constructor that sets both a new state and the allocated YARN id. */
+    public ContainerMetadata(ContainerMetadata node, ContainerState state, int yarnId) {
+        this.id = node.id;
+        this.state = state;
+        this.yarnId = yarnId;
+        this.command = node.command;
+        this.owner = node.owner;
+    }
+}
+
+
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/MetadataService.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/MetadataService.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/MetadataService.java
new file mode 100644
index 0000000..dc6c060
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/MetadataService.java
@@ -0,0 +1,42 @@
+package org.apache.helix.metamanager.provider.yarn;
+
+import java.util.Collection;
+
+/**
+ * CRUD abstraction over the shared container-metadata store used to
+ * coordinate the YARN container provider, application master and container
+ * processes. All operations except {@link #exists(String)} surface failures
+ * as {@link MetadataServiceException}.
+ */
+public interface MetadataService {
+
+    /** Returns true if a metadata record exists for the given container id. */
+    public boolean exists(String id);
+
+    /** Creates a metadata record for a new container. */
+    public void create(ContainerMetadata meta) throws MetadataServiceException;
+
+    /** Reads the metadata record for the given container id. */
+    public ContainerMetadata read(String id) throws MetadataServiceException;
+
+    /** Reads the metadata records of all known containers. */
+    public Collection<ContainerMetadata> readAll() throws MetadataServiceException;
+
+    /** Updates an existing container's metadata record. */
+    public void update(ContainerMetadata meta) throws MetadataServiceException;
+
+    /** Deletes the metadata record for the given container id. */
+    public void delete(String id) throws MetadataServiceException;
+
+    /** Checked exception wrapping any failure of a metadata-store operation. */
+    public static class MetadataServiceException extends Exception {
+
+        /**
+         *
+         */
+        private static final long serialVersionUID = -2846997013918977056L;
+
+        public MetadataServiceException() {
+            super();
+        }
+
+        public MetadataServiceException(String message, Throwable cause) {
+            super(message, cause);
+        }
+
+        public MetadataServiceException(String message) {
+            super(message);
+        }
+
+        public MetadataServiceException(Throwable cause) {
+            super(cause);
+        }
+    }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/Utils.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/Utils.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/Utils.java
new file mode 100644
index 0000000..82871f1
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/Utils.java
@@ -0,0 +1,94 @@
+package org.apache.helix.metamanager.provider.yarn;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.yarn.api.records.LocalResource;
+import org.apache.hadoop.yarn.api.records.LocalResourceType;
+import org.apache.hadoop.yarn.api.records.LocalResourceVisibility;
+import org.apache.hadoop.yarn.util.ConverterUtils;
+import org.apache.hadoop.yarn.util.Records;
+import org.apache.helix.metamanager.provider.yarn.ContainerMetadata.ContainerState;
+import org.apache.log4j.Logger;
+
+import com.google.gson.Gson;
+import com.google.gson.GsonBuilder;
+import com.google.gson.TypeAdapter;
+import com.google.gson.stream.JsonReader;
+import com.google.gson.stream.JsonToken;
+import com.google.gson.stream.JsonWriter;
+
+/**
+ * Static helpers for this package: JSON (de)serialization of
+ * {@link ContainerMetadata} via a shared Gson instance, and a placeholder
+ * YARN {@link LocalResource} used to work around container-launch
+ * requirements.
+ */
+public class Utils {
+
+    static final Logger log = Logger.getLogger(Utils.class);
+
+    // shared Gson instance with a custom adapter for the ContainerState enum
+    static Gson gson;
+    static {
+        GsonBuilder builder = new GsonBuilder();
+        builder.registerTypeAdapter(ContainerState.class, new ContainerStateAdapter());
+        builder.setPrettyPrinting();
+        gson = builder.create();
+    }
+
+    // NOTE(review): initialized at class-load time; any failure inside
+    // createDummyResources() calls System.exit(1), terminating the JVM.
+    static Map<String, LocalResource> dummyResources = createDummyResources();
+
+    /** Serializes container metadata to pretty-printed JSON. */
+    static String toJson(ContainerMetadata meta) {
+        return gson.toJson(meta);
+    }
+
+    /** Deserializes container metadata from its JSON form. */
+    static ContainerMetadata fromJson(String json) {
+        return gson.fromJson(json, ContainerMetadata.class);
+    }
+
+    /** Returns the shared placeholder local-resource map (see below). */
+    static Map<String, LocalResource> getDummyResources() {
+        return dummyResources;
+    }
+
+    // Builds a LocalResource map around an empty marker file. Used as a
+    // workaround where a non-empty local-resource map is needed at launch.
+    // NOTE(review): /tmp/dummy is a fixed, world-shared path — concurrent
+    // users on one host share (and may race on) the same file; and
+    // System.exit(1) from library code kills the whole JVM — consider
+    // throwing instead.
+    private static Map<String, LocalResource> createDummyResources() {
+        File dummy = new File("/tmp/dummy");
+
+        if(!dummy.exists()) {
+            try {
+                dummy.createNewFile();
+            } catch(Exception e) {
+                log.error("could not create dummy file", e);
+                System.exit(1);
+            }
+        }
+
+        Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();
+        Path path = new Path(dummy.toURI());
+        LocalResource localResource = Records.newRecord(LocalResource.class);
+        localResource.setType(LocalResourceType.FILE);
+        localResource.setVisibility(LocalResourceVisibility.APPLICATION);
+        localResource.setResource(ConverterUtils.getYarnUrlFromPath(path));
+        localResource.setTimestamp(dummy.lastModified());
+        localResource.setSize(dummy.length());
+        localResources.put("dummy", localResource);
+        return localResources;
+    }
+
+    /** Gson adapter mapping ContainerState to/from its enum name, null-safe. */
+    static class ContainerStateAdapter extends TypeAdapter<ContainerState> {
+        @Override
+        public ContainerState read(JsonReader reader) throws IOException {
+            if (reader.peek() == JsonToken.NULL) {
+                reader.nextNull();
+                return null;
+            }
+            return ContainerState.valueOf(reader.nextString());
+        }
+
+        @Override
+        public void write(JsonWriter writer, ContainerState value) throws IOException {
+            if (value == null) {
+                writer.nullValue();
+                return;
+            }
+            writer.value(value.name());
+        }
+    }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/YarnApplication.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/YarnApplication.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/YarnApplication.java
new file mode 100644
index 0000000..c4f3668
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/YarnApplication.java
@@ -0,0 +1,125 @@
+package org.apache.helix.metamanager.provider.yarn;
+
+import java.net.InetSocketAddress;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.net.NetUtils;
+import org.apache.hadoop.yarn.api.ClientRMProtocol;
+import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationResponse;
+import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationRequest;
+import org.apache.hadoop.yarn.api.records.ApplicationId;
+import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
+import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
+import org.apache.hadoop.yarn.api.records.LocalResource;
+import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.exceptions.YarnRemoteException;
+import org.apache.hadoop.yarn.ipc.YarnRPC;
+import org.apache.hadoop.yarn.util.Records;
+import org.apache.log4j.Logger;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * Client-side handle for a YARN application hosting the provider's
+ * application master. Submits the AM launch context to the ResourceManager
+ * on {@link #start()} and force-kills the application on {@link #stop()}.
+ */
+public class YarnApplication {
+
+    static final Logger log = Logger.getLogger(YarnApplication.class);
+
+    // environment variable names used to pass ApplicationConfig to the AM
+    static final String ENV_CLUSTER_ADDRESS = "YA_CLUSTER_ADDRESS";
+    static final String ENV_CLUSTER_NAME = "YA_CLUSTER_NAME";
+    static final String ENV_METADATA_ADDRESS = "YA_METADATA_ADDRESS";
+    static final String ENV_PROVIDER_NAME = "YA_PROVIDER_NAME";
+
+    // NOTE(review): hard-coded, user-specific absolute path — will only work
+    // on the original developer's machine; should come from configuration.
+    static final String MASTER_COMMAND = "/bin/sh /home/apucher/incubator-helix/recipes/meta-cluster-manager/target/meta-cluster-manager-pkg/bin/yarn-master-process.sh 1>%s/stdout 2>%s/stderr";
+
+    Configuration conf;
+    YarnRPC rpc;
+    ClientRMProtocol rmClient;
+    ApplicationId appId;    // set once start() has acquired an id
+
+    final ApplicationConfig appConfig;
+
+    public YarnApplication(ApplicationConfig appConfig) {
+        this.appConfig = appConfig;
+        configure(new YarnConfiguration());
+    }
+
+    /**
+     * Connects to the ResourceManager, acquires an application id, and
+     * submits the application-master launch context (command, 256 MB memory
+     * limit, environment, placeholder local resources).
+     */
+    public void start() throws Exception {
+        connect();
+
+        // stdout/stderr are redirected under /tmp/<providerName>
+        String command = String.format(MASTER_COMMAND, "/tmp/" + appConfig.providerName, "/tmp/" + appConfig.providerName);
+        //ApplicationConstants.LOG_DIR_EXPANSION_VAR, ApplicationConstants.LOG_DIR_EXPANSION_VAR);
+
+        log.info(String.format("Starting application '%s' provider '%s' (masterCommand='%s')", appConfig.metadataAddress, appConfig.providerName, command));
+
+        // app id
+        GetNewApplicationRequest appRequest = Records.newRecord(GetNewApplicationRequest.class);
+        GetNewApplicationResponse appResponse = rmClient.getNewApplication(appRequest);
+
+        this.appId = appResponse.getApplicationId();
+
+        log.info(String.format("Acquired app id '%s' for '%s'", appId.toString(), appConfig.providerName));
+
+        // command
+        ContainerLaunchContext launchContext = Records.newRecord(ContainerLaunchContext.class);
+        launchContext.setCommands(Collections.singletonList(command));
+
+        // resource limit
+        Resource resource = Records.newRecord(Resource.class);
+        resource.setMemory(256); // TODO make dynamic
+        launchContext.setResource(resource);
+
+        // environment
+        Map<String, String> env = new HashMap<String, String>();
+        env.put(ENV_CLUSTER_ADDRESS, appConfig.clusterAddress);
+        env.put(ENV_CLUSTER_NAME, appConfig.clusterName);
+        env.put(ENV_METADATA_ADDRESS, appConfig.metadataAddress);
+        env.put(ENV_PROVIDER_NAME, appConfig.providerName);
+        launchContext.setEnvironment(env);
+
+        // local resources
+        // YARN workaround: create dummy resource
+        Map<String, LocalResource> localResources = Utils.getDummyResources();
+        launchContext.setLocalResources(localResources);
+
+        // app submission
+        ApplicationSubmissionContext subContext = Records.newRecord(ApplicationSubmissionContext.class);
+        subContext.setApplicationId(appId);
+        subContext.setApplicationName(appConfig.providerName);
+        subContext.setAMContainerSpec(launchContext);
+
+        SubmitApplicationRequest subRequest = Records.newRecord(SubmitApplicationRequest.class);
+        subRequest.setApplicationSubmissionContext(subContext);
+
+        log.info(String.format("Starting app id '%s'", appId.toString()));
+
+        rmClient.submitApplication(subRequest);
+
+    }
+
+    /** Force-kills the submitted application. Requires start() to have run. */
+    public void stop() throws YarnRemoteException {
+        log.info(String.format("Stopping app id '%s'", appId.toString()));
+        KillApplicationRequest killRequest = Records.newRecord(KillApplicationRequest.class);
+        killRequest.setApplicationId(appId);
+
+        rmClient.forceKillApplication(killRequest);
+    }
+
+    // stores the configuration and creates the RPC factory from it
+    void configure(Configuration conf) {
+        this.conf = Preconditions.checkNotNull(conf);
+        this.rpc = YarnRPC.create(conf);
+    }
+
+    // opens the client proxy to the ResourceManager (RM_ADDRESS from config)
+    void connect() {
+        YarnConfiguration yarnConf = new YarnConfiguration(conf);
+        InetSocketAddress rmAddress = NetUtils.createSocketAddr(yarnConf.get(
+                YarnConfiguration.RM_ADDRESS,
+                YarnConfiguration.DEFAULT_RM_ADDRESS));
+        log.info("Connecting to ResourceManager at: " + rmAddress);
+        this.rmClient = ((ClientRMProtocol) rpc.getProxy(ClientRMProtocol.class, rmAddress, conf));
+    }
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/YarnContainerProcess.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/YarnContainerProcess.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/YarnContainerProcess.java
new file mode 100644
index 0000000..0d997bf
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/YarnContainerProcess.java
@@ -0,0 +1,60 @@
+package org.apache.helix.metamanager.provider.yarn;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import org.apache.log4j.Logger;
+
+/**
+ * Entry point for a single managed container process. Wires up the
+ * Zookeeper-backed metadata service and the YARN container service, then
+ * blocks on stdin until stopped; a shutdown hook tears both services down.
+ *
+ * Expected positional args (no arg-count validation is performed; missing
+ * args raise ArrayIndexOutOfBoundsException):
+ *   0: cluster address, 1: cluster name, 2: metadata address,
+ *   3: provider name, 4: container id
+ */
+public class YarnContainerProcess {
+    static final Logger log = Logger.getLogger(YarnContainerProcess.class);
+
+    public static void main(String[] args) throws Exception {
+        log.trace("BEGIN YarnProcess.main()");
+
+        final String clusterAddress = args[0];
+        final String clusterName = args[1];
+        final String metadataAddress = args[2];
+        final String providerName = args[3];
+        final String containerId = args[4];
+
+        final ApplicationConfig appConfig = new ApplicationConfig(clusterAddress, clusterName, metadataAddress, providerName);
+
+        log.debug("Launching metadata service");
+        final ZookeeperMetadataService metaService = new ZookeeperMetadataService(metadataAddress);
+        metaService.startService();
+
+        log.debug("Launching yarn container service");
+        final YarnContainerService yarnProcess = new YarnContainerService(appConfig, metaService, containerId);
+        yarnProcess.startService();
+
+        log.debug("Installing shutdown hooks");
+        Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() {
+            @Override
+            public void run() {
+                // stop the container service before its metadata backend
+                yarnProcess.stopService();
+                metaService.stopService();
+            }
+        }));
+
+        // block until stdin delivers a byte (or is closed by the launcher)
+        System.out.println("Press ENTER to stop container process");
+        System.in.read();
+
+        log.trace("END YarnProcess.main()");
+    }
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/YarnContainerProvider.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/YarnContainerProvider.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/YarnContainerProvider.java
new file mode 100644
index 0000000..9f09d46
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/YarnContainerProvider.java
@@ -0,0 +1,108 @@
+package org.apache.helix.metamanager.provider.yarn;
+
+import java.util.concurrent.TimeoutException;
+
+import org.apache.helix.metamanager.ClusterContainerProvider;
+import org.apache.helix.metamanager.provider.yarn.ContainerMetadata.ContainerState;
+import org.apache.helix.metamanager.provider.yarn.MetadataService.MetadataServiceException;
+import org.apache.log4j.Logger;
+
+public class YarnContainerProvider implements ClusterContainerProvider {
+
+ static final Logger log = Logger.getLogger(YarnContainerProvider.class);
+
+ static final long POLL_INTERVAL = 1000;
+
+ static final String REQUIRED_TYPE = "container";
+
+ static final long CONTAINER_TIMEOUT = 10000;
+
+ /*
+ * CONTAINERS
+ * A (A, READY)
+ * B (B, RUNNING)
+ */
+
+ final ApplicationConfig appConfig;
+ final String command;
+
+ final Object notifier = new Object();
+
+ ZookeeperMetadataService metaService;
+
+ public YarnContainerProvider(ApplicationConfig appConfig, String command) {
+ this.appConfig = appConfig;
+ this.command = command;
+ }
+
+ @Override
+ public void create(final String id, final String type) throws Exception {
+ if(!REQUIRED_TYPE.equals(type)) {
+ throw new IllegalArgumentException(String.format("Type '%s' not supported", type));
+ }
+
+ metaService.create(new ContainerMetadata(id, command, appConfig.providerName));
+ waitForState(id, ContainerState.ACTIVE);
+ }
+
+ @Override
+ public void destroy(final String id) throws Exception {
+ ContainerMetadata meta = metaService.read(id);
+
+ if(meta.state == ContainerState.ACTIVE) {
+ log.info(String.format("Destroying active container, going to teardown"));
+ metaService.update(new ContainerMetadata(meta, ContainerState.TEARDOWN));
+
+ } else if(meta.state == ContainerState.FAILED) {
+ log.info(String.format("Destroying failed container, going to halted"));
+ metaService.update(new ContainerMetadata(meta, ContainerState.HALTED));
+
+ } else if(meta.state == ContainerState.FINALIZE) {
+ log.info(String.format("Destroying finalized container, skipping"));
+
+ } else {
+ throw new IllegalStateException(String.format("Container '%s' must be active, failed or finalized", id));
+ }
+
+ waitForState(id, ContainerState.FINALIZE);
+ metaService.delete(id);
+ }
+
+ @Override
+ public void destroyAll() {
+ try {
+ for(ContainerMetadata meta : metaService.readAll()) {
+ try { destroy(meta.id); } catch (Exception ignore) {}
+ }
+ } catch (Exception ignore) {
+ // ignore
+ }
+ }
+
+ public void startService() {
+ log.debug("Starting yarn container provider service");
+ metaService = new ZookeeperMetadataService(appConfig.metadataAddress);
+ metaService.startService();
+ }
+
+ public void stopService() {
+ log.debug("Stopping yarn container provider service");
+ if(metaService != null) {
+ metaService.stopService();
+ metaService = null;
+ }
+ }
+
+ void waitForState(String id, ContainerState state) throws MetadataServiceException, InterruptedException, TimeoutException {
+ long limit = System.currentTimeMillis() + CONTAINER_TIMEOUT;
+ ContainerMetadata meta = metaService.read(id);
+ while(meta.state != state) {
+ if(System.currentTimeMillis() >= limit) {
+ throw new TimeoutException(String.format("Container '%s' failed to reach state '%s' (currently is '%s')", id, state, meta.state));
+ }
+ Thread.sleep(POLL_INTERVAL);
+ meta = metaService.read(id);
+ }
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/YarnContainerService.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/YarnContainerService.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/YarnContainerService.java
new file mode 100644
index 0000000..8abd8df
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/YarnContainerService.java
@@ -0,0 +1,129 @@
+package org.apache.helix.metamanager.provider.yarn;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import java.util.concurrent.Executors;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.helix.HelixManager;
+import org.apache.helix.HelixManagerFactory;
+import org.apache.helix.InstanceType;
+import org.apache.helix.metamanager.managed.ManagedFactory;
+import org.apache.helix.metamanager.provider.yarn.ContainerMetadata.ContainerState;
+import org.apache.log4j.Logger;
+
+/**
+ * Container-side service that periodically reads this container's metadata
+ * record and drives the lifecycle transitions: CONNECTING -> ACTIVE (start
+ * Helix participant), start failure -> FAILED, TEARDOWN -> HALTED.
+ */
+public class YarnContainerService {
+    static final Logger log = Logger.getLogger(YarnContainerService.class);
+
+    /** Interval in ms between container status updates. */
+    static final long CONTAINERSERVICE_INTERVAL = 1000;
+
+    final ApplicationConfig appConfig;
+    final String containerId;
+
+    // Helix participant; non-null only while the container is connected
+    HelixManager participantManager;
+
+    MetadataService metaService;
+    ScheduledExecutorService executor;
+
+    public YarnContainerService(ApplicationConfig appConfig, MetadataService metaService, String containerId) {
+        this.appConfig = appConfig;
+        this.metaService = metaService;
+        this.containerId = containerId;
+    }
+
+    /** Starts the periodic status-update loop. */
+    public void startService() {
+        log.debug("starting yarn container service");
+
+        executor = Executors.newSingleThreadScheduledExecutor();
+        executor.scheduleAtFixedRate(new ContainerService(), 0, CONTAINERSERVICE_INTERVAL, TimeUnit.MILLISECONDS);
+    }
+
+    /**
+     * Stops the status-update loop, waiting for any in-flight update to
+     * complete. Safe to call repeatedly.
+     */
+    public void stopService() {
+        log.debug("stopping yarn container service");
+
+        if(executor != null) {
+            executor.shutdown();
+            try {
+                // interruptible wait instead of a busy sleep loop
+                while(!executor.awaitTermination(100, TimeUnit.MILLISECONDS)) {
+                    // keep waiting for the in-flight status update to finish
+                }
+            } catch (InterruptedException e) {
+                // preserve the interrupt status for our caller
+                Thread.currentThread().interrupt();
+            }
+            executor = null;
+        }
+    }
+
+    /** Connects this container to the Helix cluster as a participant. */
+    public void startParticipant() throws Exception {
+        log.info("STARTING " + containerId);
+        participantManager = HelixManagerFactory.getZKHelixManager(appConfig.clusterName,
+                containerId, InstanceType.PARTICIPANT, appConfig.clusterAddress);
+        participantManager.getStateMachineEngine().registerStateModelFactory(
+                "MasterSlave", new ManagedFactory());
+        participantManager.connect();
+        log.info("STARTED " + containerId);
+    }
+
+    /** Disconnects the Helix participant, if connected. */
+    public void stopParticipant() {
+        if (participantManager != null) {
+            participantManager.disconnect();
+            participantManager = null;
+        }
+    }
+
+    // single iteration of the lifecycle state machine; scheduled periodically
+    class ContainerService implements Runnable {
+        @Override
+        public void run() {
+            log.info("updating container status");
+
+            try {
+                ContainerMetadata meta = metaService.read(containerId);
+
+                if(meta.state == ContainerState.CONNECTING) {
+                    log.info("container connecting, going to active");
+                    try {
+                        startParticipant();
+                        metaService.update(new ContainerMetadata(meta, ContainerState.ACTIVE));
+                    } catch (Exception e) {
+                        log.error("Failed to start participant, going to failed", e);
+                        stopParticipant();
+                        metaService.update(new ContainerMetadata(meta, ContainerState.FAILED));
+                    }
+                }
+
+                if(meta.state == ContainerState.ACTIVE) {
+                    // do something
+                    // and go to failed on error
+                }
+
+                if(meta.state == ContainerState.TEARDOWN) {
+                    log.info("container teardown, going to halted");
+                    stopParticipant();
+                    metaService.update(new ContainerMetadata(meta, ContainerState.HALTED));
+                }
+
+            } catch(Exception e) {
+                // never let an exception escape: it would cancel the schedule
+                log.error(String.format("Error while updating container '%s' status", containerId), e);
+            }
+        }
+    }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/YarnContainerStatusProvider.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/YarnContainerStatusProvider.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/YarnContainerStatusProvider.java
new file mode 100644
index 0000000..54aa3da
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/YarnContainerStatusProvider.java
@@ -0,0 +1,52 @@
+package org.apache.helix.metamanager.provider.yarn;
+
+import org.apache.helix.metamanager.ClusterContainerStatusProvider;
+import org.apache.helix.metamanager.provider.yarn.ContainerMetadata.ContainerState;
+import org.apache.helix.metamanager.provider.yarn.MetadataService.MetadataServiceException;
+
+/**
+ * Status provider backed by the container metadata store: a container is
+ * active when its record is in state ACTIVE, failed when in state FAILED.
+ * Read errors are reported as "not active" / "not failed".
+ */
+public class YarnContainerStatusProvider implements ClusterContainerStatusProvider {
+
+    final String metadataAddress;
+
+    ZookeeperMetadataService metaService;
+
+    public YarnContainerStatusProvider(String metadataAddress) {
+        this.metadataAddress = metadataAddress;
+        this.metaService = new ZookeeperMetadataService(metadataAddress);
+    }
+
+    @Override
+    public boolean exists(String id) {
+        return metaService.exists(id);
+    }
+
+    @Override
+    public boolean isActive(String id) {
+        try {
+            return metaService.read(id).state == ContainerState.ACTIVE;
+        } catch (MetadataServiceException e) {
+            return false;
+        }
+    }
+
+    @Override
+    public boolean isFailed(String id) {
+        try {
+            return metaService.read(id).state == ContainerState.FAILED;
+        } catch (Exception e) {
+            return false;
+        }
+    }
+
+    /**
+     * Starts the metadata service. Reuses the instance created by the
+     * constructor when present (previously a fresh instance silently
+     * replaced — and leaked — the constructor-created one).
+     */
+    public void startService() {
+        if(metaService == null) {
+            metaService = new ZookeeperMetadataService(metadataAddress);
+        }
+        metaService.startService();
+    }
+
+    /** Stops and releases the metadata service; safe to call repeatedly. */
+    public void stopService() {
+        if(metaService != null) {
+            metaService.stopService();
+            metaService = null;
+        }
+    }
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/YarnMaster.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/YarnMaster.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/YarnMaster.java
new file mode 100644
index 0000000..f43bb67
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/YarnMaster.java
@@ -0,0 +1,134 @@
+package org.apache.helix.metamanager.provider.yarn;
+
+import java.net.InetSocketAddress;
+import java.util.Map;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.net.NetUtils;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+import org.apache.hadoop.yarn.api.AMRMProtocol;
+import org.apache.hadoop.yarn.api.ApplicationConstants;
+import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationMasterRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterRequest;
+import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.ipc.YarnRPC;
+import org.apache.hadoop.yarn.util.ConverterUtils;
+import org.apache.hadoop.yarn.util.Records;
+import org.apache.log4j.Logger;
+
+/**
+ * YARN application master entry point. Registers with the ResourceManager,
+ * reads its {@link ApplicationConfig} from the environment variables set by
+ * {@link YarnApplication}, runs a {@link YarnMasterService}, and unregisters
+ * from the RM via a shutdown hook.
+ */
+public class YarnMaster extends Configured implements Tool {
+
+    static final Logger log = Logger.getLogger(YarnMaster.class);
+
+    AMRMProtocol resourceManager;
+    ApplicationAttemptId appAttemptId;
+
+    YarnMasterService service;
+
+    @Override
+    public int run(String[] args) throws Exception {
+        log.trace("BEGIN YarnMaster.run()");
+
+        Configuration conf = getConf();
+
+        this.appAttemptId = getApplicationAttemptId();
+        log.info(String.format("Got application attempt id '%s'", appAttemptId.toString()));
+
+        log.debug("Getting resource manager");
+        this.resourceManager = getResourceManager(conf);
+
+        // register the AM with the RM
+        log.debug("Registering application master");
+        RegisterApplicationMasterRequest appMasterRequest =
+                Records.newRecord(RegisterApplicationMasterRequest.class);
+        appMasterRequest.setApplicationAttemptId(appAttemptId);
+        appMasterRequest.setHost("");
+        appMasterRequest.setRpcPort(0);
+        appMasterRequest.setTrackingUrl("");
+
+        resourceManager.registerApplicationMaster(appMasterRequest);
+
+        // application config is passed from the client via the environment
+        String clusterAddress = getEnv(YarnApplication.ENV_CLUSTER_ADDRESS);
+        String clusterName = getEnv(YarnApplication.ENV_CLUSTER_NAME);
+        String metadataAddress = getEnv(YarnApplication.ENV_METADATA_ADDRESS);
+        String providerName = getEnv(YarnApplication.ENV_PROVIDER_NAME);
+        ApplicationConfig appConfig = new ApplicationConfig(clusterAddress, clusterName, metadataAddress, providerName);
+
+        service = new YarnMasterService(resourceManager, conf, appAttemptId, appConfig);
+        service.startService();
+
+        Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() {
+            @Override
+            public void run() {
+
+                service.stopService();
+
+                // finish application
+                log.debug("Sending finish request");
+                FinishApplicationMasterRequest finishReq =
+                        Records.newRecord(FinishApplicationMasterRequest.class);
+
+                finishReq.setAppAttemptId(getApplicationAttemptId());
+                finishReq.setFinishApplicationStatus(FinalApplicationStatus.SUCCEEDED);
+
+                // best-effort: the RM connection may already be gone at shutdown
+                try { resourceManager.finishApplicationMaster(finishReq); } catch(Exception ignore) {}
+            }
+        }));
+
+        // block forever; the shutdown hook performs cleanup on termination
+        try { Thread.currentThread().join(); } catch(Exception ignore) {}
+
+        log.trace("END YarnMaster.run()");
+
+        return 0;
+    }
+
+    // opens the AM-RM scheduler proxy (RM_SCHEDULER_ADDRESS from config)
+    private AMRMProtocol getResourceManager(Configuration conf) {
+        // Connect to the Scheduler of the ResourceManager.
+        YarnConfiguration yarnConf = new YarnConfiguration(conf);
+        YarnRPC rpc = YarnRPC.create(yarnConf);
+        InetSocketAddress rmAddress =
+                NetUtils.createSocketAddr(yarnConf.get(
+                        YarnConfiguration.RM_SCHEDULER_ADDRESS,
+                        YarnConfiguration.DEFAULT_RM_SCHEDULER_ADDRESS));
+        log.info("Connecting to ResourceManager at " + rmAddress);
+        AMRMProtocol resourceManager =
+                (AMRMProtocol) rpc.getProxy(AMRMProtocol.class, rmAddress, conf);
+        return resourceManager;
+    }
+
+    // derives the attempt id from the AM container id set in the environment
+    private ApplicationAttemptId getApplicationAttemptId() {
+        ContainerId containerId = ConverterUtils.toContainerId(getEnv(ApplicationConstants.AM_CONTAINER_ID_ENV));
+        ApplicationAttemptId appAttemptID = containerId.getApplicationAttemptId();
+        return appAttemptID;
+    }
+
+    /**
+     * Reads a required environment variable.
+     *
+     * @throws IllegalArgumentException if the variable is not set
+     */
+    private String getEnv(String key) {
+        Map<String, String> envs = System.getenv();
+        // renamed from the misleading 'clusterName' — this reads arbitrary keys
+        String value = envs.get(key);
+        if (value == null) {
+            // container id should always be set in the env by the framework
+            throw new IllegalArgumentException(
+                    String.format("%s not set in the environment", key));
+        }
+        return value;
+    }
+
+    public static void main(String[] args) throws Exception {
+        log.trace("BEGIN YarnMaster.main()");
+
+        try {
+            int rc = ToolRunner.run(new Configuration(), new YarnMaster(), args);
+            System.exit(rc);
+        } catch (Exception e) {
+            // log with full stack trace instead of System.err.println(e)
+            log.error("YarnMaster failed", e);
+            System.exit(1);
+        }
+
+        log.trace("END YarnMaster.main()");
+    }
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/YarnMasterProcess.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/YarnMasterProcess.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/YarnMasterProcess.java
new file mode 100644
index 0000000..bd4fb3d
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/YarnMasterProcess.java
@@ -0,0 +1,119 @@
+package org.apache.helix.metamanager.provider.yarn;
+
+import java.net.InetSocketAddress;
+import java.util.Map;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.net.NetUtils;
+import org.apache.hadoop.yarn.api.AMRMProtocol;
+import org.apache.hadoop.yarn.api.ApplicationConstants;
+import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationMasterRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterRequest;
+import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.ipc.YarnRPC;
+import org.apache.hadoop.yarn.util.ConverterUtils;
+import org.apache.hadoop.yarn.util.Records;
+import org.apache.log4j.Logger;
+
+public class YarnMasterProcess {
+
+ static final Logger log = Logger.getLogger(YarnMasterProcess.class);
+
+ public static void main(String[] args) throws Exception {
+ log.trace("BEGIN YarnMaster.main()");
+
+ final ApplicationAttemptId appAttemptId = getApplicationAttemptId();
+ log.info(String.format("Got application attempt id '%s'", appAttemptId.toString()));
+
+ log.debug("Connecting to resource manager");
+ Configuration conf = new YarnConfiguration();
+
+ final AMRMProtocol resourceManager = getResourceManager(conf);
+
+ // register the AM with the RM
+ log.debug("Registering application master");
+ RegisterApplicationMasterRequest appMasterRequest =
+ Records.newRecord(RegisterApplicationMasterRequest.class);
+ appMasterRequest.setApplicationAttemptId(appAttemptId);
+ appMasterRequest.setHost("");
+ appMasterRequest.setRpcPort(0);
+ appMasterRequest.setTrackingUrl("");
+
+ resourceManager.registerApplicationMaster(appMasterRequest);
+
+ String clusterAddress = getEnv(YarnApplication.ENV_CLUSTER_ADDRESS);
+ String clusterName = getEnv(YarnApplication.ENV_CLUSTER_NAME);
+ String metadataAddress = getEnv(YarnApplication.ENV_METADATA_ADDRESS);
+ String providerName = getEnv(YarnApplication.ENV_PROVIDER_NAME);
+ ApplicationConfig appConfig = new ApplicationConfig(clusterAddress, clusterName, metadataAddress, providerName);
+
+ log.debug("Launching metadata service");
+ final ZookeeperMetadataService metaService = new ZookeeperMetadataService(metadataAddress);
+ metaService.startService();
+
+ log.debug("Launching yarn master service");
+ final YarnMasterService service = new YarnMasterService(resourceManager, conf, appAttemptId, appConfig, metaService);
+ service.startService();
+
+ log.debug("Installing shutdown hooks");
+ Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() {
+ @Override
+ public void run() {
+
+ service.stopService();
+
+ metaService.stopService();
+
+ // finish application
+ log.debug("Sending finish request");
+ FinishApplicationMasterRequest finishReq =
+ Records.newRecord(FinishApplicationMasterRequest.class);
+
+ finishReq.setAppAttemptId(getApplicationAttemptId());
+ finishReq.setFinishApplicationStatus(FinalApplicationStatus.SUCCEEDED);
+
+ try { resourceManager.finishApplicationMaster(finishReq); } catch(Exception ignore) {}
+ }
+ }));
+
+ System.out.println("Press ENTER to stop master service");
+ System.in.read();
+
+ log.trace("END YarnMaster.main()");
+ }
+
+ static AMRMProtocol getResourceManager(Configuration conf) {
+ // Connect to the Scheduler of the ResourceManager.
+ YarnConfiguration yarnConf = new YarnConfiguration(conf);
+ YarnRPC rpc = YarnRPC.create(yarnConf);
+ InetSocketAddress rmAddress =
+ NetUtils.createSocketAddr(yarnConf.get(
+ YarnConfiguration.RM_SCHEDULER_ADDRESS,
+ YarnConfiguration.DEFAULT_RM_SCHEDULER_ADDRESS));
+ log.info("Connecting to ResourceManager at " + rmAddress);
+ AMRMProtocol resourceManager =
+ (AMRMProtocol) rpc.getProxy(AMRMProtocol.class, rmAddress, conf);
+ return resourceManager;
+ }
+
+ static ApplicationAttemptId getApplicationAttemptId() {
+ ContainerId containerId = ConverterUtils.toContainerId(getEnv(ApplicationConstants.AM_CONTAINER_ID_ENV));
+ ApplicationAttemptId appAttemptID = containerId.getApplicationAttemptId();
+ return appAttemptID;
+ }
+
+ static String getEnv(String key) {
+ Map<String, String> envs = System.getenv();
+ String clusterName = envs.get(key);
+ if (clusterName == null) {
+ // container id should always be set in the env by the framework
+ throw new IllegalArgumentException(
+ String.format("%s not set in the environment", key));
+ }
+ return clusterName;
+ }
+
+}
[05/15] Adding Helix-task-framework and Yarn integration modules
Posted by ki...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnContainerStatusProvider.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnContainerStatusProvider.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnContainerStatusProvider.java
new file mode 100644
index 0000000..f7e3076
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnContainerStatusProvider.java
@@ -0,0 +1,52 @@
+package org.apache.helix.metamanager.impl.yarn;
+
+import org.apache.helix.metamanager.ContainerStatusProvider;
+import org.apache.helix.metamanager.impl.yarn.ContainerMetadata.ContainerState;
+import org.apache.helix.metamanager.impl.yarn.MetadataService.MetadataServiceException;
+
+public class YarnContainerStatusProvider implements ContainerStatusProvider {
+
+ final String metadataAddress;
+
+ ZookeeperMetadataService metaService;
+
+ public YarnContainerStatusProvider(String metadataAddress) {
+ this.metadataAddress = metadataAddress;
+ this.metaService = new ZookeeperMetadataService(metadataAddress);
+ }
+
+ @Override
+ public boolean exists(String id) {
+ return metaService.exists(id);
+ }
+
+ @Override
+ public boolean isActive(String id) {
+ try {
+ return metaService.read(id).state == ContainerState.ACTIVE;
+ } catch (MetadataServiceException e) {
+ return false;
+ }
+ }
+
+ @Override
+ public boolean isFailed(String id) {
+ try {
+ return metaService.read(id).state == ContainerState.FAILED;
+ } catch (Exception e) {
+ return false;
+ }
+ }
+
+ public void startService() {
+ metaService = new ZookeeperMetadataService(metadataAddress);
+ metaService.startService();
+ }
+
+ public void stopService() {
+ if(metaService != null) {
+ metaService.stopService();
+ metaService = null;
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnDataProvider.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnDataProvider.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnDataProvider.java
new file mode 100644
index 0000000..8bd80b5
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnDataProvider.java
@@ -0,0 +1,73 @@
+package org.apache.helix.metamanager.impl.yarn;
+
+import java.util.Collection;
+
/**
 * Abstraction for a (remote) repository of yarn container meta data. Meta data
 * is read and updated by {@link YarnContainerProvider},
 * {@link YarnMasterProcess} and {@link YarnContainerProcess}.<br/>
 * <b>NOTE:</b> Each operation is assumed to be atomic.
 *
 */
interface YarnDataProvider {

    /**
     * Checks for existence of meta data about a container instance.
     *
     * @param id
     *            unique container id
     * @return true, if meta data exists
     */
    public boolean exists(String id);

    /**
     * Create meta data entry. Check for non-existence of meta data for given
     * container id and create node.
     *
     * @param data
     *            container meta data with unique id
     * @throws Exception
     *             if meta data entry already exists
     */
    public void create(YarnContainerData data) throws Exception;

    /**
     * Read meta data for given container id.
     *
     * @param id
     *            unique container id
     * @return yarn container data
     * @throws Exception
     *             if meta data entry for given id does not exist
     */
    public YarnContainerData read(String id) throws Exception;

    /**
     * Read all meta data stored for this domain space of yarn providers and
     * containers.
     *
     * @return collection of meta data entries, empty if none
     * @throws Exception
     */
    public Collection<YarnContainerData> readAll() throws Exception;

    /**
     * Write meta data entry.
     *
     * @param data
     *            yarn container meta data
     * @throws Exception
     *             if meta data entry for given id does not exist
     */
    public void update(YarnContainerData data) throws Exception;

    /**
     * Delete meta data entry. Frees up unique id to be reused. May throw an
     * exception on non-existence or be idempotent.
     *
     * @param id
     *            unique container id
     * @throws Exception
     */
    public void delete(String id) throws Exception;
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnMasterProcess.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnMasterProcess.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnMasterProcess.java
new file mode 100644
index 0000000..d4447ee
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnMasterProcess.java
@@ -0,0 +1,144 @@
+package org.apache.helix.metamanager.impl.yarn;
+
+import java.net.InetSocketAddress;
+import java.util.Map;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.net.NetUtils;
+import org.apache.hadoop.yarn.api.AMRMProtocol;
+import org.apache.hadoop.yarn.api.ApplicationConstants;
+import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationMasterRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterRequest;
+import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.ipc.YarnRPC;
+import org.apache.hadoop.yarn.util.ConverterUtils;
+import org.apache.hadoop.yarn.util.Records;
+import org.apache.helix.metamanager.provider.ProviderProcess;
+import org.apache.log4j.Logger;
+
/**
 * Host process for {@link YarnContainerProviderProcess}. Hosts the application
 * master in YARN and the provider participant to the Helix meta cluster.
 * (Program entry point)
 *
 */
class YarnMasterProcess {

    static final Logger log = Logger.getLogger(YarnMasterProcess.class);

    /**
     * Program entry point for the application master. Registers with the
     * resource manager, then starts the yarn data service, the master
     * service, the container provider and the provider process. A shutdown
     * hook stops all services and deregisters from the resource manager.
     *
     * NOTE(review): unlike the provider.yarn variant, main() returns
     * immediately after installing the shutdown hook — presumably the JVM is
     * kept alive by the non-daemon service threads; confirm.
     */
    public static void main(String[] args) throws Exception {
        log.trace("BEGIN YarnMaster.main()");

        final ApplicationAttemptId appAttemptId = getApplicationAttemptId();
        log.info(String.format("Got application attempt id '%s'", appAttemptId.toString()));

        log.debug("Reading master properties");
        YarnMasterProperties properties = YarnUtils.createMasterProperties(YarnUtils.getPropertiesFromPath(YarnUtils.YARN_MASTER_PROPERTIES));

        if (!properties.isValid())
            throw new IllegalArgumentException(String.format("master properties not valid: %s", properties.toString()));

        // endpoints (RM, scheduler, HDFS) come from the master properties
        log.debug("Connecting to resource manager");
        Configuration conf = new YarnConfiguration();
        conf.set(YarnConfiguration.RM_ADDRESS, properties.getResourceManager());
        conf.set(YarnConfiguration.RM_SCHEDULER_ADDRESS, properties.getScheduler());
        conf.set(FileSystem.FS_DEFAULT_NAME_KEY, properties.getHdfs());

        final AMRMProtocol resourceManager = getResourceManager(conf);

        // register the AM with the RM; no RPC endpoint or tracking URL is exposed
        log.debug("Registering application master");
        RegisterApplicationMasterRequest appMasterRequest = Records.newRecord(RegisterApplicationMasterRequest.class);
        appMasterRequest.setApplicationAttemptId(appAttemptId);
        appMasterRequest.setHost("");
        appMasterRequest.setRpcPort(0);
        appMasterRequest.setTrackingUrl("");

        resourceManager.registerApplicationMaster(appMasterRequest);

        log.debug("Starting yarndata service");
        final ZookeeperYarnDataProvider yarnDataService = new ZookeeperYarnDataProvider(properties.getYarnData());
        yarnDataService.start();

        // master service drives the allocate/launch cycle against YARN
        log.debug("Starting yarn master service");
        final YarnMasterService service = new YarnMasterService();
        service.configure(properties);
        service.setAttemptId(appAttemptId);
        service.setYarnDataProvider(yarnDataService);
        service.setProtocol(resourceManager);
        service.setYarnConfiguration(conf);
        service.start();

        log.debug("Starting provider");
        final YarnContainerProvider provider = new YarnContainerProvider();
        provider.configure(properties);
        provider.start();

        log.debug("Starting provider process");
        final ProviderProcess process = new ProviderProcess();
        process.configure(properties);
        // NOTE(review): "setConteinerProvider" is a typo in the external
        // ProviderProcess API; cannot be fixed from here
        process.setConteinerProvider(provider);
        process.start();

        // services are stopped in reverse start order
        log.debug("Installing shutdown hooks");
        Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() {
            @Override
            public void run() {
                log.debug("Stopping provider process");
                process.stop();

                log.debug("Stopping provider");
                try { provider.stop(); } catch (Exception ignore) {}

                log.debug("Stopping yarn master service");
                service.stop();

                log.debug("Stopping yarndata service");
                yarnDataService.stop();

                // finish application (deregister from the resource manager)
                log.debug("Sending finish request");
                FinishApplicationMasterRequest finishReq = Records.newRecord(FinishApplicationMasterRequest.class);

                // NOTE(review): re-reads the attempt id from the environment
                // inside the shutdown hook; could reuse appAttemptId instead
                finishReq.setAppAttemptId(getApplicationAttemptId());
                finishReq.setFinishApplicationStatus(FinalApplicationStatus.SUCCEEDED);

                // best-effort during shutdown; failures are deliberately ignored
                try { resourceManager.finishApplicationMaster(finishReq); } catch(Exception ignore) {}
            }
        }));

        log.trace("END YarnMaster.main()");
    }

    /**
     * Creates an RPC proxy to the scheduler endpoint of the resource manager.
     *
     * @param conf Hadoop configuration providing the scheduler address
     * @return AM-RM protocol proxy
     */
    static AMRMProtocol getResourceManager(Configuration conf) {
        // Connect to the Scheduler of the ResourceManager.
        YarnConfiguration yarnConf = new YarnConfiguration(conf);
        YarnRPC rpc = YarnRPC.create(yarnConf);
        InetSocketAddress rmAddress = NetUtils.createSocketAddr(yarnConf.get(YarnConfiguration.RM_SCHEDULER_ADDRESS,
                YarnConfiguration.DEFAULT_RM_SCHEDULER_ADDRESS));
        log.info("Connecting to ResourceManager at " + rmAddress);
        AMRMProtocol resourceManager = (AMRMProtocol) rpc.getProxy(AMRMProtocol.class, rmAddress, conf);
        return resourceManager;
    }

    /**
     * Resolves the application attempt id from the AM container id set in the
     * environment by the YARN framework.
     */
    static ApplicationAttemptId getApplicationAttemptId() {
        ContainerId containerId = ConverterUtils.toContainerId(getEnv(ApplicationConstants.AM_CONTAINER_ID_ENV));
        ApplicationAttemptId appAttemptID = containerId.getApplicationAttemptId();
        return appAttemptID;
    }

    /**
     * Reads a required variable from the process environment.
     *
     * @param key environment variable name
     * @return the variable's value, never null
     * @throws IllegalArgumentException if the variable is not set
     */
    static String getEnv(String key) {
        Map<String, String> envs = System.getenv();
        String clusterName = envs.get(key);
        if (clusterName == null) {
            // container id should always be set in the env by the framework
            throw new IllegalArgumentException(String.format("%s not set in the environment", key));
        }
        return clusterName;
    }

}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnMasterProperties.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnMasterProperties.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnMasterProperties.java
new file mode 100644
index 0000000..abeb461
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnMasterProperties.java
@@ -0,0 +1,13 @@
+package org.apache.helix.metamanager.impl.yarn;
+
+/**
+ * Base configuration for {@link YarnMasterProcess}.
+ *
+ */
+public class YarnMasterProperties extends YarnContainerProviderProperties {
+ /**
+ *
+ */
+ private static final long serialVersionUID = -2209509980239674160L;
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnMasterService.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnMasterService.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnMasterService.java
new file mode 100644
index 0000000..1e7aec3
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnMasterService.java
@@ -0,0 +1,414 @@
+package org.apache.helix.metamanager.impl.yarn;
+
+import java.io.File;
+import java.net.InetSocketAddress;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+import java.util.concurrent.Executors;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.net.NetUtils;
+import org.apache.hadoop.yarn.api.AMRMProtocol;
+import org.apache.hadoop.yarn.api.ApplicationConstants;
+import org.apache.hadoop.yarn.api.ContainerManager;
+import org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
+import org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest;
+import org.apache.hadoop.yarn.api.records.AMResponse;
+import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
+import org.apache.hadoop.yarn.api.records.Container;
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
+import org.apache.hadoop.yarn.api.records.ContainerStatus;
+import org.apache.hadoop.yarn.api.records.LocalResource;
+import org.apache.hadoop.yarn.api.records.LocalResourceType;
+import org.apache.hadoop.yarn.api.records.NodeId;
+import org.apache.hadoop.yarn.api.records.Priority;
+import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.api.records.ResourceRequest;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.exceptions.YarnRemoteException;
+import org.apache.hadoop.yarn.ipc.YarnRPC;
+import org.apache.hadoop.yarn.util.Records;
+import org.apache.helix.metamanager.Service;
+import org.apache.helix.metamanager.impl.yarn.YarnContainerData.ContainerState;
+import org.apache.log4j.Logger;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.Maps;
+
/**
 * Implements YARN application master. Continuously monitors container health in
 * YARN and yarn meta data updates. Spawns and destroys containers.
 *
 */
class YarnMasterService implements Service {

    static final Logger log = Logger.getLogger(YarnMasterService.class);

    static final String REQUIRED_TYPE = "container";

    static final long ZOOKEEPER_TIMEOUT = 5000;
    // period of the allocate/launch update cycle, in milliseconds
    static final long MASTERSERVICE_INTERVAL = 1000;

    static final String CONTAINERS = "CONTAINERS";

    // shell command template: script path, stdout dir, stderr dir
    static final String YARN_CONTAINER_COMMAND = "/bin/sh %s 1>%s/stdout 2>%s/stderr";

    YarnMasterProperties properties;
    AMRMProtocol protocol;
    ApplicationAttemptId attemptId;
    Configuration yarnConfig;
    YarnDataProvider yarnDataService;

    // yarn containers allocated by the RM but not yet assigned to a meta container
    final Map<ContainerId, Container> unassignedContainers = new HashMap<ContainerId, Container>();
    // yarn containers currently running an assigned meta container
    final Map<ContainerId, Container> activeContainers = new HashMap<ContainerId, Container>();
    // yarn containers reported as completed by the RM
    final Map<ContainerId, ContainerStatus> completedContainers = new HashMap<ContainerId, ContainerStatus>();
    // mapping from yarn container id to meta container id
    final Map<ContainerId, String> yarn2meta = new HashMap<ContainerId, String>();

    // runs the periodic YarnService update cycle; non-null while started
    ScheduledExecutorService executor;

    /**
     * Configures this service from master properties.
     *
     * @throws Exception if the given properties are not valid master properties
     */
    @Override
    public void configure(Properties properties) throws Exception {
        YarnMasterProperties yarnProperties = YarnUtils.createMasterProperties(properties);
        Preconditions.checkArgument(yarnProperties.isValid());
        this.properties = yarnProperties;
    }

    public void setProtocol(AMRMProtocol protocol) {
        this.protocol = protocol;
    }

    public void setAttemptId(ApplicationAttemptId attemptId) {
        this.attemptId = attemptId;
    }

    public void setYarnConfiguration(Configuration yarnConfig) {
        this.yarnConfig = yarnConfig;
    }

    public void setYarnDataProvider(YarnDataProvider yarnDataService) {
        this.yarnDataService = yarnDataService;
    }

    /**
     * Starts the periodic update cycle. All dependencies (properties,
     * protocol, attempt id, yarn configuration, yarn data provider) must have
     * been injected beforehand.
     */
    @Override
    public void start() {
        Preconditions.checkNotNull(properties);
        Preconditions.checkNotNull(protocol);
        Preconditions.checkNotNull(attemptId);
        Preconditions.checkNotNull(yarnConfig);
        Preconditions.checkNotNull(yarnDataService);

        log.debug("starting yarn master service");

        executor = Executors.newSingleThreadScheduledExecutor();
        executor.scheduleAtFixedRate(new YarnService(), 0, MASTERSERVICE_INTERVAL, TimeUnit.MILLISECONDS);
    }

    /**
     * Stops the update cycle (blocking until the executor terminates) and
     * cleans up the local master working directories.
     */
    @Override
    public void stop() {
        log.debug("stopping yarn master service");

        if (executor != null) {
            executor.shutdown();
            // busy-wait for termination; tasks are short-lived update cycles
            while (!executor.isTerminated()) {
                try {
                    Thread.sleep(100);
                } catch (InterruptedException e) {
                    // ignore
                }
            }
            executor = null;
        }

        destroyLocalMasterNamespace();
    }

    /**
     * Reads all meta data entries and returns only those owned by this
     * provider (matched by provider name).
     */
    Collection<YarnContainerData> readOwnedYarnData() throws Exception {
        log.debug("reading container data");

        Collection<YarnContainerData> containers = new ArrayList<YarnContainerData>();
        for (YarnContainerData meta : yarnDataService.readAll()) {
            if (meta.owner.equals(properties.getName())) {
                containers.add(meta);
                log.debug(String.format("found container node '%s' (state=%s, yarnId=%s, owner=%s)", meta.id, meta.state, meta.yarnId, meta.owner));
            }
        }
        return containers;
    }

    /**
     * One periodic update cycle: reconciles meta container state with YARN by
     * requesting/releasing containers, launching processes on newly allocated
     * containers and finalizing completed ones.
     */
    class YarnService implements Runnable {
        // monotonically increasing id passed with each allocate request
        int responseId = 0;

        @Override
        public void run() {
            try {
                log.debug("running yarn service update cycle");

                Collection<YarnContainerData> yarndata = readOwnedYarnData();

                // active meta containers
                int numMetaActive = countActiveMeta(yarndata);

                // newly acquired meta containers
                int numMetaAcquire = countAcquireMeta(yarndata);

                // destroyed meta containers
                List<ContainerId> destroyedReleasedIds = createDestroyedReleaseList(yarndata);
                int numMetaCompleted = destroyedReleasedIds.size();

                int numMeta = numMetaAcquire + numMetaActive + numMetaCompleted;

                // yarn containers
                int numYarnUnassigned = unassignedContainers.size();
                int numYarnActive = activeContainers.size();
                int numYarnCompleted = completedContainers.size();
                int numYarn = numYarnUnassigned + numYarnActive + numYarnCompleted;

                // negative when there are more unassigned yarn containers than needed
                int numYarnRequired = numMetaAcquire - numYarnUnassigned;

                // additionally required containers
                int numRequestAdditional = Math.max(0, numYarnRequired);

                // overstock containers
                List<ContainerId> unneededReleasedIds = createOverstockReleaseList(numYarnRequired);

                int numReleased = destroyedReleasedIds.size() + unneededReleasedIds.size();

                log.debug(String.format("meta containers (total=%d, acquire=%d, active=%d, completed=%d)", numMeta, numMetaAcquire, numMetaActive, numMetaCompleted));
                log.debug(String.format("yarn containers (total=%d, unassigned=%d, active=%d, completed=%d)", numYarn, numYarnUnassigned, numYarnActive, numYarnCompleted));
                log.debug(String.format("requesting %d new containers, releasing %d", numRequestAdditional, numReleased));

                Priority priority = Records.newRecord(Priority.class);
                priority.setPriority(0);

                Resource resource = Records.newRecord(Resource.class);
                resource.setMemory(256); // TODO make dynamic

                // ask for containers on any host
                ResourceRequest resourceRequest = Records.newRecord(ResourceRequest.class);
                resourceRequest.setHostName("*");
                resourceRequest.setNumContainers(numRequestAdditional);
                resourceRequest.setPriority(priority);
                resourceRequest.setCapability(resource);

                AllocateRequest request = Records.newRecord(AllocateRequest.class);
                request.setResponseId(responseId);
                request.setApplicationAttemptId(attemptId);
                request.addAsk(resourceRequest);
                request.addAllReleases(destroyedReleasedIds);
                request.addAllReleases(unneededReleasedIds);

                responseId++;

                AllocateResponse allocateResponse = null;
                try {
                    allocateResponse = protocol.allocate(request);
                } catch (YarnRemoteException e) {
                    // ignore
                    log.error("Error allocating containers", e);
                    return;
                }

                AMResponse response = allocateResponse.getAMResponse();

                // remove unassigned container about to be freed
                for (ContainerId id : unneededReleasedIds) {
                    log.info(String.format("Unassigned container '%s' about to be freed, removing", id));
                    unassignedContainers.remove(id);
                }

                // newly added containers
                for (Container container : response.getAllocatedContainers()) {
                    unassignedContainers.put(container.getId(), container);
                }

                log.info(String.format("%d new containers available, %d required", unassignedContainers.size(), numMetaAcquire));

                // pair each unassigned yarn container with a meta container
                // that does not yet have a yarn id, and launch the process
                Iterator<Container> itYarn = unassignedContainers.values().iterator();
                Iterator<YarnContainerData> itMeta = yarndata.iterator();
                while (itYarn.hasNext() && itMeta.hasNext()) {
                    YarnContainerData meta = itMeta.next();

                    // already assigned to a yarn container, skip
                    if (meta.yarnId >= 0)
                        continue;

                    Container containerYarn = itYarn.next();

                    log.debug(String.format("assigning yarn container '%s' to container node '%s'", containerYarn.getId(), meta.id));

                    String command = String.format(YARN_CONTAINER_COMMAND, YarnUtils.YARN_CONTAINER_PATH, ApplicationConstants.LOG_DIR_EXPANSION_VAR,
                            ApplicationConstants.LOG_DIR_EXPANSION_VAR);

                    log.debug(String.format("Running container command \"%s\"", command));

                    // configuration handed to the container process via a properties file
                    YarnContainerProcessProperties containerProp = meta.getProperties();
                    containerProp.setProperty(YarnContainerProcessProperties.ADDRESS, properties.getAddress());
                    containerProp.setProperty(YarnContainerProcessProperties.CLUSTER, properties.getCluster());
                    containerProp.setProperty(YarnContainerProcessProperties.YARNDATA, properties.getYarnData());
                    containerProp.setProperty(YarnContainerProcessProperties.NAME, meta.id);

                    File propertiesFile = YarnUtils.writePropertiesToTemp(containerProp);

                    // HDFS staging: archive and properties are namespaced per app/container
                    final String namespace = attemptId.getApplicationId().toString() + "/" + meta.id;
                    final Path containerArchive = YarnUtils.copyToHdfs(YarnUtils.YARN_CONTAINER_STAGING, YarnUtils.YARN_CONTAINER_STAGING, namespace, yarnConfig);
                    final Path containerProperties = YarnUtils.copyToHdfs(propertiesFile.getCanonicalPath(), YarnUtils.YARN_CONTAINER_PROPERTIES, namespace, yarnConfig);

                    // local resources
                    Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();
                    localResources.put(YarnUtils.YARN_CONTAINER_DESTINATION,
                            YarnUtils.createHdfsResource(containerArchive, LocalResourceType.ARCHIVE, yarnConfig));
                    localResources.put(YarnUtils.YARN_CONTAINER_PROPERTIES,
                            YarnUtils.createHdfsResource(containerProperties, LocalResourceType.FILE, yarnConfig));

                    ContainerLaunchContext context = Records.newRecord(ContainerLaunchContext.class);
                    context.setContainerId(containerYarn.getId());
                    context.setResource(containerYarn.getResource());
                    context.setEnvironment(Maps.<String, String> newHashMap());
                    context.setCommands(Collections.singletonList(command));
                    context.setLocalResources(localResources);
                    context.setUser(properties.getUser());

                    log.debug(String.format("container '%s' executing command '%s'", meta.id, command));

                    StartContainerRequest startReq = Records.newRecord(StartContainerRequest.class);
                    startReq.setContainerLaunchContext(context);

                    try {
                        getContainerManager(containerYarn).startContainer(startReq);

                    } catch (YarnRemoteException e) {
                        // abort the cycle; the next run will retry the remaining assignments
                        log.error(String.format("Error starting container '%s'", meta.id), e);
                        return;
                    }

                    log.debug(String.format("container '%s' started, updating container node", meta.id));

                    meta.setProperties(containerProp);
                    meta.setState(ContainerState.CONNECTING);
                    meta.setYarnId(containerYarn.getId().getId());
                    yarnDataService.update(meta);

                    yarn2meta.put(containerYarn.getId(), meta.id);

                    log.debug(String.format("removing '%s' from unassigned yarn containers and adding to active list", containerYarn.getId()));

                    itYarn.remove();
                    activeContainers.put(containerYarn.getId(), containerYarn);

                    // cleanup
                    propertiesFile.deleteOnExit();

                }

                // process containers reported as completed by the RM
                for (ContainerStatus status : response.getCompletedContainersStatuses()) {
                    ContainerId id = status.getContainerId();

                    log.info(String.format("Container '%s' completed", id));

                    if (unassignedContainers.containsKey(id)) {
                        log.info(String.format("Unassigned container '%s' terminated, removing", id));
                        unassignedContainers.remove(id);
                    }

                    if (activeContainers.containsKey(id)) {
                        log.info(String.format("Active container '%s' terminated, removing", id));
                        activeContainers.remove(id);

                        String metaId = yarn2meta.get(id);
                        YarnContainerData meta = yarnDataService.read(metaId);

                        log.debug(String.format("container '%s' finalized, updating container node", meta.id));

                        yarnDataService.update(meta.setState(ContainerState.FINALIZE));
                    }

                    completedContainers.put(id, status);
                }

                log.debug("yarn service update cycle complete");

            } catch (Exception e) {
                // never let an exception escape the scheduled task, or the
                // executor would silently cancel further cycles
                log.error("Error while executing yarn update cycle", e);
            }
        }

        /**
         * Builds the list of surplus unassigned containers to release and
         * removes them from the unassigned set. A negative numYarnRequired
         * means -numYarnRequired containers are overstock.
         */
        private List<ContainerId> createOverstockReleaseList(int numYarnRequired) {
            List<ContainerId> unneededReleasedIds = new ArrayList<ContainerId>();
            Iterator<Container> itUnassigned = unassignedContainers.values().iterator();
            if (numYarnRequired < 0) {
                for (int i = 0; i < -numYarnRequired && itUnassigned.hasNext(); i++) {
                    Container container = itUnassigned.next();
                    unneededReleasedIds.add(container.getId());
                    log.debug(String.format("Container '%s' no longer required, removing", container.getId()));
                    itUnassigned.remove();
                }
            }
            return unneededReleasedIds;
        }

        /**
         * Builds the list of yarn container ids whose meta container has been
         * halted and which should therefore be released.
         */
        private List<ContainerId> createDestroyedReleaseList(Collection<YarnContainerData> yarndata) {
            List<ContainerId> releasedIds = new ArrayList<ContainerId>();
            for (YarnContainerData meta : yarndata) {
                if (meta.state == ContainerState.HALTED) {
                    ContainerId containerId = Records.newRecord(ContainerId.class);
                    containerId.setApplicationAttemptId(attemptId);
                    containerId.setId(meta.yarnId);
                    releasedIds.add(containerId);
                    log.debug(String.format("releasing container '%s'", containerId));
                }
            }
            return releasedIds;
        }

        /** Counts meta containers waiting to be assigned a yarn container. */
        private int countAcquireMeta(Collection<YarnContainerData> yarndata) {
            int numMetaAcquire = 0;
            for (YarnContainerData meta : yarndata) {
                if (meta.state == ContainerState.ACQUIRE) {
                    numMetaAcquire++;
                }
            }
            return numMetaAcquire;
        }

        /** Counts meta containers in any state other than ACQUIRE, HALTED or FINALIZE. */
        private int countActiveMeta(Collection<YarnContainerData> yarndata) {
            int numMetaActive = 0;
            for (YarnContainerData meta : yarndata) {
                if (meta.state != ContainerState.ACQUIRE && meta.state != ContainerState.HALTED && meta.state != ContainerState.FINALIZE) {
                    numMetaActive++;
                }
            }
            return numMetaActive;
        }
    }

    /**
     * Creates an RPC proxy to the node manager hosting the given container.
     */
    private ContainerManager getContainerManager(Container container) {
        YarnConfiguration yarnConf = new YarnConfiguration(yarnConfig);
        YarnRPC rpc = YarnRPC.create(yarnConf);
        NodeId nodeId = container.getNodeId();
        String containerIpPort = String.format("%s:%d", nodeId.getHost(), nodeId.getPort());
        log.info("Connecting to ContainerManager at: " + containerIpPort);
        InetSocketAddress addr = NetUtils.createSocketAddr(containerIpPort);
        ContainerManager cm = (ContainerManager) rpc.getProxy(ContainerManager.class, addr, yarnConfig);
        return cm;
    }

    /**
     * Deletes the local master working directories; safe to call repeatedly.
     */
    public static void destroyLocalMasterNamespace() {
        log.info("cleaning up master directory");
        FileUtils.deleteQuietly(new File(YarnUtils.YARN_MASTER_DESTINATION));
        FileUtils.deleteQuietly(new File(YarnUtils.YARN_MASTER_PROPERTIES));
        FileUtils.deleteQuietly(new File(YarnUtils.YARN_CONTAINER_STAGING));
    }

}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnStatusProvider.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnStatusProvider.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnStatusProvider.java
new file mode 100644
index 0000000..b4a13b9
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnStatusProvider.java
@@ -0,0 +1,67 @@
+package org.apache.helix.metamanager.impl.yarn;
+
+import java.util.Properties;
+
+import org.apache.helix.metamanager.StatusProviderService;
+import org.apache.helix.metamanager.impl.yarn.YarnContainerData.ContainerState;
+import org.apache.log4j.Logger;
+
+/**
+ * StatusProvider for YARN-based containers spawned via
+ * {@link YarnContainerProvider}. Reads {@link YarnDataProvider} meta data.
+ * Runnable and configurable service.
+ *
+ */
+public class YarnStatusProvider implements StatusProviderService {
+
+ static final Logger log = Logger.getLogger(YarnStatusProvider.class);
+
+ String yarndata;
+
+ ZookeeperYarnDataProvider yarnDataService;
+
+ public YarnStatusProvider() {
+ // left blank
+ }
+
+ public YarnStatusProvider(String yarndata) {
+ this.yarndata = yarndata;
+ this.yarnDataService = new ZookeeperYarnDataProvider(yarndata);
+ }
+
+ @Override
+ public void configure(Properties properties) throws Exception {
+ this.yarndata = properties.getProperty("yarndata");
+ this.yarnDataService = new ZookeeperYarnDataProvider(yarndata);
+ }
+
+ @Override
+ public void start() throws Exception {
+ yarnDataService = new ZookeeperYarnDataProvider(yarndata);
+ yarnDataService.start();
+ }
+
+ @Override
+ public void stop() throws Exception {
+ if (yarnDataService != null) {
+ yarnDataService.stop();
+ yarnDataService = null;
+ }
+ }
+
+ @Override
+ public boolean exists(String id) {
+ return yarnDataService.exists(id);
+ }
+
+ @Override
+ public boolean isHealthy(String id) {
+ try {
+ return yarnDataService.read(id).state == ContainerState.ACTIVE;
+ } catch (Exception e) {
+ log.warn(String.format("Could not get activity data of %s", id));
+ return false;
+ }
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnUtils.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnUtils.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnUtils.java
new file mode 100644
index 0000000..fe093c8
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnUtils.java
@@ -0,0 +1,174 @@
+package org.apache.helix.metamanager.impl.yarn;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.Writer;
+import java.nio.charset.Charset;
+import java.util.Properties;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileContext;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.UnsupportedFileSystemException;
+import org.apache.hadoop.yarn.api.records.LocalResource;
+import org.apache.hadoop.yarn.api.records.LocalResourceType;
+import org.apache.hadoop.yarn.api.records.LocalResourceVisibility;
+import org.apache.hadoop.yarn.api.records.URL;
+import org.apache.hadoop.yarn.util.ConverterUtils;
+import org.apache.hadoop.yarn.util.Records;
+import org.apache.helix.metamanager.impl.yarn.YarnContainerData.ContainerState;
+import org.apache.log4j.Logger;
+
+import com.google.common.base.Preconditions;
+import com.google.common.io.Files;
+import com.google.gson.Gson;
+import com.google.gson.GsonBuilder;
+import com.google.gson.TypeAdapter;
+import com.google.gson.stream.JsonReader;
+import com.google.gson.stream.JsonToken;
+import com.google.gson.stream.JsonWriter;
+
+/**
+ * Utility for writing property files, transferring data via HDFS and
+ * serializing {@link YarnContainerData} for zookeeper.
+ *
+ */
+class YarnUtils {
+
+ static final Logger log = Logger.getLogger(YarnUtils.class);
+
+ static final String YARN_MASTER_ARCHIVE_PATH = "target/metamanager-assembly.tar.gz";
+ static final String YARN_MASTER_PATH = "master/metamanager/bin/yarn-master-process.sh";
+ static final String YARN_MASTER_STAGING = "master.tar.gz";
+ static final String YARN_MASTER_DESTINATION = "master";
+ static final String YARN_MASTER_PROPERTIES = "master.properties";
+ static final String YARN_CONTAINER_ARCHIVE_PATH = "target/metamanager-assembly.tar.gz";
+ static final String YARN_CONTAINER_STAGING = "container.tar.gz";
+ static final String YARN_CONTAINER_PATH = "container/metamanager/bin/yarn-container-process.sh";
+ static final String YARN_CONTAINER_DESTINATION = "container";
+ static final String YARN_CONTAINER_PROPERTIES = "container.properties";
+
+ static Gson gson;
+ static {
+ GsonBuilder builder = new GsonBuilder();
+ builder.registerTypeAdapter(ContainerState.class, new ContainerStateAdapter());
+ builder.setPrettyPrinting();
+ gson = builder.create();
+ }
+
+ public static String toJson(YarnContainerData meta) {
+ return gson.toJson(meta);
+ }
+
+ public static YarnContainerData fromJson(String json) {
+ return gson.fromJson(json, YarnContainerData.class);
+ }
+
+ public static Properties getPropertiesFromPath(String path) throws IOException {
+ Properties properties = new Properties();
+ properties.load(new InputStreamReader(new FileInputStream(path)));
+ return properties;
+ }
+
+ public static File writePropertiesToTemp(Properties properties) throws IOException {
+ File tmpFile = File.createTempFile("provider", ".properties");
+ Writer writer = Files.newWriter(tmpFile, Charset.defaultCharset());
+ properties.store(writer, null);
+ writer.flush();
+ writer.close();
+ return tmpFile;
+ }
+
+ public static Path copyToHdfs(String source, String dest, String namespace, Configuration conf) throws IOException {
+ Path sourcePath = makeQualified(source);
+ Path destPath = makeQualified(conf.get(FileSystem.FS_DEFAULT_NAME_KEY) + "/" + namespace + "/" + dest);
+ log.debug(String.format("Copying '%s' to '%s'", sourcePath, destPath));
+
+ FileSystem fs = FileSystem.get(conf);
+ fs.copyFromLocalFile(false, true, sourcePath, destPath);
+ fs.close();
+ return destPath;
+ }
+
+ public static void destroyHdfsNamespace(String namespace, Configuration conf) throws IOException {
+ Path path = makeQualified(conf.get(FileSystem.FS_DEFAULT_NAME_KEY) + "/" + namespace);
+ log.debug(String.format("Deleting '%s'", path));
+
+ FileSystem fs = FileSystem.get(conf);
+ fs.delete(path, true);
+ fs.close();
+ }
+
+ public static LocalResource createHdfsResource(Path path, LocalResourceType type, Configuration conf) throws IOException {
+ FileSystem fs = FileSystem.get(conf);
+
+ URL url = ConverterUtils.getYarnUrlFromPath(path);
+
+ FileStatus status = fs.getFileStatus(path);
+
+ LocalResource resource = Records.newRecord(LocalResource.class);
+ resource.setResource(url);
+ resource.setSize(status.getLen());
+ resource.setTimestamp(status.getModificationTime());
+ resource.setType(type);
+ resource.setVisibility(LocalResourceVisibility.APPLICATION);
+
+ fs.close();
+
+ return resource;
+ }
+
+ static Path makeQualified(String path) throws UnsupportedFileSystemException {
+ return FileContext.getFileContext().makeQualified(new Path(path));
+ }
+
+ static class ContainerStateAdapter extends TypeAdapter<ContainerState> {
+ @Override
+ public ContainerState read(JsonReader reader) throws IOException {
+ if (reader.peek() == JsonToken.NULL) {
+ reader.nextNull();
+ return null;
+ }
+ return ContainerState.valueOf(reader.nextString());
+ }
+
+ @Override
+ public void write(JsonWriter writer, ContainerState value) throws IOException {
+ if (value == null) {
+ writer.nullValue();
+ return;
+ }
+ writer.value(value.name());
+ }
+ }
+
+ static YarnContainerProcessProperties createContainerProcessProperties(Properties properties) {
+ Preconditions.checkNotNull(properties);
+ YarnContainerProcessProperties yarnProp = new YarnContainerProcessProperties();
+ yarnProp.putAll(properties);
+ return yarnProp;
+ }
+
+ static YarnContainerProviderProperties createContainerProviderProperties(Properties properties) {
+ Preconditions.checkNotNull(properties);
+ YarnContainerProviderProperties yarnProp = new YarnContainerProviderProperties();
+ yarnProp.putAll(properties);
+ return yarnProp;
+ }
+
+ static YarnMasterProperties createMasterProperties(Properties properties) {
+ Preconditions.checkNotNull(properties);
+ YarnMasterProperties yarnProp = new YarnMasterProperties();
+ yarnProp.putAll(properties);
+ return yarnProp;
+ }
+
+ private YarnUtils() {
+ // left blank
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/ZookeeperMetadataProvider.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/ZookeeperMetadataProvider.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/ZookeeperMetadataProvider.java
new file mode 100644
index 0000000..79efd8c
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/ZookeeperMetadataProvider.java
@@ -0,0 +1,116 @@
+package org.apache.helix.metamanager.impl.yarn;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Properties;
+
+import org.I0Itec.zkclient.ZkClient;
+import org.I0Itec.zkclient.exception.ZkException;
+import org.apache.helix.metamanager.Service;
+import org.apache.log4j.Logger;
+
+public class ZookeeperMetadataProvider implements MetadataProvider, Service {
+
+ static final Logger log = Logger.getLogger(ZookeeperMetadataProvider.class);
+
+ static final String CONTAINER_NAMESPACE = "containers";
+
+ static final String BASE_PATH = "/" + CONTAINER_NAMESPACE;
+
+ static final int META_TIMEOUT = 5000;
+ static final long POLL_INTERVAL = 100;
+
+ String metadata;
+
+ ZkClient client;
+
+ public ZookeeperMetadataProvider() {
+ // left blank
+ }
+
+ public ZookeeperMetadataProvider(String metadataAddress) {
+ this.metadata = metadataAddress;
+ }
+
+ @Override
+ public void configure(Properties properties) throws Exception {
+ this.metadata = properties.getProperty("metadata");
+ }
+
+ @Override
+ public void start() {
+ log.debug(String.format("starting metadata service for '%s'", metadata));
+
+ client = new ZkClient(metadata, META_TIMEOUT, META_TIMEOUT);
+
+ client.createPersistent(BASE_PATH, true);
+ }
+
+ @Override
+ public void stop() {
+ log.debug(String.format("stopping metadata service for '%s'", metadata));
+ if (client != null) {
+ client.close();
+ client = null;
+ }
+ }
+
+ @Override
+ public boolean exists(String id) {
+ return client.exists(makePath(id));
+ }
+
+ @Override
+ public void create(ContainerMetadata meta) throws MetadataException {
+ try {
+ client.createEphemeral(makePath(meta.id), YarnUtils.toJson(meta));
+ } catch (ZkException e) {
+ throw new MetadataException(e);
+ }
+ }
+
+ @Override
+ public ContainerMetadata read(String id) throws MetadataException {
+ try {
+ return YarnUtils.fromJson(client.<String> readData(makePath(id)));
+ } catch (ZkException e) {
+ throw new MetadataException(e);
+ }
+ }
+
+ @Override
+ public Collection<ContainerMetadata> readAll() throws MetadataException {
+ try {
+ Collection<ContainerMetadata> metadata = new ArrayList<ContainerMetadata>();
+ for (String id : client.getChildren(BASE_PATH)) {
+ metadata.add(YarnUtils.fromJson(client.<String> readData(makePath(id))));
+ }
+ return metadata;
+ } catch (ZkException e) {
+ throw new MetadataException(e);
+ }
+ }
+
+ @Override
+ public void update(ContainerMetadata meta) throws MetadataException {
+ try {
+ client.writeData(makePath(meta.id), YarnUtils.toJson(meta));
+ } catch (ZkException e) {
+ throw new MetadataException(e);
+ }
+ }
+
+ @Override
+ public void delete(String id) throws MetadataException {
+ try {
+ client.delete(makePath(id));
+ } catch (ZkException e) {
+ throw new MetadataException(e);
+ }
+ }
+
+ String makePath(String containerId) {
+ return BASE_PATH + "/" + containerId;
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/ZookeeperMetadataService.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/ZookeeperMetadataService.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/ZookeeperMetadataService.java
new file mode 100644
index 0000000..b0e150a
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/ZookeeperMetadataService.java
@@ -0,0 +1,102 @@
+package org.apache.helix.metamanager.impl.yarn;
+
+import java.util.ArrayList;
+import java.util.Collection;
+
+import org.I0Itec.zkclient.ZkClient;
+import org.I0Itec.zkclient.exception.ZkException;
+import org.apache.log4j.Logger;
+
+public class ZookeeperMetadataService implements MetadataService {
+
+ static final Logger log = Logger.getLogger(ZookeeperMetadataService.class);
+
+ static final String CONTAINER_NAMESPACE = "containers";
+
+ static final String BASE_PATH = "/" + CONTAINER_NAMESPACE;
+
+ static final long POLL_INTERVAL = 100;
+
+ final String metadataAddress;
+
+ ZkClient client;
+
+ public ZookeeperMetadataService(String metadataAddress) {
+ this.metadataAddress = metadataAddress;
+ }
+
+ public void startService() {
+ log.debug(String.format("starting metadata service for '%s'", metadataAddress));
+
+ client = new ZkClient(metadataAddress);
+
+ client.createPersistent(BASE_PATH, true);
+ }
+
+ public void stopService() {
+ log.debug(String.format("stopping metadata service for '%s'", metadataAddress));
+ if (client != null) {
+ client.close();
+ client = null;
+ }
+ }
+
+ @Override
+ public boolean exists(String id) {
+ return client.exists(makePath(id));
+ }
+
+ @Override
+ public void create(ContainerMetadata meta) throws MetadataServiceException {
+ try {
+ client.createPersistent(makePath(meta.id), YarnUtils.toJson(meta));
+ } catch (ZkException e) {
+ throw new MetadataServiceException(e);
+ }
+ }
+
+ @Override
+ public ContainerMetadata read(String id) throws MetadataServiceException {
+ try {
+ return YarnUtils.fromJson(client.<String> readData(makePath(id)));
+ } catch (ZkException e) {
+ throw new MetadataServiceException(e);
+ }
+ }
+
+ @Override
+ public Collection<ContainerMetadata> readAll() throws MetadataServiceException {
+ try {
+ Collection<ContainerMetadata> metadata = new ArrayList<ContainerMetadata>();
+ for (String id : client.getChildren(BASE_PATH)) {
+ metadata.add(YarnUtils.fromJson(client.<String> readData(makePath(id))));
+ }
+ return metadata;
+ } catch (ZkException e) {
+ throw new MetadataServiceException(e);
+ }
+ }
+
+ @Override
+ public void update(ContainerMetadata meta) throws MetadataServiceException {
+ try {
+ client.writeData(makePath(meta.id), YarnUtils.toJson(meta));
+ } catch (ZkException e) {
+ throw new MetadataServiceException(e);
+ }
+ }
+
+ @Override
+ public void delete(String id) throws MetadataServiceException {
+ try {
+ client.delete(makePath(id));
+ } catch (ZkException e) {
+ throw new MetadataServiceException(e);
+ }
+ }
+
+ String makePath(String containerId) {
+ return BASE_PATH + "/" + containerId;
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/ZookeeperYarnDataProvider.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/ZookeeperYarnDataProvider.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/ZookeeperYarnDataProvider.java
new file mode 100644
index 0000000..32af837
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/ZookeeperYarnDataProvider.java
@@ -0,0 +1,100 @@
+package org.apache.helix.metamanager.impl.yarn;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Properties;
+
+import org.I0Itec.zkclient.ZkClient;
+import org.apache.helix.metamanager.Service;
+import org.apache.log4j.Logger;
+
+/**
+ * Configurable and runnable service for {@link YarnDataProvider} based on
+ * zookeeper.
+ *
+ */
+public class ZookeeperYarnDataProvider implements YarnDataProvider, Service {
+
+ static final Logger log = Logger.getLogger(ZookeeperYarnDataProvider.class);
+
+ static final String CONTAINER_NAMESPACE = "containers";
+
+ static final String BASE_PATH = "/" + CONTAINER_NAMESPACE;
+
+ static final int META_TIMEOUT = 5000;
+ static final long POLL_INTERVAL = 100;
+
+ String yarndata;
+
+ ZkClient client;
+
+ public ZookeeperYarnDataProvider() {
+ // left blank
+ }
+
+ public ZookeeperYarnDataProvider(String yarndataAddress) {
+ this.yarndata = yarndataAddress;
+ }
+
+ @Override
+ public void configure(Properties properties) throws Exception {
+ this.yarndata = properties.getProperty("yarndata");
+ }
+
+ @Override
+ public void start() {
+ log.debug(String.format("starting yarndata service for '%s'", yarndata));
+
+ client = new ZkClient(yarndata, META_TIMEOUT, META_TIMEOUT);
+
+ client.createPersistent(BASE_PATH, true);
+ }
+
+ @Override
+ public void stop() {
+ log.debug(String.format("stopping yarndata service for '%s'", yarndata));
+ if (client != null) {
+ client.close();
+ client = null;
+ }
+ }
+
+ @Override
+ public boolean exists(String id) {
+ return client.exists(makePath(id));
+ }
+
+ @Override
+ public void create(YarnContainerData meta) throws Exception {
+ client.createEphemeral(makePath(meta.id), YarnUtils.toJson(meta));
+ }
+
+ @Override
+ public YarnContainerData read(String id) throws Exception {
+ return YarnUtils.fromJson(client.<String> readData(makePath(id)));
+ }
+
+ @Override
+ public Collection<YarnContainerData> readAll() throws Exception {
+ Collection<YarnContainerData> yarndata = new ArrayList<YarnContainerData>();
+ for (String id : client.getChildren(BASE_PATH)) {
+ yarndata.add(YarnUtils.fromJson(client.<String> readData(makePath(id))));
+ }
+ return yarndata;
+ }
+
+ @Override
+ public void update(YarnContainerData meta) throws Exception {
+ client.writeData(makePath(meta.id), YarnUtils.toJson(meta));
+ }
+
+ @Override
+ public void delete(String id) throws Exception {
+ client.delete(makePath(id));
+ }
+
+ String makePath(String containerId) {
+ return BASE_PATH + "/" + containerId;
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/managed/ContainerProcess.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/managed/ContainerProcess.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/managed/ContainerProcess.java
new file mode 100644
index 0000000..11fb75d
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/managed/ContainerProcess.java
@@ -0,0 +1,85 @@
+package org.apache.helix.metamanager.managed;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import org.apache.helix.HelixManager;
+import org.apache.helix.HelixManagerFactory;
+import org.apache.helix.InstanceType;
+import org.apache.log4j.Logger;
+
+public class ContainerProcess
+{
+ static final Logger log = Logger.getLogger(ContainerProcess.class);
+
+ private String clusterName;
+ private String zkAddress;
+ private String instanceName;
+ private HelixManager participantManager;
+
+ public ContainerProcess(String clusterName, String zkAddress, String instanceName)
+ {
+ this.clusterName = clusterName;
+ this.zkAddress = zkAddress;
+ this.instanceName = instanceName;
+
+ }
+
+ public void start() throws Exception
+ {
+ log.info("STARTING "+ instanceName);
+ participantManager = HelixManagerFactory.getZKHelixManager(clusterName,
+ instanceName, InstanceType.PARTICIPANT, zkAddress);
+ participantManager.getStateMachineEngine().registerStateModelFactory(
+ "MasterSlave", new ManagedFactory());
+ participantManager.connect();
+ log.info("STARTED "+ instanceName);
+
+ }
+
+ public void stop()
+ {
+ if (participantManager != null)
+ {
+ participantManager.disconnect();
+ }
+ }
+
+ public static void main(String[] args) throws Exception
+ {
+ final String zkAddress = args[0];
+ final String clusterName = args[1];
+ final String instanceName = args[2];
+
+ // Give a unique id to each process, most commonly used format hostname_port
+ final ContainerProcess managerProcess = new ContainerProcess(clusterName, zkAddress,
+ instanceName);
+ Runtime.getRuntime().addShutdownHook(new Thread()
+ {
+ @Override
+ public void run()
+ {
+ log.info("Shutting down " + instanceName);
+ managerProcess.stop();
+ }
+ });
+ managerProcess.start();
+ Thread.currentThread().join();
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/managed/HelixClusterAdmin.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/managed/HelixClusterAdmin.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/managed/HelixClusterAdmin.java
new file mode 100644
index 0000000..f33c09c
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/managed/HelixClusterAdmin.java
@@ -0,0 +1,42 @@
+package org.apache.helix.metamanager.managed;
+
+import org.apache.helix.HelixAdmin;
+import org.apache.helix.metamanager.ClusterAdmin;
+import org.apache.helix.model.InstanceConfig;
+import org.apache.log4j.Logger;
+
+public class HelixClusterAdmin implements ClusterAdmin {
+
+ static final Logger log = Logger.getLogger(HelixClusterAdmin.class);
+
+ final String clusterName;
+ final String resourceName;
+ final int replica;
+ final HelixAdmin admin;
+
+ public HelixClusterAdmin(String clusterName, String resourceName,
+ int replica, HelixAdmin admin) {
+ this.clusterName = clusterName;
+ this.resourceName = resourceName;
+ this.replica = replica;
+ this.admin = admin;
+ }
+
+ @Override
+ public synchronized void addInstance(String connection) {
+ log.debug(String.format("injecting instance %s in cluster %s", connection, clusterName));
+ admin.addInstance(clusterName, new InstanceConfig(connection));
+ }
+
+ @Override
+ public synchronized void removeInstance(String connection) {
+ log.debug(String.format("removing instance %s from cluster %s", connection, clusterName));
+ admin.dropInstance(clusterName, new InstanceConfig(connection));
+ }
+
+ @Override
+ public void rebalance() {
+ admin.rebalance(clusterName, resourceName, replica);
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/managed/LocalClusterManager.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/managed/LocalClusterManager.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/managed/LocalClusterManager.java
new file mode 100644
index 0000000..2bb64de
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/managed/LocalClusterManager.java
@@ -0,0 +1,42 @@
+package org.apache.helix.metamanager.managed;
+
+import org.apache.helix.HelixAdmin;
+import org.apache.helix.metamanager.ClusterAdmin;
+import org.apache.helix.model.InstanceConfig;
+import org.apache.log4j.Logger;
+
+public class LocalClusterManager implements ClusterAdmin {
+
+ static final Logger log = Logger.getLogger(LocalClusterManager.class);
+
+ final String clusterName;
+ final String resourceName;
+ final int replica;
+ final HelixAdmin admin;
+
+ public LocalClusterManager(String clusterName, String resourceName,
+ int replica, HelixAdmin admin) {
+ this.clusterName = clusterName;
+ this.resourceName = resourceName;
+ this.replica = replica;
+ this.admin = admin;
+ }
+
+ @Override
+ public synchronized void addInstance(String connection) {
+ log.debug(String.format("injecting instance %s in cluster %s", connection, clusterName));
+ admin.addInstance(clusterName, new InstanceConfig(connection));
+ }
+
+ @Override
+ public synchronized void removeInstance(String connection) {
+ log.debug(String.format("removing instance %s from cluster %s", connection, clusterName));
+ admin.dropInstance(clusterName, new InstanceConfig(connection));
+ }
+
+ @Override
+ public void rebalance() {
+ admin.rebalance(clusterName, resourceName, replica);
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/managed/LocalContainerProvider.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/managed/LocalContainerProvider.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/managed/LocalContainerProvider.java
new file mode 100644
index 0000000..6c8eec0
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/managed/LocalContainerProvider.java
@@ -0,0 +1,87 @@
+package org.apache.helix.metamanager.managed;
+
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+
+import org.apache.helix.metamanager.ClusterContainerProvider;
+import org.apache.log4j.Logger;
+
+public class LocalContainerProvider implements ClusterContainerProvider {
+
+ static final Logger log = Logger.getLogger(LocalContainerProvider.class);
+
+ static final String REQUIRED_TYPE = "container";
+
+ // global view of processes required
+ static final Object staticLock = new Object();
+ static final Map<String, LocalProcess> processes = new HashMap<String, LocalProcess>();
+
+ int connectionCounter = 0;
+
+ final String zkAddress;
+ final String clusterName;
+ final String providerName;
+
+ public LocalContainerProvider(String zkAddress, String clusterName, String providerName) {
+ this.zkAddress = zkAddress;
+ this.clusterName = clusterName;
+ this.providerName = providerName;
+ }
+
+ @Override
+ public void create(String id, String type) throws Exception {
+ synchronized (staticLock) {
+ if(processes.containsKey(id))
+ throw new IllegalArgumentException(String.format("Process '%s' already exists", id));
+
+ if(!type.equals(REQUIRED_TYPE))
+ throw new IllegalArgumentException(String.format("Type '%s' not supported", type));
+
+ log.info(String.format("Running container '%s' (zkAddress='%s', clusterName='%s')", id, zkAddress, clusterName));
+
+ ManagedProcess process = new ManagedProcess(clusterName, zkAddress, id);
+ process.start();
+
+ processes.put(id, new LocalProcess(id, providerName, process));
+
+ }
+ }
+
+ @Override
+ public void destroy(String id) throws Exception {
+ synchronized (staticLock) {
+ if(!processes.containsKey(id))
+ throw new IllegalArgumentException(String.format("Process '%s' does not exists", id));
+
+ log.info(String.format("Destroying container '%s'", id));
+
+ LocalProcess local = processes.remove(id);
+
+ local.process.stop();
+ }
+ }
+
+ @Override
+ public void destroyAll() {
+ synchronized (staticLock) {
+ log.info("Destroying all processes");
+ for(String id : new HashSet<String>(processes.keySet())) {
+ try { destroy(id); } catch (Exception ignore) {}
+ }
+ }
+ }
+
+ static class LocalProcess {
+ final String id;
+ final String owner;
+ final ManagedProcess process;
+
+ public LocalProcess(String id, String owner, ManagedProcess process) {
+ this.id = id;
+ this.owner = owner;
+ this.process = process;
+ }
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/managed/LocalProcessProvider.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/managed/LocalProcessProvider.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/managed/LocalProcessProvider.java
new file mode 100644
index 0000000..01e3ab6
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/managed/LocalProcessProvider.java
@@ -0,0 +1,100 @@
+package org.apache.helix.metamanager.managed;
+
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+
+import org.apache.helix.metamanager.ClusterContainerProvider;
+import org.apache.log4j.Logger;
+
+public class LocalProcessProvider implements ClusterContainerProvider {
+
+ static final Logger log = Logger.getLogger(LocalProcessProvider.class);
+
+ static final String REQUIRED_TYPE = "container";
+
+ Map<String, ManagedProcess> processes = new HashMap<String, ManagedProcess>();
+ Map<String, String> id2connection = new HashMap<String, String>();
+
+ int connectionCounter = 0;
+
+ final String zkAddress;
+ final String clusterName;
+ final int basePort;
+
+ public LocalProcessProvider(String zkAddress, String clusterName, int basePort) {
+ this.zkAddress = zkAddress;
+ this.clusterName = clusterName;
+ this.basePort = basePort;
+ }
+
+ @Override
+ public synchronized String create(String id, String type) throws Exception {
+ if(processes.containsKey(id))
+ throw new IllegalArgumentException(String.format("Process '%s' already exists", id));
+
+ if(!type.equals(REQUIRED_TYPE))
+ throw new IllegalArgumentException(String.format("Type '%s' not supported", type));
+
+ String connection = "localhost_" + (basePort + connectionCounter);
+ connectionCounter++;
+
+ log.info(String.format("Running container '%s' (zkAddress='%s', clusterName='%s', connection='%s')", id, zkAddress, clusterName, connection));
+
+ ManagedProcess p = new ManagedProcess(clusterName, zkAddress, connection);
+
+ processes.put(id, p);
+ id2connection.put(id, connection);
+
+ return connection;
+ }
+
+ public synchronized void start(String id) throws Exception {
+ if(!processes.containsKey(id))
+ throw new IllegalArgumentException(String.format("Process '%s' does not exists", id));
+
+ log.info(String.format("Starting container '%s'", id));
+
+ ManagedProcess p = processes.get(id);
+
+ p.start();
+ }
+
+ public synchronized void stop(String id) throws Exception {
+ if(!processes.containsKey(id))
+ throw new IllegalArgumentException(String.format("Process '%s' does not exists", id));
+
+ log.info(String.format("Stopping container '%s'", id));
+
+ ManagedProcess p = processes.get(id);
+
+ p.stop();
+ }
+
+ @Override
+ public synchronized String destroy(String id) throws Exception {
+ if(!processes.containsKey(id))
+ throw new IllegalArgumentException(String.format("Process '%s' does not exists", id));
+
+ log.info(String.format("Destroying container '%s'", id));
+
+ String connection = id2connection.get(id);
+
+ processes.remove(id);
+ id2connection.remove(id);
+
+ return connection;
+ }
+
+ public synchronized void destroyAll() {
+ log.info("Destroying all processes");
+ for(String id : new HashSet<String>(processes.keySet())) {
+ try {
+ destroy(id);
+ } catch (Exception ignore) {
+ // ignore
+ }
+ }
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/managed/LocalStatusProvider.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/managed/LocalStatusProvider.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/managed/LocalStatusProvider.java
new file mode 100644
index 0000000..54f040f
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/managed/LocalStatusProvider.java
@@ -0,0 +1,22 @@
+package org.apache.helix.metamanager.managed;
+
+import org.apache.helix.metamanager.ClusterStatusProvider;
+
+/**
+ * In-memory ClusterStatusProvider that reports a single, externally mutable
+ * target container count, regardless of the container type queried.
+ */
+public class LocalStatusProvider implements ClusterStatusProvider {
+
+    // single global target count; the type argument of the getter is ignored
+    int targetContainerCount;
+
+    public LocalStatusProvider(int count) {
+        this.targetContainerCount = count;
+    }
+
+    /** Update the target count returned by subsequent getTargetContainerCount calls. */
+    public void setTargetContainerCount(int count) {
+        this.targetContainerCount = count;
+    }
+
+    @Override
+    public int getTargetContainerCount(String type) {
+        return this.targetContainerCount;
+    }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/managed/Managed.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/managed/Managed.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/managed/Managed.java
new file mode 100644
index 0000000..1e03103
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/managed/Managed.java
@@ -0,0 +1,64 @@
+package org.apache.helix.metamanager.managed;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import org.apache.helix.NotificationContext;
+import org.apache.helix.model.Message;
+import org.apache.helix.participant.statemachine.StateModel;
+import org.apache.helix.participant.statemachine.StateModelInfo;
+import org.apache.helix.participant.statemachine.Transition;
+import org.apache.log4j.Logger;
+
+/**
+ * Helix state model for a managed container participant, covering the states
+ * OFFLINE, SLAVE, MASTER and DROPPED. Every transition callback only logs the
+ * transition at trace level; the model carries no per-partition state.
+ */
+@StateModelInfo(initialState = "OFFLINE", states = { "OFFLINE", "SLAVE", "MASTER", "DROPPED" })
+public class Managed extends StateModel {
+
+ static final Logger log = Logger.getLogger(Managed.class);
+
+ @Transition(from = "OFFLINE", to = "SLAVE")
+ public void offlineToSlave(Message m, NotificationContext context) {
+ log.trace(String.format("%s transitioning from OFFLINE to SLAVE",
+ context.getManager().getInstanceName()));
+ }
+
+ @Transition(from = "SLAVE", to = "OFFLINE")
+ public void slaveToOffline(Message m, NotificationContext context) {
+ log.trace(String.format("%s transitioning from SLAVE to OFFLINE",
+ context.getManager().getInstanceName()));
+ }
+
+ @Transition(from = "SLAVE", to = "MASTER")
+ public void slaveToMaster(Message m, NotificationContext context) {
+ log.trace(String.format("%s transitioning from SLAVE to MASTER",
+ context.getManager().getInstanceName()));
+ }
+
+ @Transition(from = "MASTER", to = "SLAVE")
+ public void masterToSlave(Message m, NotificationContext context) {
+ log.trace(String.format("%s transitioning from MASTER to SLAVE",
+ context.getManager().getInstanceName()));
+ }
+
+ @Transition(from = "OFFLINE", to = "DROPPED")
+ public void offlineToDropped(Message m, NotificationContext context) {
+ log.trace(String.format("%s transitioning from OFFLINE to DROPPED",
+ context.getManager().getInstanceName()));
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/managed/ManagedFactory.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/managed/ManagedFactory.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/managed/ManagedFactory.java
new file mode 100644
index 0000000..f51d9c0
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/managed/ManagedFactory.java
@@ -0,0 +1,30 @@
+package org.apache.helix.metamanager.managed;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import org.apache.helix.participant.statemachine.StateModelFactory;
+
+/**
+ * Factory producing one stateless {@link Managed} state model per partition.
+ */
+public class ManagedFactory extends StateModelFactory<Managed> {
+
+ @Override
+ public Managed createNewStateModel(String partitionName) {
+ return new Managed();
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/managed/ManagedProcess.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/managed/ManagedProcess.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/managed/ManagedProcess.java
new file mode 100644
index 0000000..387c459
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/managed/ManagedProcess.java
@@ -0,0 +1,85 @@
+package org.apache.helix.metamanager.managed;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import org.apache.helix.HelixManager;
+import org.apache.helix.HelixManagerFactory;
+import org.apache.helix.InstanceType;
+import org.apache.log4j.Logger;
+
+/**
+ * Standalone wrapper around a Helix PARTICIPANT. Connects to the given cluster
+ * and registers the "MasterSlave" state model via {@link ManagedFactory}.
+ * Runnable as a process: main(zkAddress, clusterName, instanceName).
+ */
+public class ManagedProcess
+{
+ static final Logger log = Logger.getLogger(ManagedProcess.class);
+
+ private String clusterName;
+ private String zkAddress;
+ private String instanceName;
+ private HelixManager participantManager;
+
+ public ManagedProcess(String clusterName, String zkAddress, String instanceName)
+ {
+ this.clusterName = clusterName;
+ this.zkAddress = zkAddress;
+ this.instanceName = instanceName;
+
+ }
+
+ /**
+ * Create the Helix manager, register the MasterSlave state model factory
+ * and connect to the cluster.
+ *
+ * @throws Exception if the connection to zookeeper or the cluster fails
+ */
+ public void start() throws Exception
+ {
+ log.info("STARTING "+ instanceName);
+ participantManager = HelixManagerFactory.getZKHelixManager(clusterName,
+ instanceName, InstanceType.PARTICIPANT, zkAddress);
+ // factory must be registered before connect() so transitions are handled
+ participantManager.getStateMachineEngine().registerStateModelFactory(
+ "MasterSlave", new ManagedFactory());
+ participantManager.connect();
+ log.info("STARTED "+ instanceName);
+
+ }
+
+ /**
+ * Disconnect the participant if it was started.
+ * NOTE(review): the manager reference is not cleared, so a second stop()
+ * would call disconnect() again -- confirm that is safe or intended.
+ */
+ public void stop()
+ {
+ if (participantManager != null)
+ {
+ participantManager.disconnect();
+ }
+ }
+
+ /**
+ * Entry point: args are zkAddress, clusterName, instanceName. Installs a
+ * shutdown hook that disconnects the participant, then blocks forever.
+ */
+ public static void main(String[] args) throws Exception
+ {
+ final String zkAddress = args[0];
+ final String clusterName = args[1];
+ final String instanceName = args[2];
+
+ // Give a unique id to each process, most commonly used format hostname_port
+ final ManagedProcess managerProcess = new ManagedProcess(clusterName, zkAddress,
+ instanceName);
+ Runtime.getRuntime().addShutdownHook(new Thread()
+ {
+ @Override
+ public void run()
+ {
+ log.info("Shutting down " + instanceName);
+ managerProcess.stop();
+ }
+ });
+ managerProcess.start();
+ // keep the process alive until externally terminated (shutdown hook cleans up)
+ Thread.currentThread().join();
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/managed/ShellContainerProvider.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/managed/ShellContainerProvider.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/managed/ShellContainerProvider.java
new file mode 100644
index 0000000..107f2c6
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/managed/ShellContainerProvider.java
@@ -0,0 +1,85 @@
+package org.apache.helix.metamanager.managed;
+
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+
+import org.apache.helix.metamanager.ClusterContainerProvider;
+import org.apache.log4j.Logger;
+
+/**
+ * Container provider that launches each container as a shell process
+ * ("/bin/sh command zkAddress clusterName id"). The process registry is
+ * static, i.e. shared by all provider instances in the JVM, and guarded by
+ * a single static lock.
+ */
+public class ShellContainerProvider implements ClusterContainerProvider {
+
+ static final Logger log = Logger.getLogger(ShellContainerProvider.class);
+
+ static final String REQUIRED_TYPE = "container";
+ static final String RUN_COMMAND = "/bin/sh";
+
+ // global view of processes required
+ static final Object staticLock = new Object();
+ static final Map<String, ShellProcess> processes = new HashMap<String, ShellProcess>();
+
+ final String zkAddress;
+ final String clusterName;
+ final String command;
+ final String providerName;
+
+ public ShellContainerProvider(String zkAddress, String clusterName, String owner, String command) {
+ this.zkAddress = zkAddress;
+ this.clusterName = clusterName;
+ this.command = command;
+ this.providerName = owner;
+ }
+
+ /**
+ * Spawn a shell process for the container; the id doubles as the last
+ * command-line argument of the launched script.
+ *
+ * @param id unique user-defined container id
+ * @param type must equal "container"
+ * @throws IllegalArgumentException on duplicate id or unsupported type
+ */
+ @Override
+ public void create(String id, String type) throws Exception {
+ synchronized (staticLock) {
+ if(processes.containsKey(id))
+ throw new IllegalArgumentException(String.format("Process '%s' already exists", id));
+
+ if(!type.equals(REQUIRED_TYPE))
+ throw new IllegalArgumentException(String.format("Type '%s' not supported", type));
+
+ log.info(String.format("Running container '%s' (zkAddress='%s', clusterName='%s', command='%s')", id, zkAddress, clusterName, command));
+
+ ProcessBuilder builder = new ProcessBuilder(RUN_COMMAND, command, zkAddress, clusterName, id);
+ Process process = builder.start();
+
+ processes.put(id, new ShellProcess(id, providerName, process));
+ }
+ }
+
+ /**
+ * Forcibly terminate the container's OS process and drop its registration.
+ * NOTE(review): does not waitFor() the process after destroy() -- confirm
+ * that immediate return is acceptable.
+ *
+ * @throws IllegalArgumentException if no process was created under this id
+ */
+ @Override
+ public void destroy(String id) throws Exception {
+ synchronized (staticLock) {
+ if(!processes.containsKey(id))
+ throw new IllegalArgumentException(String.format("Process '%s' does not exists", id));
+
+ log.info(String.format("Destroying container '%s'", id));
+
+ ShellProcess shell = processes.remove(id);
+ shell.process.destroy();
+ }
+ }
+
+ /** Best-effort cleanup of every registered process; failures are swallowed. */
+ @Override
+ public void destroyAll() {
+ synchronized (staticLock) {
+ log.info("Destroying all processes");
+ // copy values first because destroy() removes entries while iterating
+ for(ShellProcess process : new HashSet<ShellProcess>(processes.values())) {
+ try { destroy(process.id); } catch (Exception ignore) {}
+ }
+ }
+ }
+
+ /** Immutable record of a spawned process and the provider that owns it. */
+ static class ShellProcess {
+ final String id;
+ final String owner;
+ final Process process;
+
+ public ShellProcess(String id, String owner, Process process) {
+ this.id = id;
+ this.owner = owner;
+ this.process = process;
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/managed/ShellProcessProvider.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/managed/ShellProcessProvider.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/managed/ShellProcessProvider.java
new file mode 100644
index 0000000..0def0f5
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/managed/ShellProcessProvider.java
@@ -0,0 +1,148 @@
+package org.apache.helix.metamanager.managed;
+
+import java.lang.reflect.Field;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+
+import org.apache.helix.metamanager.ClusterContainerProvider;
+import org.apache.log4j.Logger;
+
+public class ShellProcessProvider implements ClusterContainerProvider {
+
+ static final Logger log = Logger.getLogger(ShellProcessProvider.class);
+
+ static final String REQUIRED_TYPE = "container";
+ static final String RUN_COMMAND = "/bin/sh";
+ static final String KILL_COMMAND = "kill -s SIGINT %d";
+
+ Map<String, ProcessBuilder> builders = new HashMap<String, ProcessBuilder>();
+ Map<String, Process> processes = new HashMap<String, Process>();
+ Map<String, String> id2connection = new HashMap<String, String>();
+
+ int connectionCounter = 0;
+
+ final String zkAddress;
+ final String clusterName;
+ final int basePort;
+ final String command;
+
+ public ShellProcessProvider(String zkAddress, String clusterName, int basePort, String command) {
+ this.zkAddress = zkAddress;
+ this.clusterName = clusterName;
+ this.basePort = basePort;
+ this.command = command;
+ }
+
+ @Override
+ public synchronized String create(String id, String type) throws Exception {
+ if(builders.containsKey(id))
+ throw new IllegalArgumentException(String.format("Process '%s' already exists", id));
+
+ if(!type.equals(REQUIRED_TYPE))
+ throw new IllegalArgumentException(String.format("Type '%s' not supported", type));
+
+ String connection = "localhost_" + (basePort + connectionCounter);
+ connectionCounter++;
+
+ log.info(String.format("Running container '%s' (zkAddress='%s', clusterName='%s', connection='%s', command='%s')", id, zkAddress, clusterName, connection, command));
+
+ ProcessBuilder builder = new ProcessBuilder(RUN_COMMAND, command, zkAddress, clusterName, connection);
+
+ builders.put(id, builder);
+ id2connection.put(id, connection);
+
+ return connection;
+ }
+
+ public synchronized void start(String id) throws Exception {
+ if(!builders.containsKey(id))
+ throw new IllegalArgumentException(String.format("Process '%s' does not exists", id));
+
+ if(processes.containsKey(id))
+ throw new IllegalArgumentException(String.format("Process '%s' already running", id));
+
+ log.info(String.format("Starting container '%s'", id));
+
+ Process p = builders.get(id).start();
+
+ processes.put(id, p);
+ }
+
+ public synchronized void stop(String id) throws Exception {
+ if(!builders.containsKey(id))
+ throw new IllegalArgumentException(String.format("Process '%s' does not exists", id));
+
+ if(!processes.containsKey(id))
+ throw new IllegalArgumentException(String.format("Process '%s' not running", id));
+
+ log.info(String.format("Stopping container '%s'", id));
+
+ Process p = processes.get(id);
+
+ int pid = getUnixPID(p);
+ Runtime.getRuntime().exec(String.format(KILL_COMMAND, pid));
+
+ int retVal = p.waitFor();
+ if(retVal != 130) {
+ log.warn(String.format("Process %d returned %d (should be 130, SIGINT)", pid, retVal));
+ }
+
+ processes.remove(id);
+
+ }
+
+ @Override
+ public synchronized String destroy(String id) throws Exception {
+ if(!builders.containsKey(id))
+ throw new IllegalArgumentException(String.format("Process '%s' does not exists", id));
+
+ log.info(String.format("Destroying container '%s'", id));
+
+ if(processes.containsKey(id)) {
+ log.warn(String.format("Forcibly terminating running container '%s'", id));
+ processes.get(id).destroy();
+ processes.remove(id);
+ }
+
+ String connection = id2connection.get(id);
+
+ builders.remove(id);
+ id2connection.remove(id);
+
+ return connection;
+ }
+
+ public synchronized void destroyAll() {
+ log.info("Destroying all processes");
+ for(String id : new HashSet<String>(processes.keySet())) {
+ try {
+ destroy(id);
+ } catch (Exception ignore) {
+ // ignore
+ }
+ }
+ }
+
+ // TODO get PID independently of platform
+ static int getUnixPID(Process process) throws IllegalArgumentException, IllegalAccessException, NoSuchFieldException {
+ if (process.getClass().getName().equals("java.lang.UNIXProcess")) {
+ Class<?> proc = process.getClass();
+ Field field = proc.getDeclaredField("pid");
+ Object value = getFieldValue(field, process);
+ return ((Integer) value).intValue();
+ } else {
+ throw new IllegalArgumentException("Not a UNIXProcess");
+ }
+ }
+
+ static Object getFieldValue(Field field, Object object) throws IllegalArgumentException, IllegalAccessException {
+ Object value;
+ boolean accessible = field.isAccessible();
+ field.setAccessible(true);
+ value = field.get(object);
+ field.setAccessible(accessible);
+ return value;
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/managed/YarnContainerProvider.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/managed/YarnContainerProvider.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/managed/YarnContainerProvider.java
new file mode 100644
index 0000000..629788e
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/managed/YarnContainerProvider.java
@@ -0,0 +1,37 @@
+package org.apache.helix.metamanager.managed;
+
+import org.apache.helix.metamanager.ClusterContainerProvider;
+
+/**
+ * Placeholder for a YARN-backed container provider. All operations are
+ * unimplemented stubs; create() and destroy() currently return null.
+ */
+public class YarnContainerProvider implements ClusterContainerProvider {
+
+ @Override
+ public String create(String id, String type) throws Exception {
+ // TODO Auto-generated method stub
+ return null;
+ }
+
+ @Override
+ public String destroy(String id) throws Exception {
+ // TODO Auto-generated method stub
+ return null;
+ }
+
+ @Override
+ public void start(String id) throws Exception {
+ // TODO Auto-generated method stub
+
+ }
+
+ @Override
+ public void stop(String id) throws Exception {
+ // TODO Auto-generated method stub
+
+ }
+
+ @Override
+ public void destroyAll() {
+ // TODO Auto-generated method stub
+
+ }
+
+}
[09/15] Adding Helix-task-framework and Yarn integration modules
Posted by ki...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/assembly/assembly.xml
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/assembly/assembly.xml b/recipes/meta-cluster-manager/src/main/assembly/assembly.xml
new file mode 100644
index 0000000..03b2ca5
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/assembly/assembly.xml
@@ -0,0 +1,32 @@
+<assembly
+ xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.2"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.2
+ http://maven.apache.org/xsd/assembly-1.1.2.xsd">
+
+ <!-- Bundles the appassembler output under target/metamanager-pkg
+ (repo, bin, conf) into a single metamanager tar.gz distribution;
+ scripts in bin/ keep their executable mode. -->
+ <id>assembly</id>
+ <formats>
+ <format>tar.gz</format>
+ </formats>
+ <baseDirectory>metamanager</baseDirectory>
+ <fileSets>
+ <fileSet>
+ <directory>target/metamanager-pkg/repo</directory>
+ <outputDirectory>repo</outputDirectory>
+ <excludes>
+ <exclude>**/maven-metadata-appassembler.xml</exclude>
+ </excludes>
+ <fileMode>0644</fileMode>
+ </fileSet>
+ <fileSet>
+ <directory>target/metamanager-pkg/bin</directory>
+ <outputDirectory>bin</outputDirectory>
+ <fileMode>0755</fileMode>
+ </fileSet>
+ <fileSet>
+ <directory>target/metamanager-pkg/conf</directory>
+ <outputDirectory>conf</outputDirectory>
+ <fileMode>0644</fileMode>
+ </fileSet>
+ </fileSets>
+</assembly>
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/config/log4j.properties
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/config/log4j.properties b/recipes/meta-cluster-manager/src/main/config/log4j.properties
new file mode 100644
index 0000000..af33e21
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/config/log4j.properties
@@ -0,0 +1,30 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# Set root logger level to ERROR and its only appender to C.
+log4j.rootLogger=ERROR, C
+
+# C is set to be a ConsoleAppender.
+log4j.appender.C=org.apache.log4j.ConsoleAppender
+log4j.appender.C.layout=org.apache.log4j.PatternLayout
+log4j.appender.C.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n
+
+log4j.logger.org.I0Itec=ERROR
+log4j.logger.org.apache=ERROR
+
+log4j.logger.org.apache.helix.metamanager=INFO
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ClusterAdmin.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ClusterAdmin.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ClusterAdmin.java
new file mode 100644
index 0000000..9a83f02
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ClusterAdmin.java
@@ -0,0 +1,30 @@
+package org.apache.helix.metamanager;
+
+/**
+ * Abstraction for instance config (container) injection into and removal from
+ * the managed cluster.
+ *
+ */
+public interface ClusterAdmin {
+
+ /**
+ * Add instance configuration to managed cluster.
+ *
+ * @param instanceId
+ * @param instanceTag
+ */
+ public void addInstance(String instanceId, String instanceTag);
+
+ /**
+ * Remove instance configuration from managed cluster.<br/>
+ * <b>INVARIANT:</b> idempotent
+ *
+ * @param instanceId
+ */
+ public void removeInstance(String instanceId);
+
+ /**
+ * Trigger rebalance of any affected resource in the managed cluster.
+ */
+ public void rebalance();
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ClusterContainerProvider.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ClusterContainerProvider.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ClusterContainerProvider.java
new file mode 100644
index 0000000..6aca07a
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ClusterContainerProvider.java
@@ -0,0 +1,32 @@
+package org.apache.helix.metamanager;
+
+/** Creates and destroys container instances for the managed cluster. */
+public interface ClusterContainerProvider {
+ /**
+ * Create container of given type.
+ *
+ * @param id
+ * unique user-defined container id
+ * @param type
+ * container type
+ * @throws Exception
+ * if container creation fails
+ */
+ public void create(String id, String type) throws Exception;
+
+ /**
+ * Destroy container.
+ *
+ * @param id
+ * unique user-defined container id
+ * @throws Exception
+ * if container destruction fails
+ */
+ public void destroy(String id) throws Exception;
+
+ /**
+ * Stops all running processes and destroys containers. Best-effort for
+ * cleanup.
+ *
+ */
+ public void destroyAll();
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ClusterContainerStatusProvider.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ClusterContainerStatusProvider.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ClusterContainerStatusProvider.java
new file mode 100644
index 0000000..e68c0ee
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ClusterContainerStatusProvider.java
@@ -0,0 +1,7 @@
+package org.apache.helix.metamanager;
+
+/** Read-only view on the lifecycle state of containers created by a provider. */
+public interface ClusterContainerStatusProvider {
+ // whether a container with this id is known
+ public boolean exists(String id);
+ // whether the container is currently active
+ public boolean isActive(String id);
+ // whether the container is in a failed state
+ public boolean isFailed(String id);
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ClusterInstanceInjector.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ClusterInstanceInjector.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ClusterInstanceInjector.java
new file mode 100644
index 0000000..d29e1c3
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ClusterInstanceInjector.java
@@ -0,0 +1,6 @@
+package org.apache.helix.metamanager;
+
+/**
+ * Adds instances to and removes instances from a cluster, identified by their
+ * connection string.
+ */
+public interface ClusterInstanceInjector {
+ public void addInstance(String connection);
+ public void removeInstance(String connection);
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ClusterStatusProvider.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ClusterStatusProvider.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ClusterStatusProvider.java
new file mode 100644
index 0000000..1812dc3
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ClusterStatusProvider.java
@@ -0,0 +1,5 @@
+package org.apache.helix.metamanager;
+
+/** Source of the desired number of containers per container type. */
+public interface ClusterStatusProvider {
+ // returns the target count for the given type; may throw if the type is
+ // unknown to the underlying source
+ public int getTargetContainerCount(String containerType) throws Exception;
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ConfigTool.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ConfigTool.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ConfigTool.java
new file mode 100644
index 0000000..596743b
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ConfigTool.java
@@ -0,0 +1,47 @@
+package org.apache.helix.metamanager;
+
+import org.apache.log4j.Logger;
+
+/**
+ * Non-instantiable holder for deployment constants (shell and YARN packaging
+ * paths, timeouts) and for globally shared provider instances.
+ *
+ * NOTE(review): targetProvider/statusProvider are mutable static state with
+ * unsynchronized accessors -- confirm they are only set during single-threaded
+ * bootstrap.
+ */
+public class ConfigTool {
+
+ static final Logger log = Logger.getLogger(ConfigTool.class);
+
+ // shell-based container deployment artifacts
+ public static final String SHELL_CONTAINER_PATH = "target/metamanager-pkg/bin/shell-container-process.sh";
+ public static final String SHELL_CONTAINER_PROPERTIES = "container.properties";
+ public static final String SHELL_CONTAINER_MARKER = "active";
+
+ // YARN master and container staging/deployment artifacts
+ public static final String YARN_MASTER_ARCHIVE_PATH = "target/metamanager-assembly.tar.gz";
+ public static final String YARN_MASTER_PATH = "master/metamanager/bin/yarn-master-process.sh";
+ public static final String YARN_MASTER_STAGING = "master.tar.gz";
+ public static final String YARN_MASTER_DESTINATION = "master";
+ public static final String YARN_MASTER_PROPERTIES = "master.properties";
+ public static final String YARN_CONTAINER_ARCHIVE_PATH = "target/metamanager-assembly.tar.gz";
+ public static final String YARN_CONTAINER_STAGING = "container.tar.gz";
+ public static final String YARN_CONTAINER_PATH = "container/metamanager/bin/yarn-container-process.sh";
+ public static final String YARN_CONTAINER_DESTINATION = "container";
+ public static final String YARN_CONTAINER_PROPERTIES = "container.properties";
+
+ // container operation timeout in milliseconds
+ public static final long CONTAINER_TIMEOUT = 60000;
+
+ static TargetProvider targetProvider;
+ static StatusProvider statusProvider;
+
+ private ConfigTool() {
+ // left blank
+ }
+
+ public static TargetProvider getTargetProvider() {
+ return targetProvider;
+ }
+ public static void setTargetProvider(TargetProvider targetProvider) {
+ ConfigTool.targetProvider = targetProvider;
+ }
+
+ public static StatusProvider getStatusProvider() {
+ return statusProvider;
+ }
+ public static void setStatusProvider(StatusProvider statusProvider) {
+ ConfigTool.statusProvider = statusProvider;
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ContainerProvider.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ContainerProvider.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ContainerProvider.java
new file mode 100644
index 0000000..2483bba
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ContainerProvider.java
@@ -0,0 +1,40 @@
+package org.apache.helix.metamanager;
+
+import org.apache.helix.metamanager.provider.ProviderStateModel;
+
+/**
+ * Abstraction for container deployment framework. Creates and destroys
+ * container instances. Is invoked by ProviderStateModel and must be blocking.
+ *
+ * @see ProviderStateModel
+ */
+public interface ContainerProvider {
+ /**
+ * Create container of given type.<br/>
+ * <b>INVARIANT:</b> synchronous invocation
+ *
+ * @param id
+ * unique user-defined container id
+ * @param containerType
+ * container type
+ * @throws Exception
+ */
+ public void create(String id, String containerType) throws Exception;
+
+ /**
+ * Destroy container.<br/>
+ * <b>INVARIANT:</b> synchronous invocation
+ *
+ * @param id
+ * unique user-defined container id
+ * @throws Exception
+ */
+ public void destroy(String id) throws Exception;
+
+ /**
+ * Stops all running processes and destroys containers. Best-effort for
+ * cleanup.
+ *
+ */
+ public void destroyAll();
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ContainerProviderService.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ContainerProviderService.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ContainerProviderService.java
new file mode 100644
index 0000000..a7da053
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ContainerProviderService.java
@@ -0,0 +1,9 @@
+package org.apache.helix.metamanager;
+
+/**
+ * ContainerProvider as configurable service.
+ *
+ */
+public interface ContainerProviderService extends ContainerProvider, Service {
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ContainerStatusProvider.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ContainerStatusProvider.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ContainerStatusProvider.java
new file mode 100644
index 0000000..d2853d9
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ContainerStatusProvider.java
@@ -0,0 +1,7 @@
+package org.apache.helix.metamanager;
+
+/**
+ * Read-only view on the state of individual containers, keyed by the
+ * user-defined container id. Exact semantics of each predicate are defined
+ * by the implementation (local process, shell process, YARN container, ...).
+ */
+public interface ContainerStatusProvider {
+ /** @return true if a container with the given id is known to this provider */
+ public boolean exists(String id);
+ /** @return true if the container is considered active (running) — provider-specific */
+ public boolean isActive(String id);
+ /** @return true if the container is considered failed — provider-specific */
+ public boolean isFailed(String id);
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/FileStatusProvider.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/FileStatusProvider.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/FileStatusProvider.java
new file mode 100644
index 0000000..06e2251
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/FileStatusProvider.java
@@ -0,0 +1,27 @@
+package org.apache.helix.metamanager;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.Properties;
+
+
+/**
+ * ClusterStatusProvider backed by a java.util.Properties file mapping
+ * container type to target count. The file is re-read on every query, so
+ * external edits are picked up without a restart.
+ */
+public class FileStatusProvider implements ClusterStatusProvider {
+
+ final File file;
+
+ public FileStatusProvider(File file) {
+ this.file = file;
+ }
+
+ /**
+ * Returns the configured target container count for the given type.
+ *
+ * @throws FileNotFoundException if the backing file does not exist
+ * @throws IOException if the file cannot be read
+ * @throws IllegalArgumentException if the container type has no entry in the file
+ */
+ @Override
+ public int getTargetContainerCount(String containerType) throws FileNotFoundException, IOException, IllegalArgumentException {
+ Properties properties = new Properties();
+ // close the reader when done (the original leaked a FileReader per call)
+ FileReader reader = new FileReader(file);
+ try {
+ properties.load(reader);
+ } finally {
+ reader.close();
+ }
+ // BUGFIX: Properties.contains() inherits Hashtable semantics and tests
+ // *values*, not keys; key membership requires containsKey().
+ if(!properties.containsKey(containerType))
+ throw new IllegalArgumentException(String.format("container type '%s' not found in '%s'", containerType, file.getCanonicalPath()));
+ return Integer.parseInt(properties.getProperty(containerType));
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/HelixClusterAdmin.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/HelixClusterAdmin.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/HelixClusterAdmin.java
new file mode 100644
index 0000000..3dd2f48
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/HelixClusterAdmin.java
@@ -0,0 +1,43 @@
+package org.apache.helix.metamanager;
+
+import org.apache.helix.HelixAdmin;
+import org.apache.helix.model.InstanceConfig;
+import org.apache.log4j.Logger;
+
+/**
+ * Implementation of ClusterAdmin based on Helix. Adds and removes managed
+ * instances via {@link HelixAdmin} and triggers full-auto rebalancing of all
+ * resources in the managed cluster.
+ *
+ */
+public class HelixClusterAdmin implements ClusterAdmin {
+
+ static final Logger log = Logger.getLogger(HelixClusterAdmin.class);
+
+ final String cluster;
+ final HelixAdmin admin;
+
+ public HelixClusterAdmin(String clusterName, HelixAdmin admin) {
+ this.cluster = clusterName;
+ this.admin = admin;
+ }
+
+ /** Registers the instance in the managed cluster and applies the given tag. */
+ @Override
+ public synchronized void addInstance(String instanceId, String instanceTag) {
+ log.debug(String.format("injecting instance %s (tag=%s) in cluster %s", instanceId, instanceTag, cluster));
+ admin.addInstance(cluster, new InstanceConfig(instanceId));
+ admin.addInstanceTag(cluster, instanceId, instanceTag);
+ }
+
+ /** Drops the instance from the managed cluster. */
+ @Override
+ public synchronized void removeInstance(String connection) {
+ log.debug(String.format("removing instance %s from cluster %s", connection, cluster));
+ admin.dropInstance(cluster, new InstanceConfig(connection));
+ }
+
+ /**
+ * Rebalances every resource in the cluster to its configured replica count.
+ * NOTE(review): not synchronized, unlike addInstance/removeInstance — confirm
+ * whether concurrent rebalance calls are intended to be allowed.
+ * NOTE(review): Integer.parseInt assumes getReplicas() is numeric; symbolic
+ * replica counts (e.g. "ANY") would throw NumberFormatException — verify.
+ */
+ @Override
+ public void rebalance() {
+ for (String resourceName : admin.getResourcesInCluster(cluster)) {
+ int replica = Integer.parseInt(admin.getResourceIdealState(cluster, resourceName).getReplicas());
+ admin.rebalance(cluster, resourceName, replica);
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/Manager.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/Manager.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/Manager.java
new file mode 100644
index 0000000..ab91ae7
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/Manager.java
@@ -0,0 +1,129 @@
+package org.apache.helix.metamanager;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import org.apache.helix.NotificationContext;
+import org.apache.helix.model.Message;
+import org.apache.helix.participant.statemachine.StateModel;
+import org.apache.helix.participant.statemachine.StateModelInfo;
+import org.apache.helix.participant.statemachine.Transition;
+import org.apache.log4j.Logger;
+
+/**
+ * OnlineOffline state model for meta-cluster partitions. Each partition
+ * represents one logical container; going ONLINE creates the physical
+ * container and registers it as an instance of the managed cluster, going
+ * OFFLINE tears both down again. Cleanup is deliberately best-effort.
+ */
+@StateModelInfo(initialState = "OFFLINE", states = { "OFFLINE", "ONLINE" })
+public class Manager extends StateModel {
+
+ static final Logger log = Logger.getLogger(Manager.class);
+
+ ClusterContainerProvider provider;
+ ClusterAdmin admin;
+
+ public Manager(ClusterContainerProvider provider, ClusterAdmin admin) {
+ this.provider = provider;
+ this.admin = admin;
+ }
+
+ /**
+ * OFFLINE -> ONLINE: acquire the container. Resource name is the container
+ * type, partition name the container id. Any stale container/instance left
+ * by a previous owner is removed first (best effort), then the instance is
+ * added to the managed cluster and the physical container created.
+ */
+ @Transition(from = "OFFLINE", to = "ONLINE")
+ public void acquire(Message m, NotificationContext context) throws Exception {
+ String containerType = m.getResourceName();
+ String containerId = m.getPartitionName();
+ String instanceId = context.getManager().getInstanceName();
+
+ log.trace(String.format("%s:%s transitioning from OFFLINE to ONLINE",
+ containerId, instanceId));
+
+ bestEffortRemove(containerId);
+
+ // add instance to cluster
+ // NOTE(review): single-arg addInstance here vs. (id, tag) elsewhere in
+ // this patch — presumably a different ClusterAdmin revision; verify.
+ admin.addInstance(containerId);
+
+ // create container
+ provider.create(containerId, containerType);
+
+ try {
+ admin.rebalance();
+ } catch (Exception e) {
+ // ignore: rebalance failure must not fail the transition
+ log.warn(String.format("rebalancing cluster failed (error='%s')", e.getMessage()));
+ }
+
+ log.info(String.format("%s acquired container '%s' (type='%s')",
+ instanceId, containerId, containerType));
+ }
+
+ /**
+ * ONLINE -> OFFLINE: release the container. Destroys the physical container
+ * and drops the managed-cluster instance (best effort), then rebalances.
+ */
+ @Transition(from = "ONLINE", to = "OFFLINE")
+ public void release(Message m, NotificationContext context) {
+ String containerId = m.getPartitionName();
+ String instanceId = context.getManager().getInstanceName();
+
+ log.trace(String.format("%s:%s transitioning from ONLINE to OFFLINE",
+ containerId, instanceId));
+
+ bestEffortRemove(containerId);
+
+ try {
+ admin.rebalance();
+ } catch (Exception e) {
+ // ignore: rebalance failure must not fail the transition
+ log.warn(String.format("rebalancing cluster failed (error='%s')", e.getMessage()));
+ }
+
+ log.info(String.format("%s destroyed container '%s'",
+ instanceId, containerId));
+
+ }
+
+ /** ERROR -> OFFLINE: recovery hook; currently only logs the transition. */
+ @Transition(from = "ERROR", to = "OFFLINE")
+ public void recover(Message m, NotificationContext context) {
+ String containerId = m.getPartitionName();
+ String instanceId = context.getManager().getInstanceName();
+
+ log.trace(String.format("%s:%s transitioning from ERROR to OFFLINE",
+ containerId, instanceId));
+ }
+
+ /** OFFLINE -> DROPPED: drop hook; currently only logs the transition. */
+ @Transition(from = "OFFLINE", to = "DROPPED")
+ public void drop(Message m, NotificationContext context) {
+ String containerId = m.getPartitionName();
+ String instanceId = context.getManager().getInstanceName();
+
+ log.trace(String.format("%s:%s transitioning from OFFLINE to DROPPED",
+ containerId, instanceId));
+ }
+
+ /**
+ * Destroys the container and drops its instance, swallowing all errors.
+ * Intentional best-effort: either resource may legitimately not exist yet
+ * (fresh acquire) or already be gone (failover).
+ */
+ private void bestEffortRemove(String containerId) {
+ log.debug(String.format("Best effort removal of container '%s'", containerId));
+
+ try {
+ provider.destroy(containerId);
+ log.debug(String.format("Container '%s' destroyed", containerId));
+ } catch (Exception e) {
+ log.debug(String.format("Container '%s' does not exist", containerId));
+ }
+
+ try {
+ admin.removeInstance(containerId);
+ log.debug(String.format("Instance '%s' removed", containerId));
+ } catch (Exception e) {
+ log.debug(String.format("Instance '%s' does not exist", containerId));
+ }
+
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ManagerDemo.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ManagerDemo.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ManagerDemo.java
new file mode 100644
index 0000000..35891f0
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ManagerDemo.java
@@ -0,0 +1,463 @@
+package org.apache.helix.metamanager;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import java.io.File;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.TreeSet;
+
+import org.I0Itec.zkclient.IDefaultNameSpace;
+import org.I0Itec.zkclient.ZkClient;
+import org.I0Itec.zkclient.ZkServer;
+import org.apache.commons.io.FileUtils;
+import org.apache.helix.HelixAdmin;
+import org.apache.helix.HelixManager;
+import org.apache.helix.controller.HelixControllerMain;
+import org.apache.helix.manager.zk.ZKHelixAdmin;
+import org.apache.helix.metamanager.managed.HelixClusterAdmin;
+import org.apache.helix.metamanager.managed.LocalStatusProvider;
+import org.apache.helix.metamanager.provider.local.LocalContainerProvider;
+import org.apache.helix.metamanager.provider.local.LocalContainerStatusProvider;
+import org.apache.helix.metamanager.provider.shell.ShellContainerProvider;
+import org.apache.helix.metamanager.provider.shell.ShellContainerStatusProvider;
+import org.apache.helix.metamanager.provider.yarn.ApplicationConfig;
+import org.apache.helix.metamanager.provider.yarn.YarnApplication;
+import org.apache.helix.metamanager.provider.yarn.YarnContainerProvider;
+import org.apache.helix.metamanager.provider.yarn.YarnContainerStatusProvider;
+import org.apache.helix.model.ExternalView;
+import org.apache.helix.model.IdealState;
+import org.apache.helix.model.IdealState.IdealStateModeProperty;
+import org.apache.helix.model.InstanceConfig;
+import org.apache.helix.model.StateModelDefinition;
+import org.apache.helix.tools.ClusterStateVerifier;
+import org.apache.helix.tools.ClusterStateVerifier.BestPossAndExtViewZkVerifier;
+import org.apache.helix.tools.StateModelConfigGenerator;
+import org.apache.log4j.Logger;
+
+/**
+ * Interactive end-to-end demo of the meta cluster manager. Boots a local
+ * ZooKeeper, creates a managed cluster (MasterSlave "database" resource) and
+ * a meta cluster (OnlineOffline "container" resource), starts container
+ * providers of the selected type (LOCAL, SHELL or YARN), then walks through
+ * scale-up, container failure, provider failure and scale-down scenarios,
+ * pausing for ENTER between steps.
+ */
+public class ManagerDemo
+{
+ static final long TIMESTEP_INTERVAL = 1000;
+
+ static final String MANAGED_PROCESS_PATH = "target/meta-cluster-manager-pkg/bin/container-process.sh";
+ // NOTE(review): absolute developer-specific path — breaks on any other machine
+ static final String YARN_PROCESS_PATH = "/home/apucher/incubator-helix/recipes/meta-cluster-manager/target/meta-cluster-manager-pkg/bin/yarn-container-process.sh";
+
+ static final String PROVIDER_LOCAL = "LOCAL";
+ static final String PROVIDER_SHELL = "SHELL";
+ static final String PROVIDER_YARN = "YARN";
+
+ static final Logger log = Logger.getLogger(ManagerDemo.class);
+
+ static final int zkPort = 2199;
+ static final String zkAddress = "localhost:" + zkPort;
+ static final String metaClusterName = "meta-cluster";
+ static final String managedClusterName = "managed-cluster";
+ static final String metaResourceName = "container";
+ static final String managedResourceName = "database";
+
+ static final int numContainerProviders = 3;
+ static final int numContainerMax = 7;
+ static final int numContainerMin = 3;
+ static final int numContainerStep = 2;
+ static final int numContainerReplica = 1;
+
+ static final int numManagedPartitions = 10;
+ static final int numManagedReplica = 2;
+
+ // all created providers, for shutdown-hook cleanup
+ static List<ClusterContainerProvider> providers = new ArrayList<ClusterContainerProvider>();
+ static int providerCount = 0;
+
+ // YARN-specific services that need explicit stop on shutdown
+ static Collection<YarnContainerProvider> yarnProviders = new ArrayList<YarnContainerProvider>();
+ static Collection<YarnContainerStatusProvider> yarnStatusProviders = new ArrayList<YarnContainerStatusProvider>();
+ static Collection<YarnApplication> yarnApplications = new ArrayList<YarnApplication>();
+
+ /**
+ * Runs the demo. Optional first argument selects the container provider
+ * type: LOCAL (default), SHELL or YARN.
+ *
+ * @param args [providerType]
+ * @throws Exception on any setup failure
+ */
+ public static void main(String[] args) throws Exception
+ {
+
+ String containerProviderType = PROVIDER_LOCAL;
+ if(args.length >= 1) {
+ containerProviderType = args[0];
+ }
+
+ LocalStatusProvider clusterStatusProvider = null;
+ ManagerProcess[] managerProcesses = new ManagerProcess[numContainerProviders];
+
+ HelixManager metaControllerManager = null;
+ HelixManager managedControllerManager = null;
+
+ // best-effort cleanup of external processes/containers on JVM exit
+ Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() {
+ @Override
+ public void run() {
+ for(ClusterContainerProvider provider : providers) {
+ log.info("Destroying all containers of provider");
+ provider.destroyAll();
+ }
+ for(YarnContainerProvider provider : yarnProviders) {
+ log.info("Stopping yarn container provider");
+ provider.stopService();
+ }
+ for(YarnContainerStatusProvider provider : yarnStatusProviders) {
+ log.info("Stopping yarn container status provider");
+ provider.stopService();
+ }
+ for(YarnApplication application: yarnApplications) {
+ log.info("Stopping yarn application");
+ try { application.stop(); } catch(Exception ignore) {}
+ }
+ }
+ }));
+
+ try
+ {
+ log.info("Starting ZooKeeper");
+ startLocalZookeeper();
+ HelixAdmin admin = new ZKHelixAdmin(zkAddress);
+
+ log.info("Create clusters");
+ admin.addCluster(metaClusterName, true);
+ admin.addCluster(managedClusterName, true);
+
+ log.info("Create providers");
+ clusterStatusProvider = new LocalStatusProvider(numContainerMin);
+ ClusterContainerStatusProvider containerStatusProvider = createContainerStatusProvider(containerProviderType);
+
+ // ConfigTool statics hand the providers to the rebalancer (see
+ // ManagerRebalancer.init), which cannot be constructed directly
+ log.info("Setup config tool");
+ ConfigTool.setClusterStatusProvider(clusterStatusProvider);
+ ConfigTool.setContainerStatusProvider(containerStatusProvider);
+
+ // Managed Cluster
+ log.info("Setup managed cluster");
+ admin.addStateModelDef(managedClusterName, "MasterSlave",
+ new StateModelDefinition(StateModelConfigGenerator.generateConfigForMasterSlave()));
+ admin.addResource(managedClusterName, managedResourceName, numManagedPartitions,
+ "MasterSlave", IdealStateModeProperty.AUTO_REBALANCE.toString());
+ admin.rebalance(managedClusterName, managedResourceName, numManagedReplica);
+
+ // Meta Cluster
+ log.info("Setup meta cluster");
+ admin.addStateModelDef(metaClusterName, "OnlineOffline",
+ new StateModelDefinition(StateModelConfigGenerator.generateConfigForOnlineOffline()));
+ admin.addResource(metaClusterName, metaResourceName, clusterStatusProvider.getTargetContainerCount(""),
+ "OnlineOffline", IdealStateModeProperty.AUTO_REBALANCE.toString());
+
+ IdealState idealState = admin.getResourceIdealState(metaClusterName, metaResourceName);
+ idealState.setRebalancerClassName(ManagerRebalancer.class.getName());
+ //idealState.getRecord().setSimpleField(IdealStateProperty.REBALANCE_TIMER_PERIOD.toString(), "2000"); // Timer trigger creates race condition
+ admin.setResourceIdealState(metaClusterName, metaResourceName, idealState);
+ admin.rebalance(metaClusterName, metaResourceName, 1);
+
+ log.info("Starting meta processes (container providers)");
+ for (int i = 0; i < numContainerProviders; i++)
+ {
+ String instanceName = "provider_" + i;
+ admin.addInstance(metaClusterName, new InstanceConfig(instanceName));
+
+ ClusterAdmin clusterAdmin = new HelixClusterAdmin(managedClusterName, managedResourceName, numManagedReplica, admin);
+
+ managerProcesses[i] = new ManagerProcess(metaClusterName, zkAddress,
+ instanceName, createContainerProvider(containerProviderType), clusterAdmin);
+ managerProcesses[i].start();
+ }
+
+ log.info("Starting managed cluster controller");
+ managedControllerManager = HelixControllerMain.startHelixController(zkAddress,
+ managedClusterName, "managedController", HelixControllerMain.STANDALONE);
+ log.info("Starting meta cluster controller");
+ metaControllerManager = HelixControllerMain.startHelixController(zkAddress,
+ metaClusterName, "metaController", HelixControllerMain.STANDALONE);
+
+ waitUntilRebalancedCount(numContainerMin, admin);
+ printStep("Initial cluster state", admin);
+
+ // scenario 1: scale up to numContainerMax in steps
+ while(clusterStatusProvider.getTargetContainerCount("") < numContainerMax) {
+ int newCount = clusterStatusProvider.getTargetContainerCount("") + numContainerStep;
+
+ log.info(String.format("Increasing container count to %d", newCount));
+ clusterStatusProvider.setTargetContainerCount(newCount);
+
+ triggerPipeline(admin);
+ waitUntilRebalancedCount(newCount, admin);
+ printStep(String.format("Increased container count to %d", newCount), admin);
+ }
+
+ // scenario 2: kill two containers, expect recovery to same count
+ log.info("Destroying container 0 and container 1");
+ int currentCount = clusterStatusProvider.getTargetContainerCount("");
+ providers.get(0).destroy("container_0");
+ providers.get(0).destroy("container_1");
+ triggerPipeline(admin);
+ waitUntilRebalancedCount(currentCount, admin);
+ printStep("Destroyed container 0 and container 1", admin);
+
+ // scenario 3: kill a whole provider, expect failover to the others
+ log.info("Destroying container provider 0");
+ currentCount = clusterStatusProvider.getTargetContainerCount("");
+ managerProcesses[0].stop();
+ waitUntilRebalancedCount(currentCount, admin);
+ printStep("Destroyed container provider 0", admin);
+
+ // scenario 4: scale back down to numContainerMin
+ while(clusterStatusProvider.getTargetContainerCount("") > numContainerMin) {
+ int newCount = clusterStatusProvider.getTargetContainerCount("") - numContainerStep;
+
+ log.info(String.format("Decreasing container count to %d", newCount));
+ clusterStatusProvider.setTargetContainerCount(newCount);
+
+ triggerPipeline(admin);
+ waitUntilRebalancedCount(newCount, admin);
+ printStep(String.format("Decreased container count to %d", clusterStatusProvider.getTargetContainerCount("")), admin);
+ }
+
+ log.info("Stopping processes");
+
+ } catch (Exception e)
+ {
+ e.printStackTrace();
+ } finally
+ {
+ if (managedControllerManager != null) {
+ log.info("Disconnecting managed cluster controller");
+ managedControllerManager.disconnect();
+ }
+ if (metaControllerManager != null) {
+ log.info("Disconnecting meta cluster controller");
+ metaControllerManager.disconnect();
+ }
+ log.info("Destroying meta processes");
+ // NOTE(review): entries may still be null if startup failed before the
+ // provider loop completed — process.stop() would then NPE; verify.
+ for (ManagerProcess process : managerProcesses) {
+ process.stop();
+ }
+ }
+
+ // TODO clean up threads correctly
+ System.exit(0);
+ }
+
+// rewrites the ideal state unchanged to force a rebalancer pass
+private static void triggerPipeline(HelixAdmin admin) {
+ IdealState poke = admin.getResourceIdealState(metaClusterName, metaResourceName);
+ admin.setResourceIdealState(metaClusterName, metaResourceName, poke);
+}
+
+ /** Logs a banner, dumps both cluster states, and waits for ENTER. */
+ private static void printStep(String text, HelixAdmin admin) throws Exception {
+ log.info("********************************************************************************");
+ log.info(text);
+ log.info("********************************************************************************");
+ printClusterStatus(admin);
+
+ System.out.println("Press ENTER to continue");
+ System.in.read();
+ }
+
+ static void printClusterStatus(HelixAdmin admin) throws Exception {
+ log.info("Managed cluster status");
+ printStatusMasterSlave(admin);
+ log.info("Meta cluster status");
+ printMetaClusterStatus(admin);
+ }
+
+ /**
+ * Polls (no timeout) until both the meta and the managed cluster report the
+ * expected container count, then verifies convergence via Helix tooling.
+ */
+ static void waitUntilRebalancedCount(int containerCount, HelixAdmin admin) throws InterruptedException {
+ Thread.sleep(TIMESTEP_INTERVAL);
+ while(containerCount != getMetaContainerCount(admin) ||
+ containerCount != getManagedContainerCount(admin)) {
+ Thread.sleep(TIMESTEP_INTERVAL);
+ }
+ ClusterStateVerifier.verifyByPolling(new BestPossAndExtViewZkVerifier(zkAddress, managedClusterName));
+ }
+
+ /** Counts meta-cluster partitions that have at least one ONLINE replica. */
+ static int getMetaContainerCount(HelixAdmin admin) {
+ Set<String> assignedInstances = new HashSet<String>();
+
+ ExternalView externalView = admin.getResourceExternalView(metaClusterName, metaResourceName);
+
+ for (String partitionName : externalView.getPartitionSet())
+ {
+ Map<String, String> stateMap = externalView.getStateMap(partitionName);
+ if(stateMap == null)
+ continue;
+
+ for(String instanceName : stateMap.keySet()){
+ if ("ONLINE".equals(stateMap.get(instanceName))) {
+ assignedInstances.add(partitionName);
+ break;
+ }
+ }
+ }
+
+ return assignedInstances.size();
+ }
+
+ /** Counts distinct managed-cluster instances serving a MASTER or SLAVE replica. */
+ static int getManagedContainerCount(HelixAdmin admin) {
+ Set<String> assignedInstances = new HashSet<String>();
+
+ ExternalView externalView = admin.getResourceExternalView(managedClusterName, managedResourceName);
+
+ for (String partitionName : externalView.getPartitionSet())
+ {
+ Map<String, String> stateMap = externalView.getStateMap(partitionName);
+ if(stateMap == null)
+ continue;
+
+ for(String instanceName : stateMap.keySet()){
+ if ("MASTER".equals(stateMap.get(instanceName)) ||
+ "SLAVE".equals(stateMap.get(instanceName))) {
+ assignedInstances.add(instanceName);
+ }
+ }
+ }
+
+ return assignedInstances.size();
+ }
+
+ /** Logs, per meta-cluster partition, which provider instance holds it ONLINE. */
+ static void printMetaClusterStatus(HelixAdmin admin)
+ {
+ ExternalView externalView = admin
+ .getResourceExternalView(metaClusterName, metaResourceName);
+ TreeSet<String> treeSet = new TreeSet<String>(
+ externalView.getPartitionSet());
+ log.info("container" + "\t" + "acquired by");
+ log.info("======================================");
+ for (String partitionName : treeSet)
+ {
+ Map<String, String> stateMap = externalView.getStateMap(partitionName);
+ String acquiredBy = null;
+ if (stateMap != null)
+ {
+ for(String instanceName:stateMap.keySet()){
+ if ("ONLINE".equals(stateMap.get(instanceName))){
+ acquiredBy = instanceName;
+ break;
+ }
+ }
+ }
+ log.info(partitionName + "\t"
+ + ((acquiredBy != null) ? acquiredBy : "NONE"));
+ }
+ }
+
+ /** Logs, per managed partition, the instances holding MASTER and SLAVE. */
+ static void printStatusMasterSlave(HelixAdmin admin)
+ {
+ ExternalView externalView = admin
+ .getResourceExternalView(managedClusterName, managedResourceName);
+ TreeSet<String> treeSet = new TreeSet<String>(
+ externalView.getPartitionSet());
+ log.info("partition" + "\t" + "master" + "\t\t" + "slave");
+ log.info("============================================================");
+ for (String partitionName : treeSet)
+ {
+ Map<String, String> stateMap = externalView.getStateMap(partitionName);
+ String master = "NONE";
+ String slave = "NONE";
+ if (stateMap != null)
+ {
+ for(String instanceName:stateMap.keySet()){
+ if ("MASTER".equals(stateMap.get(instanceName))){
+ master = instanceName;
+ }
+ if ("SLAVE".equals(stateMap.get(instanceName))){
+ slave = instanceName;
+ }
+ }
+ }
+ log.info(String.format("%s\t%s\t%s", partitionName, master, slave));
+ }
+ }
+
+ /** Starts an embedded ZooKeeper on zkPort, wiping any previous data/log dirs. */
+ public static void startLocalZookeeper() throws Exception
+ {
+ ZkServer server = null;
+ String baseDir = "/tmp/IntegrationTest/";
+ final String dataDir = baseDir + "zk/dataDir";
+ final String logDir = baseDir + "/tmp/logDir";
+ FileUtils.deleteDirectory(new File(dataDir));
+ FileUtils.deleteDirectory(new File(logDir));
+
+ IDefaultNameSpace defaultNameSpace = new IDefaultNameSpace()
+ {
+ @Override
+ public void createDefaultNameSpace(ZkClient zkClient)
+ {
+
+ }
+ };
+ server = new ZkServer(dataDir, logDir, defaultNameSpace, zkPort);
+ server.start();
+
+ }
+
+ /**
+ * Creates a container provider of the given type (LOCAL, SHELL or YARN),
+ * registers it for shutdown-hook cleanup, and returns it.
+ *
+ * @throws IllegalArgumentException for an unknown type
+ */
+ private static ClusterContainerProvider createContainerProvider(String type) throws Exception {
+ String providerName = "provider_" + providerCount;
+ providerCount++;
+
+ if(PROVIDER_LOCAL.equalsIgnoreCase(type)) {
+ log.info("Using VM-local container provider");
+ LocalContainerProvider provider = new LocalContainerProvider(zkAddress, managedClusterName, providerName);
+ providers.add(provider);
+ return provider;
+ } else if (PROVIDER_SHELL.equalsIgnoreCase(type)) {
+ log.info("Using shell-based container provider");
+ ShellContainerProvider provider = new ShellContainerProvider(zkAddress, managedClusterName, providerName, MANAGED_PROCESS_PATH);
+ providers.add(provider);
+ return provider;
+ } else if (PROVIDER_YARN.equalsIgnoreCase(type)) {
+ ApplicationConfig appConfig = new ApplicationConfig(zkAddress, managedClusterName, zkAddress, providerName);
+
+ log.info("Using yarn-based container provider");
+ YarnApplication yarnApplication = new YarnApplication(appConfig);
+ yarnApplication.start();
+ yarnApplications.add(yarnApplication);
+
+ YarnContainerProvider yarnProvider = new YarnContainerProvider(appConfig, YARN_PROCESS_PATH);
+ yarnProvider.startService();
+ yarnProviders.add(yarnProvider);
+
+ providers.add(yarnProvider);
+ return yarnProvider;
+ } else {
+ throw new IllegalArgumentException(String.format("Unknown container provider type '%s'", type));
+ }
+ }
+
+ /**
+ * Creates the container status provider matching the given provider type.
+ *
+ * @throws IllegalArgumentException for an unknown type
+ */
+ private static ClusterContainerStatusProvider createContainerStatusProvider(String type) throws Exception {
+ if(PROVIDER_LOCAL.equalsIgnoreCase(type)) {
+ log.info("Using VM-local container status provider");
+ LocalContainerStatusProvider provider = new LocalContainerStatusProvider();
+ return provider;
+ } else if (PROVIDER_SHELL.equalsIgnoreCase(type)) {
+ log.info("Using shell-based container status provider");
+ ShellContainerStatusProvider provider = new ShellContainerStatusProvider();
+ return provider;
+ } else if (PROVIDER_YARN.equalsIgnoreCase(type)) {
+ log.info("Using yarn-based container status provider");
+ YarnContainerStatusProvider provider = new YarnContainerStatusProvider(zkAddress);
+ provider.startService();
+ yarnStatusProviders.add(provider);
+ return provider;
+ } else {
+ throw new IllegalArgumentException(String.format("Unknown container status provider type '%s'", type));
+ }
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ManagerFactory.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ManagerFactory.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ManagerFactory.java
new file mode 100644
index 0000000..44a924e
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ManagerFactory.java
@@ -0,0 +1,39 @@
+package org.apache.helix.metamanager;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import org.apache.helix.participant.statemachine.StateModelFactory;
+
+/**
+ * StateModelFactory producing {@link Manager} state models. All models share
+ * the same container provider and cluster admin, one model per meta-cluster
+ * partition (i.e. per logical container).
+ */
+public class ManagerFactory extends StateModelFactory<Manager> {
+
+ final ClusterContainerProvider provider;
+ final ClusterAdmin admin;
+
+ public ManagerFactory(ClusterContainerProvider provider, ClusterAdmin admin) {
+ super();
+ this.provider = provider;
+ this.admin = admin;
+ }
+
+ /** Creates a Manager for the given partition, sharing provider and admin. */
+ @Override
+ public Manager createNewStateModel(String partitionName) {
+ return new Manager(provider, admin);
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ManagerProcess.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ManagerProcess.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ManagerProcess.java
new file mode 100644
index 0000000..7812e6f
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ManagerProcess.java
@@ -0,0 +1,67 @@
+package org.apache.helix.metamanager;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import org.apache.helix.HelixManager;
+import org.apache.helix.HelixManagerFactory;
+import org.apache.helix.InstanceType;
+import org.apache.log4j.Logger;
+
+/**
+ * Helix participant wrapper for a container provider. Connects to the meta
+ * cluster and registers a {@link ManagerFactory} for the OnlineOffline state
+ * model so the instance can acquire/release container partitions.
+ */
+public class ManagerProcess
+{
+ static final Logger log = Logger.getLogger(ManagerProcess.class);
+
+ final String clusterName;
+ final String zkAddress;
+ final String instanceName;
+ final ClusterContainerProvider provider;
+ final ClusterAdmin admin;
+
+ HelixManager participantManager;
+
+ ManagerProcess(String clusterName, String zkAddress, String instanceName, ClusterContainerProvider provider, ClusterAdmin admin)
+ {
+ this.clusterName = clusterName;
+ this.zkAddress = zkAddress;
+ this.instanceName = instanceName;
+ this.provider = provider;
+ this.admin = admin;
+ }
+
+ /**
+ * Connects the participant to the meta cluster. The state model factory must
+ * be registered before connect() so no transitions are missed.
+ */
+ public void start() throws Exception
+ {
+ log.info("STARTING "+ instanceName);
+ participantManager = HelixManagerFactory.getZKHelixManager(clusterName,
+ instanceName, InstanceType.PARTICIPANT, zkAddress);
+ participantManager.getStateMachineEngine().registerStateModelFactory(
+ "OnlineOffline", new ManagerFactory(provider, admin));
+ participantManager.connect();
+ log.info("STARTED "+ instanceName);
+
+ }
+
+ /**
+ * Disconnects the participant; safe to call before start() or repeatedly.
+ * NOTE(review): participantManager is not nulled after disconnect — a second
+ * stop() re-disconnects the same manager; confirm that is harmless.
+ */
+ public void stop()
+ {
+ if (participantManager != null)
+ {
+ participantManager.disconnect();
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ManagerRebalancer.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ManagerRebalancer.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ManagerRebalancer.java
new file mode 100644
index 0000000..2b2824c
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ManagerRebalancer.java
@@ -0,0 +1,167 @@
+package org.apache.helix.metamanager;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.helix.HelixManager;
+import org.apache.helix.controller.rebalancer.Rebalancer;
+import org.apache.helix.controller.stages.ClusterDataCache;
+import org.apache.helix.controller.stages.CurrentStateOutput;
+import org.apache.helix.model.IdealState;
+import org.apache.helix.model.Partition;
+import org.apache.log4j.Logger;
+
+/**
+ * Rebalancer for cluster state. Uses cluster status provider.<br/>
+ * <br/>
+ * IdealState mapping:<br/>
+ * resource = tag-name<br/>
+ * partition = logical container<br/>
+ * instance = resource provider<br/>
+ * status = physical container presence
+ *
+ */
+public class ManagerRebalancer implements Rebalancer {
+
+ static final Logger log = Logger.getLogger(ManagerRebalancer.class);
+
+ static final long UPDATE_INTERVAL_MIN = 1500;
+
+ static final Object lock = new Object();
+ static long nextUpdate = 0;
+
+ ClusterStatusProvider clusterStatusProvider;
+ ClusterContainerStatusProvider containerStatusProvider;
+ HelixManager manager;
+
+ @Override
+ public void init(HelixManager manager) {
+ this.clusterStatusProvider = ConfigTool.getClusterStatusProvider();
+ this.containerStatusProvider = ConfigTool.getContainerStatusProvider();
+ this.manager = manager;
+ }
+
+ @Override
+ public IdealState computeNewIdealState(String resourceName,
+ IdealState currentIdealState,
+ CurrentStateOutput currentStateOutput, ClusterDataCache clusterData) {
+
+// synchronized(lock) {
+// if(nextUpdate > System.currentTimeMillis()) {
+// return currentIdealState;
+// }
+// nextUpdate = System.currentTimeMillis() + UPDATE_INTERVAL_MIN;
+
+ // target container count
+ int targetCount = clusterStatusProvider.getTargetContainerCount(resourceName);
+
+ // currently active containers
+ List<String> currentPartitions = new ArrayList<String>();
+ for(String partitionName : currentIdealState.getPartitionSet()) {
+ Map<String, String> currentStateMap = currentStateOutput.getCurrentStateMap(resourceName, new Partition(partitionName));
+ Map<String, String> pendingStateMap = currentStateOutput.getPendingStateMap(resourceName, new Partition(partitionName));
+
+ if(hasOnlineInstance(currentStateMap) ||
+ hasOnlineInstance(pendingStateMap)) {
+ currentPartitions.add(partitionName);
+ }
+ }
+ int currentCount = currentPartitions.size();
+
+ // currently failed containers
+ List<String> failedPartitions = new ArrayList<String>();
+ for(String partitionName : currentIdealState.getPartitionSet()) {
+ Map<String, String> currentStateMap = currentStateOutput.getCurrentStateMap(resourceName, new Partition(partitionName));
+
+ if(!hasOnlineInstance(currentStateMap))
+ continue;
+
+ // container listed online, but does not exist
+ if(!containerStatusProvider.exists(partitionName)) {
+ log.warn(String.format("Container '%s' designated ONLINE, but does not exist", partitionName));
+ failedPartitions.add(partitionName);
+ }
+
+ // container listed online and exists, but in failure state
+ if(containerStatusProvider.exists(partitionName) &&
+ containerStatusProvider.isFailed(partitionName)) {
+ log.warn(String.format("Container '%s' designated ONLINE, but in failure state", partitionName));
+ failedPartitions.add(partitionName);
+ }
+ }
+ int failureCount = failedPartitions.size();
+
+ if(currentCount != targetCount ||
+ failureCount != 0) {
+ log.info(String.format("Rebalancing containers (current=%d, target=%d, failures=%d)", currentCount, targetCount, failureCount));
+
+ currentIdealState.setNumPartitions(targetCount);
+
+ // future active containers
+ log.debug("active containers");
+ List<String> activePartitions = new ArrayList<String>();
+ for(int i=0; i<targetCount; i++) {
+ String partitionName = resourceName + "_" + i;
+ activePartitions.add(partitionName);
+ }
+ activePartitions.removeAll(failedPartitions);
+
+ // future passive containers
+ log.debug("passive containers");
+ List<String> passivePartitions = new ArrayList<String>();
+ for(int i=targetCount; i<currentCount; i++) {
+ String partitionName = resourceName + "_" + i;
+ passivePartitions.add(partitionName);
+ }
+ passivePartitions.addAll(failedPartitions);
+
+ log.debug("output");
+ if(log.isDebugEnabled()) {
+ log.debug(String.format("%s: failed partitions %s", resourceName, failedPartitions));
+ log.debug(String.format("%s: active partitions %s", resourceName, activePartitions));
+ log.debug(String.format("%s: passive partitions %s", resourceName, passivePartitions));
+ }
+
+ log.debug("building ideal state");
+ Map<String, List<String>> listFields = new HashMap<String, List<String>>();
+ Map<String, Map<String, String>> mapFields = new HashMap<String, Map<String, String>>();
+ for(String partitionName : activePartitions) {
+ listFields.put(partitionName, new ArrayList<String>());
+ mapFields.put(partitionName, new HashMap<String, String>());
+ }
+ currentIdealState.getRecord().setListFields(listFields);
+ currentIdealState.getRecord().setMapFields(mapFields);
+
+ log.debug("setting ideal state");
+ String clusterName = manager.getClusterName();
+ manager.getClusterManagmentTool().setResourceIdealState(clusterName, resourceName, currentIdealState);
+
+ log.debug("enable partitions");
+ for(String instanceName : clusterData.getInstanceConfigMap().keySet()) {
+ log.debug(String.format("enable partitions for '%s'", instanceName));
+ manager.getClusterManagmentTool().enablePartition(true, clusterName, instanceName, resourceName, activePartitions);
+ log.debug(String.format("disable partitions for '%s'", instanceName));
+ manager.getClusterManagmentTool().enablePartition(false, clusterName, instanceName, resourceName, passivePartitions);
+ }
+
+ log.debug("done");
+ }
+
+ return currentIdealState;
+// }
+ }
+
+ private boolean hasOnlineInstance(Map<String, String> stateMap) {
+ if(!stateMap.isEmpty()) {
+ for(Map.Entry<String, String> entry : stateMap.entrySet()) {
+ if(entry.getValue().equals("ONLINE")) {
+ return true;
+ }
+ }
+ }
+ return false;
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/MetaManagerDemo.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/MetaManagerDemo.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/MetaManagerDemo.java
new file mode 100644
index 0000000..d0be313
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/MetaManagerDemo.java
@@ -0,0 +1,457 @@
+package org.apache.helix.metamanager;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import java.io.File;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.TreeSet;
+
+import org.I0Itec.zkclient.IDefaultNameSpace;
+import org.I0Itec.zkclient.ZkClient;
+import org.I0Itec.zkclient.ZkServer;
+import org.apache.commons.io.FileUtils;
+import org.apache.helix.HelixAdmin;
+import org.apache.helix.HelixManager;
+import org.apache.helix.controller.HelixControllerMain;
+import org.apache.helix.manager.zk.ZKHelixAdmin;
+import org.apache.helix.metamanager.container.ContainerUtils;
+import org.apache.helix.metamanager.impl.StaticTargetProvider;
+import org.apache.helix.metamanager.impl.local.LocalContainerProvider;
+import org.apache.helix.metamanager.impl.local.LocalStatusProvider;
+import org.apache.helix.metamanager.impl.shell.ShellContainerProvider;
+import org.apache.helix.metamanager.impl.shell.ShellStatusProvider;
+import org.apache.helix.metamanager.impl.yarn.YarnApplicationProperties;
+import org.apache.helix.metamanager.impl.yarn.YarnContainerProvider;
+import org.apache.helix.metamanager.impl.yarn.YarnStatusProvider;
+import org.apache.helix.metamanager.provider.ProviderProcess;
+import org.apache.helix.metamanager.provider.ProviderRebalancer;
+import org.apache.helix.model.ExternalView;
+import org.apache.helix.model.IdealState;
+import org.apache.helix.model.IdealState.RebalanceMode;
+import org.apache.helix.model.InstanceConfig;
+import org.apache.helix.model.StateModelDefinition;
+import org.apache.helix.tools.ClusterStateVerifier;
+import org.apache.helix.tools.ClusterStateVerifier.BestPossAndExtViewZkVerifier;
+import org.apache.helix.tools.StateModelConfigGenerator;
+import org.apache.log4j.Logger;
+
/**
 * Interactive demo of the meta cluster manager recipe. Boots an embedded
 * ZooKeeper, creates a managed cluster (MasterSlave "database" resource) and a
 * meta cluster (OnlineOffline "container" resource with a user-defined
 * rebalancer), starts container-provider processes and two Helix controllers,
 * then walks through scale-up, container-failure, provider-failure and
 * scale-down scenarios, pausing for ENTER between steps.
 * (Program entry point)
 */
public class MetaManagerDemo
{
  // polling interval in ms while waiting for the clusters to converge
  static final long TIMESTEP_INTERVAL = 1000;

  // supported container provider types, selected via args[0]
  static final String PROVIDER_LOCAL = "LOCAL";
  static final String PROVIDER_SHELL = "SHELL";
  static final String PROVIDER_YARN = "YARN";

  static final Logger log = Logger.getLogger(MetaManagerDemo.class);

  // embedded ZooKeeper and cluster/resource naming for the demo
  static final int zkPort = 2199;
  static final String zkAddress = "localhost:" + zkPort;
  static final String metaClusterName = "meta-cluster";
  static final String managedClusterName = "managed-cluster";
  static final String metaResourceName = "container";
  static final String managedResourceName = "database";

  // container scaling parameters: start at min, step up to max, step back down
  static final int numContainerProviders = 3;
  static final int numContainerMax = 7;
  static final int numContainerMin = 3;
  static final int numContainerStep = 2;
  static final int numContainerReplica = 1;

  static final int numManagedPartitions = 10;
  static final int numManagedReplica = 2;

  // all created providers/services, destroyed/stopped by the shutdown hook
  static List<ContainerProvider> providers = new ArrayList<ContainerProvider>();
  static int providerCount = 0;

  static Collection<Service> services = new ArrayList<Service>();

  /**
   * Run the demo end-to-end.
   *
   * @param args
   *          optional provider type: LOCAL (default), SHELL or YARN
   * @throws Exception
   */
  public static void main(String[] args) throws Exception
  {

    String containerProviderType = PROVIDER_LOCAL;
    if(args.length >= 1) {
      containerProviderType = args[0];
    }

    StaticTargetProvider targetProvider = null;
    ProviderProcess[] managerProcesses = new ProviderProcess[numContainerProviders];

    HelixManager metaControllerManager = null;
    HelixManager managedControllerManager = null;

    // best-effort cleanup of containers and services on JVM exit
    Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() {
      @Override
      public void run() {
        log.info("Destroying containers");
        for (ContainerProvider provider : providers) {
          provider.destroyAll();
        }
        log.info("Stopping services");
        for (Service service : services) {
          try { service.stop(); } catch (Exception ignore) {}
        }
      }
    }));

    try
    {
      log.info("Starting ZooKeeper");
      startLocalZookeeper();
      HelixAdmin admin = new ZKHelixAdmin(zkAddress);

      log.info("Create clusters");
      admin.addCluster(metaClusterName, true);
      admin.addCluster(managedClusterName, true);

      log.info("Create providers");
      targetProvider = startService(new StaticTargetProvider(Collections.singletonMap(metaResourceName, numContainerMin)));
      StatusProvider statusProvider = startService(createContainerStatusProvider(containerProviderType));

      // the rebalancer reads these via static accessors (see ConfigTool)
      log.info("Setup config tool");
      ConfigTool.setClusterStatusProvider(targetProvider);
      ConfigTool.setContainerStatusProvider(statusProvider);

      // Managed Cluster: FULL_AUTO MasterSlave resource tagged with the
      // meta resource name so it only runs on containers
      log.info("Setup managed cluster");
      admin.addStateModelDef(managedClusterName, "MasterSlave",
          new StateModelDefinition(StateModelConfigGenerator.generateConfigForMasterSlave()));
      admin.addResource(managedClusterName, managedResourceName, numManagedPartitions,
          "MasterSlave", RebalanceMode.FULL_AUTO.toString());

      IdealState managedIdealState = admin.getResourceIdealState(managedClusterName, managedResourceName);
      managedIdealState.setInstanceGroupTag(metaResourceName);
      managedIdealState.setReplicas(String.valueOf(numManagedReplica));
      admin.setResourceIdealState(managedClusterName, managedResourceName, managedIdealState);

      // Meta Cluster: USER_DEFINED OnlineOffline resource driven by
      // ProviderRebalancer
      log.info("Setup meta cluster");
      admin.addStateModelDef(metaClusterName, "OnlineOffline",
          new StateModelDefinition(StateModelConfigGenerator.generateConfigForOnlineOffline()));
      admin.addResource(metaClusterName, metaResourceName, targetProvider.getTargetContainerCount(metaResourceName),
          "OnlineOffline", RebalanceMode.USER_DEFINED.toString());

      IdealState metaIdealState = admin.getResourceIdealState(metaClusterName, metaResourceName);
      metaIdealState.setRebalancerClassName(ProviderRebalancer.class.getName());
      metaIdealState.setReplicas("1");
      admin.setResourceIdealState(metaClusterName, metaResourceName, metaIdealState);

      log.info("Starting meta processes (container providers)");
      for (int i = 0; i < numContainerProviders; i++)
      {
        String instanceName = "provider_" + i;
        admin.addInstance(metaClusterName, new InstanceConfig(instanceName));

        ClusterAdmin clusterAdmin = new HelixClusterAdmin(managedClusterName, admin);

        managerProcesses[i] = new ProviderProcess(metaClusterName, zkAddress,
            instanceName, startService(createContainerProvider(containerProviderType)), clusterAdmin);
        managerProcesses[i].start();
      }

      log.info("Starting managed cluster controller");
      managedControllerManager = HelixControllerMain.startHelixController(zkAddress,
          managedClusterName, "managedController", HelixControllerMain.STANDALONE);
      log.info("Starting meta cluster controller");
      metaControllerManager = HelixControllerMain.startHelixController(zkAddress,
          metaClusterName, "metaController", HelixControllerMain.STANDALONE);

      waitUntilRebalancedCount(numContainerMin, admin);
      printStep("Initial cluster state", admin);

      // scale up in steps of numContainerStep until numContainerMax
      while(targetProvider.getTargetContainerCount(metaResourceName) < numContainerMax) {
        int newCount = targetProvider.getTargetContainerCount(metaResourceName) + numContainerStep;

        log.info(String.format("Increasing container count to %d", newCount));
        targetProvider.setTargetContainerCount(metaResourceName, newCount);

        triggerPipeline(admin);
        waitUntilRebalancedCount(newCount, admin);
        printStep(String.format("Increased container count to %d", newCount), admin);
      }

      // kill two containers and wait for the rebalancer to restore the count
      log.info("Destroying container 0 and container 1");
      int currentCount = targetProvider.getTargetContainerCount(metaResourceName);
      providers.get(0).destroy("container_0");
      providers.get(0).destroy("container_1");
      triggerPipeline(admin);
      waitUntilRebalancedCount(currentCount, admin);
      printStep("Destroyed container 0 and container 1", admin);

      // kill a whole provider process; remaining providers take over
      log.info("Destroying container provider 0");
      currentCount = targetProvider.getTargetContainerCount(metaResourceName);
      managerProcesses[0].stop();
      waitUntilRebalancedCount(currentCount, admin);
      printStep("Destroyed container provider 0", admin);

      // scale back down to numContainerMin
      while(targetProvider.getTargetContainerCount(metaResourceName) > numContainerMin) {
        int newCount = targetProvider.getTargetContainerCount(metaResourceName) - numContainerStep;

        log.info(String.format("Decreasing container count to %d", newCount));
        targetProvider.setTargetContainerCount(metaResourceName, newCount);

        triggerPipeline(admin);
        waitUntilRebalancedCount(newCount, admin);
        printStep(String.format("Decreased container count to %d", targetProvider.getTargetContainerCount(metaResourceName)), admin);
      }

      log.info("Stopping processes");

    } catch (Exception e)
    {
      e.printStackTrace();
    } finally
    {
      if (managedControllerManager != null) {
        log.info("Disconnecting managed cluster controller");
        managedControllerManager.disconnect();
      }
      if (metaControllerManager != null) {
        log.info("Disconnecting meta cluster controller");
        metaControllerManager.disconnect();
      }
      log.info("Destroying meta processes");
      for (ProviderProcess process : managerProcesses) {
        process.stop();
      }
    }

    // TODO clean up threads correctly
    System.exit(0);
  }

  // rewrite the (unchanged) ideal state to force a rebalancer pass
  private static void triggerPipeline(HelixAdmin admin) {
    IdealState poke = admin.getResourceIdealState(metaClusterName, metaResourceName);
    admin.setResourceIdealState(metaClusterName, metaResourceName, poke);
  }

  // log a banner plus both clusters' status, then block until ENTER is pressed
  private static void printStep(String text, HelixAdmin admin) throws Exception {
    log.info("********************************************************************************");
    log.info(text);
    log.info("********************************************************************************");
    printClusterStatus(admin);

    System.out.println("Press ENTER to continue");
    System.in.read();
  }

  static void printClusterStatus(HelixAdmin admin) throws Exception {
    log.info("Managed cluster status");
    printStatusMasterSlave(admin);
    log.info("Meta cluster status");
    printMetaClusterStatus(admin);
  }

  // poll until both clusters report the expected container count, then verify
  // the managed cluster's external view against its best-possible state
  static void waitUntilRebalancedCount(int containerCount, HelixAdmin admin) throws InterruptedException {
    Thread.sleep(TIMESTEP_INTERVAL);
    while(containerCount != getMetaContainerCount(admin) ||
          containerCount != getManagedContainerCount(admin)) {
      Thread.sleep(TIMESTEP_INTERVAL);
    }
    ClusterStateVerifier.verifyByPolling(new BestPossAndExtViewZkVerifier(zkAddress, managedClusterName));
  }

  // number of meta-cluster partitions (logical containers) with an ONLINE instance
  static int getMetaContainerCount(HelixAdmin admin) {
    Set<String> assignedInstances = new HashSet<String>();

    ExternalView externalView = admin.getResourceExternalView(metaClusterName, metaResourceName);

    for (String partitionName : externalView.getPartitionSet())
    {
      Map<String, String> stateMap = externalView.getStateMap(partitionName);
      if(stateMap == null)
        continue;

      for(String instanceName : stateMap.keySet()){
        if ("ONLINE".equals(stateMap.get(instanceName))) {
          // NOTE: partition name is added here, so this counts online partitions
          assignedInstances.add(partitionName);
          break;
        }
      }
    }

    return assignedInstances.size();
  }

  // number of distinct managed-cluster instances serving a MASTER or SLAVE replica
  static int getManagedContainerCount(HelixAdmin admin) {
    Set<String> assignedInstances = new HashSet<String>();

    ExternalView externalView = admin.getResourceExternalView(managedClusterName, managedResourceName);

    for (String partitionName : externalView.getPartitionSet())
    {
      Map<String, String> stateMap = externalView.getStateMap(partitionName);
      if(stateMap == null)
        continue;

      for(String instanceName : stateMap.keySet()){
        if ("MASTER".equals(stateMap.get(instanceName)) ||
            "SLAVE".equals(stateMap.get(instanceName))) {
          assignedInstances.add(instanceName);
        }
      }
    }

    return assignedInstances.size();
  }

  // log a container -> acquiring-provider table for the meta cluster
  static void printMetaClusterStatus(HelixAdmin admin)
  {
    ExternalView externalView = admin
        .getResourceExternalView(metaClusterName, metaResourceName);
    TreeSet<String> treeSet = new TreeSet<String>(
        externalView.getPartitionSet());
    log.info("container" + "\t" + "acquired by");
    log.info("======================================");
    for (String partitionName : treeSet)
    {
      Map<String, String> stateMap = externalView.getStateMap(partitionName);
      String acquiredBy = null;
      if (stateMap != null)
      {
        for(String instanceName:stateMap.keySet()){
          if ("ONLINE".equals(stateMap.get(instanceName))){
            acquiredBy = instanceName;
            break;
          }
        }
      }
      log.info(partitionName + "\t"
          + ((acquiredBy != null) ? acquiredBy : "NONE"));
    }
  }

  // log a partition -> master/slave table for the managed cluster
  static void printStatusMasterSlave(HelixAdmin admin)
  {
    ExternalView externalView = admin
        .getResourceExternalView(managedClusterName, managedResourceName);
    TreeSet<String> treeSet = new TreeSet<String>(
        externalView.getPartitionSet());
    log.info("partition" + "\t" + "master" + "\t\t" + "slave");
    log.info("============================================================");
    for (String partitionName : treeSet)
    {
      Map<String, String> stateMap = externalView.getStateMap(partitionName);
      String master = "NONE";
      String slave = "NONE";
      if (stateMap != null)
      {
        for(String instanceName:stateMap.keySet()){
          if ("MASTER".equals(stateMap.get(instanceName))){
            master = instanceName;
          }
          if ("SLAVE".equals(stateMap.get(instanceName))){
            slave = instanceName;
          }
        }
      }
      log.info(String.format("%s\t%s\t%s", partitionName, master, slave));
    }
  }

  /**
   * Start an embedded ZooKeeper server on zkPort, wiping any previous
   * data/log directories under /tmp/metamanager/.
   */
  public static void startLocalZookeeper() throws Exception
  {
    ZkServer server = null;
    String baseDir = "/tmp/metamanager/";
    final String dataDir = baseDir + "zk/dataDir";
    final String logDir = baseDir + "zk/logDir";
    FileUtils.deleteDirectory(new File(dataDir));
    FileUtils.deleteDirectory(new File(logDir));

    IDefaultNameSpace defaultNameSpace = new IDefaultNameSpace()
    {
      @Override
      public void createDefaultNameSpace(ZkClient zkClient)
      {
        // no default namespace needed for the demo
      }
    };
    server = new ZkServer(dataDir, logDir, defaultNameSpace, zkPort);
    server.start();

  }

  /**
   * Create a container provider of the given type (LOCAL, SHELL or YARN),
   * register the "container" type from container.properties, and record the
   * provider for shutdown cleanup.
   *
   * @throws IllegalArgumentException for an unknown provider type
   */
  private static ContainerProviderService createContainerProvider(String type) throws Exception {
    String providerName = "provider_" + providerCount;
    providerCount++;

    if(PROVIDER_LOCAL.equalsIgnoreCase(type)) {
      log.info("Using VM-local container provider");
      LocalContainerProvider provider = new LocalContainerProvider(zkAddress, managedClusterName, providerName);
      provider.registerType("container", ContainerUtils.getPropertiesFromResource("container.properties"));
      providers.add(provider);
      return provider;
    } else if (PROVIDER_SHELL.equalsIgnoreCase(type)) {
      log.info("Using shell-based container provider");
      ShellContainerProvider provider = new ShellContainerProvider(zkAddress, managedClusterName, providerName);
      provider.registerType("container", ContainerUtils.getPropertiesFromResource("container.properties"));
      providers.add(provider);
      return provider;
    } else if (PROVIDER_YARN.equalsIgnoreCase(type)) {
      YarnApplicationProperties properties = new YarnApplicationProperties();
      properties.put(YarnApplicationProperties.HELIX_CLUSTER, managedClusterName);
      properties.put(YarnApplicationProperties.HELIX_ZOOKEEPER, zkAddress);
      properties.put(YarnApplicationProperties.PROVIDER_METADATA, zkAddress);
      properties.put(YarnApplicationProperties.PROVIDER_NAME, providerName);

      log.info("Using yarn-based container provider");
      YarnContainerProvider yarnProvider = new YarnContainerProvider(properties);
      yarnProvider.registerType("container", ContainerUtils.getPropertiesFromResource("container.properties"));

      providers.add(yarnProvider);
      return yarnProvider;
    } else {
      throw new IllegalArgumentException(String.format("Unknown container provider type '%s'", type));
    }
  }

  /**
   * Create the container status provider matching the given provider type.
   *
   * @throws IllegalArgumentException for an unknown provider type
   */
  private static StatusProviderService createContainerStatusProvider(String type) throws Exception {
    if(PROVIDER_LOCAL.equalsIgnoreCase(type)) {
      log.info("Using VM-local container status provider");
      return new LocalStatusProvider();
    } else if (PROVIDER_SHELL.equalsIgnoreCase(type)) {
      log.info("Using shell-based container status provider");
      return new ShellStatusProvider();
    } else if (PROVIDER_YARN.equalsIgnoreCase(type)) {
      log.info("Using yarn-based container status provider");
      return new YarnStatusProvider(zkAddress);
    } else {
      throw new IllegalArgumentException(String.format("Unknown container status provider type '%s'", type));
    }
  }

  // start a service and register it for shutdown-hook cleanup
  private static <T extends Service> T startService(T service) throws Exception {
    service.start();
    services.add(service);
    return service;
  }
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/Service.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/Service.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/Service.java
new file mode 100644
index 0000000..c13a62e
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/Service.java
@@ -0,0 +1,38 @@
+package org.apache.helix.metamanager;
+
+import java.util.Properties;
+
/**
 * Abstraction for a configurable and runnable service. Provides light-weight
 * dependency injection and life-cycle management for pluggable components.
 * Expected life cycle: {@link #configure(Properties)}, then {@link #start()},
 * then {@link #stop()}.
 */
public interface Service {

    /**
     * Configure service internals.<br/>
     * <b>INVARIANT:</b> executed only once, before {@link #start()}
     *
     * @param properties
     *            arbitrary key-value properties, parsed internally
     * @throws Exception
     */
    void configure(Properties properties) throws Exception;

    /**
     * Start service.<br/>
     * <b>PRECONDITION:</b> configure() was invoked<br/>
     * <b>INVARIANT:</b> executed only once
     *
     * @throws Exception
     */
    void start() throws Exception;

    /**
     * Stop service. Releases any resources acquired by start().<br/>
     * <b>INVARIANT:</b> idempotent; safe to call even if start() failed
     *
     * @throws Exception
     */
    void stop() throws Exception;
}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/StaticStatusProvider.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/StaticStatusProvider.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/StaticStatusProvider.java
new file mode 100644
index 0000000..249b9b8
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/StaticStatusProvider.java
@@ -0,0 +1,28 @@
+package org.apache.helix.metamanager;
+
+import java.util.HashMap;
+import java.util.Map;
+
+
+public class StaticStatusProvider implements ClusterStatusProvider {
+
+ final Map<String, Integer> targetCounts = new HashMap<String, Integer>();
+
+ public StaticStatusProvider() {
+ // left blank
+ }
+
+ public StaticStatusProvider(Map<String, Integer> targetCounts) {
+ this.targetCounts.putAll(targetCounts);
+ }
+
+ @Override
+ public int getTargetContainerCount(String containerType) {
+ return targetCounts.get(containerType);
+ }
+
+ public void setTargetContainerCount(String containerType, int targetCount) {
+ targetCounts.put(containerType, targetCount);
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/StatusProvider.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/StatusProvider.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/StatusProvider.java
new file mode 100644
index 0000000..841f08d
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/StatusProvider.java
@@ -0,0 +1,35 @@
+package org.apache.helix.metamanager;
+
+import org.apache.helix.metamanager.provider.ProviderRebalancer;
+
/**
 * Abstraction for status reader of container deployment framework. Provides
 * information on physical existence of container and activity or failure state.
 * Is polled by ProviderRebalancer and should be light-weight and non-blocking.<br/>
 * <b>NOTE:</b> This information is solely based on the low-level framework and
 * may be different from the participant state in Helix. (The Helix participant
 * may not even exist)
 *
 * @see ProviderRebalancer
 */
public interface StatusProvider {

    /**
     * Determine whether container physically exists, regardless of health.
     *
     * @param id
     *            unique container id
     * @return true, if container is present
     */
    public boolean exists(String id);

    /**
     * Determine whether container is healthy as determined by the deployment
     * framework. A container may exist but be unhealthy; see {@link #exists(String)}.
     *
     * @param id
     *            unique container id
     * @return true, if container is healthy
     */
    public boolean isHealthy(String id);
}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/StatusProviderService.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/StatusProviderService.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/StatusProviderService.java
new file mode 100644
index 0000000..3c2739d
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/StatusProviderService.java
@@ -0,0 +1,9 @@
+package org.apache.helix.metamanager;
+
/**
 * StatusProvider as configurable service. Combines the status-reading contract
 * of {@link StatusProvider} with the life cycle of {@link Service}.
 */
public interface StatusProviderService extends StatusProvider, Service {

}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/TargetProvider.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/TargetProvider.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/TargetProvider.java
new file mode 100644
index 0000000..22524c4
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/TargetProvider.java
@@ -0,0 +1,25 @@
+package org.apache.helix.metamanager;
+
+import org.apache.helix.metamanager.provider.ProviderRebalancer;
+
/**
 * Abstraction for target computation and statistics collection. Provides target
 * count of containers for ProviderRebalancer. Is polled by ProviderRebalancer
 * and should be light-weight and non-blocking.<br/>
 * <b>NOTE:</b> The target count is oblivious of failed containers and can be
 * obtained in an arbitrary way. See implementations for examples.
 *
 * @see ProviderRebalancer
 */
public interface TargetProvider {

    /**
     * Return target count of containers of a specific type.
     *
     * @param containerType
     *            meta resource name
     * @return container count &gt;= 1
     * @throws Exception
     *             if the target count cannot be determined
     */
    public int getTargetContainerCount(String containerType) throws Exception;
}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/TargetProviderService.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/TargetProviderService.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/TargetProviderService.java
new file mode 100644
index 0000000..4d6275e
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/TargetProviderService.java
@@ -0,0 +1,9 @@
+package org.apache.helix.metamanager;
+
/**
 * TargetProvider as configurable service. Combines the target-computation
 * contract of {@link TargetProvider} with the life cycle of {@link Service}.
 */
public interface TargetProviderService extends TargetProvider, Service {

}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ZookeeperSetter.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ZookeeperSetter.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ZookeeperSetter.java
new file mode 100644
index 0000000..39e20fe
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/ZookeeperSetter.java
@@ -0,0 +1,30 @@
+package org.apache.helix.metamanager;
+
+import org.I0Itec.zkclient.ZkClient;
+import org.apache.log4j.Logger;
+
+/**
+ * Utility for setting String values in the embedded zookeeper service.
+ * (Program entry point)
+ */
+public class ZookeeperSetter {
+
+    static Logger log = Logger.getLogger(ZookeeperSetter.class);
+
+    /**
+     * Writes a value to a zookeeper node, creating the (persistent) path if
+     * necessary.
+     *
+     * @param args zookeeper address, node path, value to write
+     */
+    public static void main(String[] args) {
+        if (args.length < 3) {
+            System.err.println("usage: ZookeeperSetter <address> <path> <value>");
+            System.exit(1);
+        }
+
+        String address = args[0];
+        String path = args[1];
+        String value = args[2];
+
+        log.info(String.format("Setting %s:%s to '%s'", address, path, value));
+
+        ZkClient client = new ZkClient(address);
+        try {
+            client.createPersistent(path, true);
+            client.writeData(path, value);
+        } finally {
+            // close the connection so the VM can exit cleanly
+            // (previously leaked, leaving zookeeper threads alive)
+            client.close();
+        }
+    }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrap/BootUtil.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrap/BootUtil.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrap/BootUtil.java
new file mode 100644
index 0000000..004de06
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrap/BootUtil.java
@@ -0,0 +1,58 @@
+package org.apache.helix.metamanager.bootstrap;
+
+import java.util.Map;
+import java.util.Properties;
+
+import org.apache.helix.metamanager.container.ContainerProcessProperties;
+import org.apache.log4j.Logger;
+
+/**
+ * Static helpers for the bootstrap process: extracting namespaced property
+ * subsets and reflectively instantiating configured classes.
+ */
+public class BootUtil {
+
+    public static final String CLASS_PROPERTY = "class";
+    static final Logger log = Logger.getLogger(BootUtil.class);
+
+    /**
+     * Extracts the sub-properties of a dot-separated namespace, stripping the
+     * "namespace." prefix from each matching key.
+     *
+     * @param namespace prefix (without trailing dot)
+     * @param source properties to filter
+     * @return new Properties holding only the namespaced entries, keys stripped
+     */
+    public static Properties getNamespace(String namespace, Properties source) {
+        Properties dest = new Properties();
+        String prefix = namespace + ".";
+
+        for (Map.Entry<Object, Object> rawEntry : source.entrySet()) {
+            String key = (String) rawEntry.getKey();
+            String value = (String) rawEntry.getValue();
+
+            if (key.startsWith(prefix)) {
+                dest.put(key.substring(prefix.length()), value);
+            }
+        }
+
+        return dest;
+    }
+
+    /**
+     * Instantiates the class named by the "class" property, preferring a
+     * ContainerProcessProperties constructor over the default constructor.
+     *
+     * @param properties configuration, must contain the "class" property
+     * @return new instance of the configured class
+     * @throws Exception if neither constructor is usable; construction
+     *             failures of an existing constructor propagate as-is
+     */
+    @SuppressWarnings("unchecked")
+    public static <T> T createInstance(Properties properties) throws Exception {
+        String className = properties.getProperty(CLASS_PROPERTY);
+
+        Class<?> containerClass = Class.forName(className);
+
+        try {
+            log.debug(String.format("checking for properties constructor in class '%s'", className));
+            return (T) containerClass.getConstructor(ContainerProcessProperties.class).newInstance(properties);
+        } catch (NoSuchMethodException e) {
+            // constructor absent: fall through to the default constructor
+            log.debug("no properties constructor found");
+        } catch (IllegalArgumentException e) {
+            // properties instance not assignable to ContainerProcessProperties:
+            // fall through as before. Note: a genuine failure thrown BY the
+            // constructor (InvocationTargetException) now propagates instead of
+            // being silently swallowed and masked by the default-ctor fallback.
+            log.debug("no properties constructor found");
+        }
+
+        try {
+            log.debug(String.format("checking for default constructor in class '%s'", className));
+            return (T) containerClass.getConstructor().newInstance();
+        } catch (NoSuchMethodException e) {
+            log.debug("no default constructor found");
+        }
+
+        throw new Exception(String.format("no suitable constructor for class '%s'", className));
+    }
+
+    private BootUtil() {
+        // utility class, not instantiable
+    }
+
+}
[03/15] Adding Helix-task-framework and Yarn integration modules
Posted by ki...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/YarnMasterService.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/YarnMasterService.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/YarnMasterService.java
new file mode 100644
index 0000000..b63be1f
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/YarnMasterService.java
@@ -0,0 +1,361 @@
+package org.apache.helix.metamanager.provider.yarn;
+
+import java.io.File;
+import java.io.IOException;
+import java.net.InetSocketAddress;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.Executors;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.net.NetUtils;
+import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.hadoop.yarn.api.AMRMProtocol;
+import org.apache.hadoop.yarn.api.ContainerManager;
+import org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
+import org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest;
+import org.apache.hadoop.yarn.api.records.AMResponse;
+import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
+import org.apache.hadoop.yarn.api.records.Container;
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
+import org.apache.hadoop.yarn.api.records.ContainerStatus;
+import org.apache.hadoop.yarn.api.records.NodeId;
+import org.apache.hadoop.yarn.api.records.Priority;
+import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.api.records.ResourceRequest;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.exceptions.YarnRemoteException;
+import org.apache.hadoop.yarn.ipc.YarnRPC;
+import org.apache.hadoop.yarn.util.Records;
+import org.apache.helix.metamanager.provider.yarn.ContainerMetadata.ContainerState;
+import org.apache.helix.metamanager.provider.yarn.MetadataService.MetadataServiceException;
+import org.apache.log4j.Logger;
+
+import com.google.common.collect.Maps;
+
+/**
+ * YARN-facing side of the container provider. Periodically reconciles the
+ * container metadata (read through {@link MetadataService}) with the YARN
+ * resource manager: requests new containers for ACQUIRE metadata, launches
+ * assigned containers, and releases containers whose metadata is HALTED or
+ * which are no longer required. Reconciliation runs in a single-threaded
+ * scheduled task ({@link YarnService}).
+ */
+public class YarnMasterService {
+
+ static final Logger log = Logger.getLogger(YarnMasterService.class);
+
+ static final String REQUIRED_TYPE = "container";
+
+ static final long ZOOKEEPER_TIMEOUT = 5000;
+
+ // period of the reconciliation cycle, in milliseconds
+ static final long MASTERSERVICE_INTERVAL = 1000;
+
+ static final String CONTAINERS = "CONTAINERS";
+
+ // launch template: script path, cluster address, cluster name, metadata
+ // address, provider name, container id, then stdout/stderr redirect dirs
+ static final String CONTAINER_COMMAND = "/bin/sh %s %s %s %s %s %s 1>%s/stdout 2>%s/stderr";
+
+ /*
+ * CONTAINERS
+ * A (A, READY)
+ * B (B, RUNNING)
+ */
+
+ final ApplicationConfig appConfig;
+ final AMRMProtocol yarnClient;
+ final ApplicationAttemptId appAtemptId;
+
+ final Configuration yarnConfig;
+
+ final File dummy = new File("/tmp/dummy");
+
+ // yarn containers granted by the RM but not yet bound to a meta container
+ final Map<ContainerId, Container> unassignedContainers = new HashMap<ContainerId, Container>();
+ // yarn containers currently bound to a meta container
+ final Map<ContainerId, Container> activeContainers = new HashMap<ContainerId, Container>();
+ // terminal status of yarn containers reported completed by the RM
+ final Map<ContainerId, ContainerStatus> completedContainers = new HashMap<ContainerId, ContainerStatus>();
+ // reverse index: yarn container id -> meta container id
+ final Map<ContainerId, String> yarn2meta = new HashMap<ContainerId, String>();
+
+ final MetadataService metaService;
+
+ ScheduledExecutorService executor;
+
+ public YarnMasterService(AMRMProtocol yarnClient, Configuration conf, ApplicationAttemptId appAttemptId, ApplicationConfig appConfig, MetadataService metaService) {
+ this.appConfig = appConfig;
+ this.yarnClient = yarnClient;
+ this.appAtemptId = appAttemptId;
+ this.yarnConfig = conf;
+ this.metaService = metaService;
+ }
+
+ /** Starts the periodic reconciliation task. */
+ public void startService() {
+ log.debug("starting yarn master service");
+
+ executor = Executors.newSingleThreadScheduledExecutor();
+ executor.scheduleAtFixedRate(new YarnService(), 0, MASTERSERVICE_INTERVAL, TimeUnit.MILLISECONDS);
+ }
+
+ /** Stops the reconciliation task, blocking until it has terminated. */
+ public void stopService() {
+ log.debug("stopping yarn master service");
+
+ if(executor != null) {
+ executor.shutdown();
+ while(!executor.isTerminated()) {
+ try {
+ Thread.sleep(100);
+ } catch (InterruptedException e) {
+ // ignore
+ }
+ }
+ executor = null;
+ }
+ }
+
+ /** Reads all container metadata entries owned by this provider. */
+ Collection<ContainerMetadata> readOwnedMetadata() throws MetadataServiceException {
+ log.debug("reading container data");
+
+ Collection<ContainerMetadata> containers = new ArrayList<ContainerMetadata>();
+ for(ContainerMetadata meta : metaService.readAll()) {
+ if(meta.owner.equals(appConfig.providerName)) {
+ containers.add(meta);
+ log.debug(String.format("found container node '%s' (state=%s, yarnId=%s, command=%s, owner=%s)",
+ meta.id, meta.state, meta.yarnId, meta.command, meta.owner));
+ }
+ }
+ return containers;
+ }
+
+ /** One reconciliation cycle; scheduled at a fixed rate by startService(). */
+ class YarnService implements Runnable {
+ // monotonically increasing id for AMRM allocate requests
+ int responseId = 0;
+
+ @Override
+ public void run() {
+ try {
+ log.debug("running yarn service update cycle");
+
+ Collection<ContainerMetadata> metadata = readOwnedMetadata();
+
+ // active meta containers
+ int numMetaActive = countActiveMeta(metadata);
+
+ // newly acquired meta containers
+ int numMetaAcquire = countAcquireMeta(metadata);
+
+ // destroyed meta containers
+ List<ContainerId> destroyedReleasedIds = createDestroyedReleaseList(metadata);
+ int numMetaCompleted = destroyedReleasedIds.size();
+
+ int numMeta = numMetaAcquire + numMetaActive + numMetaCompleted;
+
+ // yarn containers
+ int numYarnUnassigned = unassignedContainers.size();
+ int numYarnActive = activeContainers.size();
+ int numYarnCompleted = completedContainers.size();
+ int numYarn = numYarnUnassigned + numYarnActive + numYarnCompleted;
+
+ // negative value means surplus unassigned containers to release
+ int numYarnRequired = numMetaAcquire - numYarnUnassigned;
+
+ // additionally required containers
+ int numRequestAdditional = Math.max(0, numYarnRequired);
+
+ // overstock containers
+ List<ContainerId> unneededReleasedIds = createOverstockReleaseList(numYarnRequired);
+
+ int numReleased = destroyedReleasedIds.size() + unneededReleasedIds.size();
+
+ log.debug(String.format("meta containers (total=%d, acquire=%d, active=%d, completed=%d)", numMeta, numMetaAcquire, numMetaActive, numMetaCompleted));
+ log.debug(String.format("yarn containers (total=%d, unassigned=%d, active=%d, completed=%d)", numYarn, numYarnUnassigned, numYarnActive, numYarnCompleted));
+ log.debug(String.format("requesting %d new containers, releasing %d", numRequestAdditional, numReleased));
+
+ Priority priority = Records.newRecord(Priority.class);
+ priority.setPriority(0);
+
+ Resource resource = Records.newRecord(Resource.class);
+ resource.setMemory(256); // TODO make dynamic
+
+ // "*" host name: no locality constraint on the request
+ ResourceRequest resourceRequest = Records.newRecord(ResourceRequest.class);
+ resourceRequest.setHostName("*");
+ resourceRequest.setNumContainers(numRequestAdditional);
+ resourceRequest.setPriority(priority);
+ resourceRequest.setCapability(resource);
+
+ AllocateRequest request = Records.newRecord(AllocateRequest.class);
+ request.setResponseId(responseId);
+ request.setApplicationAttemptId(appAtemptId);
+ request.addAsk(resourceRequest);
+ request.addAllReleases(destroyedReleasedIds);
+ request.addAllReleases(unneededReleasedIds);
+
+ responseId++;
+
+ AllocateResponse allocateResponse = null;
+ try {
+ allocateResponse = yarnClient.allocate(request);
+ } catch (YarnRemoteException e) {
+ // ignore
+ log.error("Error allocating containers", e);
+ return;
+ }
+
+ AMResponse response = allocateResponse.getAMResponse();
+
+ // newly added containers
+ for(Container container : response.getAllocatedContainers()) {
+ unassignedContainers.put(container.getId(), container);
+ }
+
+ log.info(String.format("%d new containers available, %d required", unassignedContainers.size(), numMetaAcquire))
;
+
+ // pair each unassigned yarn container with a not-yet-assigned
+ // (yarnId < 0) meta container and launch the configured command
+ Iterator<Container> itYarn = unassignedContainers.values().iterator();
+ Iterator<ContainerMetadata> itMeta = metadata.iterator();
+ while(itYarn.hasNext() && itMeta.hasNext()) {
+ ContainerMetadata meta = itMeta.next();
+
+ if(meta.yarnId >= 0)
+ continue;
+
+ Container containerYarn = itYarn.next();
+
+ log.debug(String.format("assigning yarn container '%s' to container node '%s'", containerYarn.getId(), meta.id));
+
+ String command = String.format(CONTAINER_COMMAND, meta.command,
+ appConfig.clusterAddress, appConfig.clusterName, appConfig.metadataAddress, appConfig.providerName,
+ meta.id, "/tmp/" + meta.id, "/tmp/" + meta.id);
+ //ApplicationConstants.LOG_DIR_EXPANSION_VAR, ApplicationConstants.LOG_DIR_EXPANSION_VAR);
+
+ ContainerLaunchContext context = Records.newRecord(ContainerLaunchContext.class);
+ context.setContainerId(containerYarn.getId());
+ context.setResource(containerYarn.getResource());
+ context.setEnvironment(Maps.<String, String>newHashMap());
+ context.setCommands(Collections.singletonList(command));
+ context.setLocalResources(Utils.getDummyResources());
+ try {
+ context.setUser(UserGroupInformation.getCurrentUser().getShortUserName());
+ } catch (IOException e) {
+ log.error(String.format("failed setting up container '%s' user information", meta.id));
+ return;
+ }
+
+ log.debug(String.format("container '%s' executing command '%s'", meta.id, command));
+
+ StartContainerRequest startReq = Records.newRecord(StartContainerRequest.class);
+ startReq.setContainerLaunchContext(context);
+
+ try {
+ getContainerManager(containerYarn).startContainer(startReq);
+
+ } catch (YarnRemoteException e) {
+ log.error(String.format("Error starting container '%s'", meta.id), e);
+ return;
+ }
+
+ log.debug(String.format("container '%s' started, updating container node", meta.id));
+
+ metaService.update(new ContainerMetadata(meta, ContainerState.CONNECTING, containerYarn.getId().getId()));
+ yarn2meta.put(containerYarn.getId(), meta.id);
+
+ log.debug(String.format("removing '%s' from unassigned yarn containers and adding to active list", containerYarn.getId()));
+
+ itYarn.remove();
+ activeContainers.put(containerYarn.getId(), containerYarn);
+
+ }
+
+ // process containers the RM reports as completed
+ for(ContainerStatus status : response.getCompletedContainersStatuses()) {
+ ContainerId id = status.getContainerId();
+
+ log.info(String.format("Container '%s' completed", id));
+
+ if(unassignedContainers.containsKey(id)) {
+ log.info(String.format("Unassigned container '%s' terminated, removing", id));
+ unassignedContainers.remove(id);
+ // TODO handle
+ }
+
+ if(activeContainers.containsKey(id)) {
+ log.info(String.format("Active container '%s' terminated, removing", id));
+ activeContainers.remove(id);
+
+ String metaId = yarn2meta.get(id);
+ ContainerMetadata meta = metaService.read(metaId);
+
+ log.debug(String.format("container '%s' finalized, updating container node", meta.id));
+
+ metaService.update(new ContainerMetadata(meta, ContainerState.FINALIZE));
+ }
+
+ completedContainers.put(id, status);
+ }
+
+ log.debug("yarn service update cycle complete");
+
+ } catch (Exception e) {
+ log.error("Error while executing yarn update cycle", e);
+ }
+ }
+
+ /** Collects up to -numYarnRequired surplus unassigned containers for release. */
+ private List<ContainerId> createOverstockReleaseList(int numYarnRequired) {
+ List<ContainerId> unneededReleasedIds = new ArrayList<ContainerId>();
+ Iterator<Container> itUnassigned = unassignedContainers.values().iterator();
+ if(numYarnRequired < 0) {
+ for(int i=0; i<-numYarnRequired && itUnassigned.hasNext(); i++) {
+ Container container = itUnassigned.next();
+ unneededReleasedIds.add(container.getId());
+ log.debug(String.format("Container '%s' no longer required, removing", container.getId()));
+ itUnassigned.remove();
+ }
+ }
+ return unneededReleasedIds;
+ }
+
+ /** Builds release list for containers whose metadata reached HALTED. */
+ private List<ContainerId> createDestroyedReleaseList(
+ Collection<ContainerMetadata> metadata) {
+ List<ContainerId> releasedIds = new ArrayList<ContainerId>();
+ for(ContainerMetadata meta : metadata) {
+ if(meta.state == ContainerState.HALTED) {
+ ContainerId containerId = Records.newRecord(ContainerId.class);
+ containerId.setApplicationAttemptId(appAtemptId);
+ containerId.setId(meta.yarnId);
+ releasedIds.add(containerId);
+ log.debug(String.format("releasing container '%s'", containerId));
+ }
+ }
+ return releasedIds;
+ }
+
+ /** Counts metadata entries in state ACQUIRE. */
+ private int countAcquireMeta(Collection<ContainerMetadata> metadata) {
+ int numMetaAcquire = 0;
+ for(ContainerMetadata meta : metadata) {
+ if(meta.state == ContainerState.ACQUIRE) {
+ numMetaAcquire++;
+ }
+ }
+ return numMetaAcquire;
+ }
+
+ /** Counts metadata entries in any non-terminal, non-ACQUIRE state. */
+ private int countActiveMeta(Collection<ContainerMetadata> metadata) {
+ int numMetaActive = 0;
+ for(ContainerMetadata meta : metadata) {
+ if(meta.state != ContainerState.ACQUIRE &&
+ meta.state != ContainerState.HALTED &&
+ meta.state != ContainerState.FINALIZE) {
+ numMetaActive++;
+ }
+ }
+ return numMetaActive;
+ }
+ }
+
+ /** Opens an RPC proxy to the node manager hosting the given container. */
+ private ContainerManager getContainerManager(Container container) {
+ YarnConfiguration yarnConf = new YarnConfiguration(yarnConfig);
+ YarnRPC rpc = YarnRPC.create(yarnConf);
+ NodeId nodeId = container.getNodeId();
+ String containerIpPort = String.format("%s:%d", nodeId.getHost(),
+ nodeId.getPort());
+ log.info("Connecting to ContainerManager at: " + containerIpPort);
+ InetSocketAddress addr = NetUtils.createSocketAddr(containerIpPort);
+ ContainerManager cm = (ContainerManager) rpc.getProxy(
+ ContainerManager.class, addr, yarnConfig);
+ return cm;
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/YarnProcess.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/YarnProcess.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/YarnProcess.java
new file mode 100644
index 0000000..b1a22d5
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/YarnProcess.java
@@ -0,0 +1,171 @@
+package org.apache.helix.metamanager.provider.yarn;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import java.util.concurrent.Executors;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.helix.HelixManager;
+import org.apache.helix.HelixManagerFactory;
+import org.apache.helix.InstanceType;
+import org.apache.helix.metamanager.managed.ManagedFactory;
+import org.apache.helix.metamanager.provider.yarn.ContainerMetadata.ContainerState;
+import org.apache.log4j.Logger;
+
+/**
+ * Process running inside a yarn-managed container. Polls its own metadata
+ * entry and drives the container state machine: CONNECTING -&gt; ACTIVE (start
+ * Helix participant), TEARDOWN -&gt; HALTED (stop participant and shut down).
+ * Entry point: {@link #main(String[])}.
+ */
+public class YarnProcess {
+ static final Logger log = Logger.getLogger(YarnProcess.class);
+
+ // polling period of the container state machine, in milliseconds
+ static final long CONTAINERSERVICE_INTERVAL = 1000;
+
+ final ApplicationConfig appConfig;
+ final String containerId;
+
+ HelixManager participantManager;
+
+ MetadataService metaService;
+ ScheduledExecutorService executor;
+
+
+ public YarnProcess(ApplicationConfig appConfig, String containerId) {
+ this.appConfig = appConfig;
+ this.containerId = containerId;
+ }
+
+ /** Starts the metadata service and the periodic status-update task. */
+ public void startService() {
+ log.info(String.format("start metadata service for '%s'", containerId));
+ metaService = new MetadataService(appConfig.metadataAddress);
+ metaService.start();
+
+ executor = Executors.newSingleThreadScheduledExecutor();
+ executor.scheduleAtFixedRate(new ContainerService(), 0, CONTAINERSERVICE_INTERVAL, TimeUnit.MILLISECONDS);
+ }
+
+ /** Stops the metadata service and the update task (non-blocking shutdown). */
+ public void stopService() {
+ log.info(String.format("stop metadata service for '%s'", containerId));
+ if (metaService != null) {
+ metaService.stop();
+ metaService = null;
+ }
+
+ if(executor != null) {
+ executor.shutdown();
+ }
+ }
+
+ /** @return true while the update task has not terminated */
+ public boolean isRunning() {
+ if(executor == null)
+ return false;
+ return !executor.isTerminated();
+ }
+
+ /** Connects this container as a Helix MasterSlave participant. */
+ public void startParticipant() throws Exception {
+ log.info("STARTING " + containerId);
+ participantManager = HelixManagerFactory.getZKHelixManager(appConfig.clusterName,
+ containerId, InstanceType.PARTICIPANT, appConfig.clusterAddress);
+ participantManager.getStateMachineEngine().registerStateModelFactory(
+ "MasterSlave", new ManagedFactory());
+ participantManager.connect();
+ log.info("STARTED " + containerId);
+ }
+
+ /** Disconnects the Helix participant, if connected. */
+ public void stopParticipant() {
+ if (participantManager != null) {
+ participantManager.disconnect();
+ participantManager = null;
+ }
+ }
+
+ /**
+ * Single state-machine step: reads this container's metadata and reacts
+ * to the current state. Invoked periodically by ContainerService.
+ */
+ public void updateContainerStatus() {
+ log.info("updating container status");
+ try {
+ ContainerMetadata meta = metaService.read(containerId);
+
+ if(meta.state == ContainerState.CONNECTING) {
+ log.info("container connecting, going to active");
+ try {
+ startParticipant();
+ metaService.update(new ContainerMetadata(meta, ContainerState.ACTIVE));
+ } catch (Exception e) {
+ log.error("Failed to start participant, going to failed", e);
+ stopParticipant();
+ metaService.update(new ContainerMetadata(meta, ContainerState.FAILED));
+ }
+ }
+
+ if(meta.state == ContainerState.ACTIVE) {
+ // do something
+ // and go to failed on error
+ }
+
+ if(meta.state == ContainerState.TEARDOWN) {
+ log.info("container teardown, going to halted");
+ stopParticipant();
+ metaService.update(new ContainerMetadata(meta, ContainerState.HALTED));
+ stopService();
+ }
+
+ } catch(Exception e) {
+ // NOTE(review): this broad catch treats ANY failure (including a
+ // transient read/update error) as a missing metadata node and
+ // stops the whole service — confirm that is intended
+ log.warn(String.format("Container '%s' does not exist, stopping service", containerId));
+ stopService();
+ }
+ }
+
+ /** Scheduled wrapper delegating to updateContainerStatus(). */
+ class ContainerService implements Runnable {
+ @Override
+ public void run() {
+ updateContainerStatus();
+ }
+ }
+
+ /**
+ * Entry point. args: cluster address, cluster name, metadata address,
+ * provider name, container id. Runs until the service stops itself.
+ */
+ public static void main(String[] args) throws Exception
+ {
+ log.trace("BEGIN YarnProcess.main()");
+
+ final String clusterAddress = args[0];
+ final String clusterName = args[1];
+ final String metadataAddress = args[2];
+ final String providerName = args[3];
+ final String containerId = args[4];
+
+ final ApplicationConfig appConfig = new ApplicationConfig(clusterAddress, clusterName, metadataAddress, providerName);
+
+ final YarnProcess yarnProcess = new YarnProcess(appConfig, containerId);
+
+ yarnProcess.startService();
+
+ Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() {
+ @Override
+ public void run() {
+ yarnProcess.stopService();
+ }
+ }));
+
+ while(yarnProcess.isRunning()) {
+ try {
+ Thread.sleep(100);
+ } catch (InterruptedException e) {
+ // ignore
+ }
+ }
+
+ log.trace("END YarnProcess.main()");
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/ZookeeperMetadataService.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/ZookeeperMetadataService.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/ZookeeperMetadataService.java
new file mode 100644
index 0000000..00bf17f
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/provider/yarn/ZookeeperMetadataService.java
@@ -0,0 +1,102 @@
+package org.apache.helix.metamanager.provider.yarn;
+
+import java.util.ArrayList;
+import java.util.Collection;
+
+import org.I0Itec.zkclient.ZkClient;
+import org.I0Itec.zkclient.exception.ZkException;
+import org.apache.log4j.Logger;
+
+/**
+ * Zookeeper-backed implementation of MetadataService. Stores one persistent
+ * node per container under the "/containers" base path, with the metadata
+ * serialized via Utils.toJson / Utils.fromJson. All ZkExceptions are wrapped
+ * in MetadataServiceException.
+ */
+public class ZookeeperMetadataService implements MetadataService {
+
+ static final Logger log = Logger.getLogger(ZookeeperMetadataService.class);
+
+ static final String CONTAINER_NAMESPACE = "containers";
+
+ static final String BASE_PATH = "/" + CONTAINER_NAMESPACE;
+
+ static final long POLL_INTERVAL = 100;
+
+ final String metadataAddress;
+
+ // non-null only between startService() and stopService()
+ ZkClient client;
+
+ public ZookeeperMetadataService(String metadataAddress) {
+ this.metadataAddress = metadataAddress;
+ }
+
+ /** Connects to zookeeper and ensures the base path exists. */
+ public void startService() {
+ log.debug(String.format("starting metadata service for '%s'", metadataAddress));
+
+ client = new ZkClient(metadataAddress);
+
+ client.createPersistent(BASE_PATH, true);
+ }
+
+ /** Closes the zookeeper connection. */
+ public void stopService() {
+ log.debug(String.format("stopping metadata service for '%s'", metadataAddress));
+ if(client != null) {
+ client.close();
+ client = null;
+ }
+ }
+
+ @Override
+ public boolean exists(String id) {
+ return client.exists(makePath(id));
+ }
+
+ @Override
+ public void create(ContainerMetadata meta) throws MetadataServiceException {
+ try {
+ client.createPersistent(makePath(meta.id), Utils.toJson(meta));
+ } catch (ZkException e) {
+ throw new MetadataServiceException(e);
+ }
+ }
+
+ @Override
+ public ContainerMetadata read(String id) throws MetadataServiceException {
+ try {
+ return Utils.fromJson(client.<String>readData(makePath(id)));
+ } catch (ZkException e) {
+ throw new MetadataServiceException(e);
+ }
+ }
+
+ @Override
+ public Collection<ContainerMetadata> readAll() throws MetadataServiceException {
+ try {
+ Collection<ContainerMetadata> metadata = new ArrayList<ContainerMetadata>();
+ for(String id : client.getChildren(BASE_PATH)) {
+ metadata.add(Utils.fromJson(client.<String>readData(makePath(id))));
+ }
+ return metadata;
+ } catch (ZkException e) {
+ throw new MetadataServiceException(e);
+ }
+ }
+
+ @Override
+ public void update(ContainerMetadata meta) throws MetadataServiceException {
+ try {
+ client.writeData(makePath(meta.id), Utils.toJson(meta));
+ } catch (ZkException e) {
+ throw new MetadataServiceException(e);
+ }
+ }
+
+ @Override
+ public void delete(String id) throws MetadataServiceException {
+ try {
+ client.delete(makePath(id));
+ } catch (ZkException e) {
+ throw new MetadataServiceException(e);
+ }
+ }
+
+ // builds the zookeeper path for a container id
+ String makePath(String containerId) {
+ return BASE_PATH + "/" + containerId;
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/ApplicationConfig.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/ApplicationConfig.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/ApplicationConfig.java
new file mode 100644
index 0000000..5950d42
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/ApplicationConfig.java
@@ -0,0 +1,32 @@
+package org.apache.helix.metamanager.yarn;
+
+/**
+ * Immutable bundle of the coordinates a yarn application needs: the Helix
+ * cluster address and name, and the provider address and name.
+ */
+public class ApplicationConfig {
+    final String clusterAddress;
+    final String clusterName;
+    final String providerAddress;
+    final String providerName;
+
+    public ApplicationConfig(String clusterAddress, String clusterName,
+            String providerAddress, String providerName) {
+        this.clusterAddress = clusterAddress;
+        this.clusterName = clusterName;
+        this.providerAddress = providerAddress;
+        this.providerName = providerName;
+    }
+
+    public String getClusterAddress() { return clusterAddress; }
+
+    public String getClusterName() { return clusterName; }
+
+    public String getProviderAddress() { return providerAddress; }
+
+    public String getProviderName() { return providerName; }
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/ContainerMetadata.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/ContainerMetadata.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/ContainerMetadata.java
new file mode 100644
index 0000000..1245080
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/ContainerMetadata.java
@@ -0,0 +1,50 @@
+package org.apache.helix.metamanager.yarn;
+
+
+/**
+ * Mutable bookkeeping record for a managed container: id, lifecycle state,
+ * assigned yarn container id (-1 while unassigned), launch command and owner.
+ * Copy constructors derive a new record with an updated state.
+ */
+class ContainerMetadata {
+
+    static enum ContainerState {
+        ACQUIRE,
+        CONNECTING,
+        ACTIVE,
+        TEARDOWN,
+        FAILED,
+        HALTED,
+        FINALIZE
+    }
+
+    String id;
+    ContainerState state;
+    int yarnId;
+    String command;
+    String owner;
+
+    public ContainerMetadata() {
+        // left blank
+    }
+
+    public ContainerMetadata(String id, String command, String owner) {
+        this.id = id;
+        this.state = ContainerState.ACQUIRE;
+        this.yarnId = -1;
+        this.command = command;
+        this.owner = owner;
+    }
+
+    public ContainerMetadata(ContainerMetadata node, ContainerState state) {
+        this(node, state, node.yarnId);
+    }
+
+    public ContainerMetadata(ContainerMetadata node, ContainerState state, int yarnId) {
+        this.id = node.id;
+        this.state = state;
+        this.yarnId = yarnId;
+        this.command = node.command;
+        this.owner = node.owner;
+    }
+}
+
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/ContainerNode.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/ContainerNode.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/ContainerNode.java
new file mode 100644
index 0000000..59b9325
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/ContainerNode.java
@@ -0,0 +1,61 @@
+package org.apache.helix.metamanager.yarn;
+
+import java.io.Serializable;
+
+import org.apache.hadoop.yarn.api.records.ContainerId;
+
+
+/**
+ * Immutable, serializable snapshot of a container node: id, lifecycle state,
+ * assigned yarn container id (null while unassigned), plus the zookeeper
+ * address, cluster name and launch command. Copy constructors derive a new
+ * snapshot with an updated state and (optionally) yarn id.
+ */
+class ContainerNode implements Serializable {
+    private static final long serialVersionUID = 2578978959080378923L;
+
+    static enum ContainerState {
+        ACQUIRE,
+        CONNECT,
+        READY,
+        STARTING,
+        RUNNING,
+        STOPPING,
+        TEARDOWN,
+        FINALIZE
+    }
+
+    final String id;
+    final ContainerState state;
+    final ContainerId yarnId;
+
+    final String zkAddress;
+    final String clusterName;
+    final String command;
+
+    public ContainerNode(String id, String zkAddress, String clusterName, String command) {
+        this.id = id;
+        this.state = ContainerState.ACQUIRE;
+        this.yarnId = null;
+        this.zkAddress = zkAddress;
+        this.clusterName = clusterName;
+        this.command = command;
+    }
+
+    public ContainerNode(ContainerNode node, ContainerState state) {
+        this(node, state, node.yarnId);
+    }
+
+    public ContainerNode(ContainerNode node, ContainerState state, ContainerId yarnId) {
+        this.id = node.id;
+        this.state = state;
+        this.yarnId = yarnId;
+        this.zkAddress = node.zkAddress;
+        this.clusterName = node.clusterName;
+        this.command = node.command;
+    }
+
+}
+
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/MessageNode.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/MessageNode.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/MessageNode.java
new file mode 100644
index 0000000..ba5be81
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/MessageNode.java
@@ -0,0 +1,20 @@
+package org.apache.helix.metamanager.yarn;
+
+
+/**
+ * Immutable message pairing a target container id with a lifecycle command.
+ */
+class MessageNode {
+    static enum MessageType {
+        CREATE, START, STOP, DESTROY
+    }
+
+    final String id;
+    final MessageType type;
+
+    public MessageNode(String id, MessageType type) {
+        this.id = id;
+        this.type = type;
+    }
+}
+
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/MetadataService.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/MetadataService.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/MetadataService.java
new file mode 100644
index 0000000..be88826
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/MetadataService.java
@@ -0,0 +1,146 @@
+package org.apache.helix.metamanager.yarn;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.concurrent.TimeoutException;
+
+import org.I0Itec.zkclient.ZkClient;
+import org.I0Itec.zkclient.exception.ZkException;
+import org.apache.helix.metamanager.yarn.ContainerMetadata.ContainerState;
+import org.apache.log4j.Logger;
+
/**
 * ZooKeeper-backed CRUD store for {@link ContainerMetadata} records.
 * Each container is persisted as a JSON string under a fixed
 * "/containers" base path, one child znode per container id.
 * <p>
 * {@link #start()} must be called before any read/write method (the
 * ZkClient is created there); {@link #stop()} closes the connection.
 * Thread-safety follows whatever guarantees ZkClient provides — no
 * additional synchronization is done here.
 */
public class MetadataService {

    static final Logger log = Logger.getLogger(MetadataService.class);

    /** Root namespace under which all container znodes live. */
    static final String CONTAINER_NAMESPACE = "containers";

//    static final String LOCK_PATH = "/" + CONTAINER_NAMESPACE + "/lock";
    /** Sleep between state polls in waitForState(), in milliseconds. */
    static final long POLL_INTERVAL = 100;

    final ApplicationConfig appConfig;

    ZkClient client;
    String basePath;

    public MetadataService(ApplicationConfig appConfig) {
        this.appConfig = appConfig;
    }

    /**
     * Connects to ZooKeeper at the provider address and ensures the base
     * path exists (createPersistent with createParents=true is a no-op if
     * the path is already present).
     */
    public void start() {
        basePath = "/" + CONTAINER_NAMESPACE;
        log.debug(String.format("starting metadata service for '%s/%s'", appConfig.providerAddress, appConfig.providerName));

        client = new ZkClient(appConfig.providerAddress);

        client.createPersistent(basePath, true);
    }

    /** Closes the ZooKeeper connection; safe to call more than once. */
    public void stop() {
        log.debug(String.format("stopping metadata service for '%s/%s'", appConfig.providerAddress, appConfig.providerName));
        if(client != null) {
            client.close();
            client = null;
        }
    }

//    public void lock(long timeout) throws Exception {
//        long limit = System.currentTimeMillis() + timeout;
//        while (limit > System.currentTimeMillis()) {
//            try {
//                client.createEphemeral(LOCK_PATH);
//                return;
//            } catch (Exception ignore) {}
//            Thread.sleep(POLL_INTERVAL);
//        }
//        throw new IllegalStateException("Could not acquire lock");
//    }
//
//    public void unlock() {
//        client.delete(LOCK_PATH);
//    }

    /**
     * Creates the znode for this container. Fails (wrapped ZkException) if
     * the node already exists.
     */
    public void create(ContainerMetadata meta) throws IllegalMetadataStateException {
        try {
            client.createPersistent(makePath(meta.id), Utils.toJson(meta));
        } catch (ZkException e) {
            throw new IllegalMetadataStateException(e);
        }
    }

    /** Reads and deserializes the metadata record for the given container id. */
    public ContainerMetadata read(String id) throws IllegalMetadataStateException {
        try {
            return Utils.fromJson(client.<String>readData(makePath(id)));
        } catch (ZkException e) {
            throw new IllegalMetadataStateException(e);
        }
    }

    /**
     * Reads all container records under the base path. Not atomic: children
     * are listed first and then read one by one, so a concurrent delete can
     * surface as an IllegalMetadataStateException.
     */
    public Collection<ContainerMetadata> readAll() throws IllegalMetadataStateException {
        try {
            Collection<ContainerMetadata> metadata = new ArrayList<ContainerMetadata>();
            for(String id : client.getChildren(basePath)) {
                metadata.add(Utils.fromJson(client.<String>readData(makePath(id))));
            }
            return metadata;
        } catch (ZkException e) {
            throw new IllegalMetadataStateException(e);
        }
    }

    /** Overwrites the znode payload for meta.id with the serialized record. */
    public void update(ContainerMetadata meta) throws IllegalMetadataStateException {
        try {
            client.writeData(makePath(meta.id), Utils.toJson(meta));
        } catch (ZkException e) {
            throw new IllegalMetadataStateException(e);
        }
    }

    /** Deletes the znode for the given container id. */
    public void delete(String id) throws IllegalMetadataStateException {
        try {
            client.delete(makePath(id));
        } catch (ZkException e) {
            throw new IllegalMetadataStateException(e);
        }
    }

    /**
     * Busy-polls (every POLL_INTERVAL ms) until the container reaches the
     * requested state or the timeout elapses.
     * NOTE(review): this re-reads the znode each cycle rather than using a
     * ZooKeeper watch — acceptable for small deployments, but consider a
     * data-change listener if this is on a hot path.
     *
     * @throws TimeoutException if the state is not reached within {@code timeout} ms
     */
    public void waitForState(String id, ContainerState state, long timeout) throws IllegalMetadataStateException, InterruptedException, TimeoutException {
        long limit = System.currentTimeMillis() + timeout;
        ContainerMetadata meta = read(id);
        while(meta.state != state) {
            if(System.currentTimeMillis() >= limit) {
                throw new TimeoutException(String.format("Container '%s' failed to reach state '%s' (currently is '%s')", id, state, meta.state));
            }
            Thread.sleep(POLL_INTERVAL);
            meta = read(id);
        }
    }

    /** Builds the absolute znode path for a container id. */
    String makePath(String containerId) {
        return basePath + "/" + containerId;
    }

    /**
     * Checked wrapper for ZooKeeper access failures surfaced by this service;
     * typically carries a ZkException as its cause.
     */
    public static class IllegalMetadataStateException extends Exception {

        private static final long serialVersionUID = -2846997013918977056L;

        public IllegalMetadataStateException() {
            super();
        }

        public IllegalMetadataStateException(String message, Throwable cause) {
            super(message, cause);
        }

        public IllegalMetadataStateException(String message) {
            super(message);
        }

        public IllegalMetadataStateException(Throwable cause) {
            super(cause);
        }
    }
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/Utils.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/Utils.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/Utils.java
new file mode 100644
index 0000000..49f70d3
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/Utils.java
@@ -0,0 +1,93 @@
+package org.apache.helix.metamanager.yarn;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.yarn.api.records.LocalResource;
+import org.apache.hadoop.yarn.api.records.LocalResourceType;
+import org.apache.hadoop.yarn.api.records.LocalResourceVisibility;
+import org.apache.hadoop.yarn.util.ConverterUtils;
+import org.apache.hadoop.yarn.util.Records;
+import org.apache.helix.metamanager.yarn.ContainerMetadata.ContainerState;
+import org.apache.log4j.Logger;
+
+import com.google.gson.Gson;
+import com.google.gson.GsonBuilder;
+import com.google.gson.TypeAdapter;
+import com.google.gson.stream.JsonReader;
+import com.google.gson.stream.JsonToken;
+import com.google.gson.stream.JsonWriter;
+
/**
 * Static helpers shared by the YARN integration: JSON (de)serialization of
 * {@link ContainerMetadata} via a preconfigured Gson instance, and a dummy
 * {@link LocalResource} map used to work around YARN requiring at least one
 * local resource in a container launch context.
 */
public class Utils {

    static final Logger log = Logger.getLogger(Utils.class);

    // Shared Gson instance, configured once below with a custom adapter for
    // the ContainerState enum and pretty-printing enabled.
    static Gson gson;
    static {
        GsonBuilder builder = new GsonBuilder();
        builder.registerTypeAdapter(ContainerState.class, new ContainerStateAdapter());
        builder.setPrettyPrinting();
        gson = builder.create();
    }
    // Built eagerly at class-load time; see createDummyResources() below.
    static Map<String, LocalResource> dummyResources = createDummyResources();

    /** Serializes a metadata record to its JSON string form. */
    static String toJson(ContainerMetadata meta) {
        return gson.toJson(meta);
    }

    /** Deserializes a metadata record from its JSON string form. */
    static ContainerMetadata fromJson(String json) {
        return gson.fromJson(json, ContainerMetadata.class);
    }

    /** Returns the shared dummy local-resource map (YARN workaround). */
    static Map<String, LocalResource> getDummyResources() {
        return dummyResources;
    }

    /**
     * Creates (once) an empty file at /tmp/dummy and wraps it as a YARN
     * LocalResource keyed "dummy".
     * NOTE(review): this runs from a static initializer, and on failure it
     * calls System.exit(1) — killing the whole JVM from library code. A
     * thrown exception (surfacing as ExceptionInInitializerError) would be
     * friendlier to embedding processes; confirm before changing behavior.
     */
    private static Map<String, LocalResource> createDummyResources() {
        File dummy = new File("/tmp/dummy");

        if(!dummy.exists()) {
            try {
                dummy.createNewFile();
            } catch(Exception e) {
                log.error("could not create dummy file", e);
                System.exit(1);
            }
        }

        Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();
        Path path = new Path(dummy.toURI());
        LocalResource localResource = Records.newRecord(LocalResource.class);
        localResource.setType(LocalResourceType.FILE);
        localResource.setVisibility(LocalResourceVisibility.APPLICATION);
        localResource.setResource(ConverterUtils.getYarnUrlFromPath(path));
        localResource.setTimestamp(dummy.lastModified());
        localResource.setSize(dummy.length());
        localResources.put("dummy", localResource);
        return localResources;
    }

    /**
     * Gson adapter that maps ContainerState to/from its enum name, with
     * explicit null handling in both directions.
     */
    static class ContainerStateAdapter extends TypeAdapter<ContainerState> {
        @Override
        public ContainerState read(JsonReader reader) throws IOException {
            if (reader.peek() == JsonToken.NULL) {
                reader.nextNull();
                return null;
            }
            return ContainerState.valueOf(reader.nextString());
        }

        @Override
        public void write(JsonWriter writer, ContainerState value) throws IOException {
            if (value == null) {
                writer.nullValue();
                return;
            }
            writer.value(value.name());
        }
    }

}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/YarnApplication.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/YarnApplication.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/YarnApplication.java
new file mode 100644
index 0000000..7d2099a
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/YarnApplication.java
@@ -0,0 +1,126 @@
+package org.apache.helix.metamanager.yarn;
+
+import java.net.InetSocketAddress;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.net.NetUtils;
+import org.apache.hadoop.yarn.api.ClientRMProtocol;
+import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationResponse;
+import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationRequest;
+import org.apache.hadoop.yarn.api.records.ApplicationId;
+import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
+import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
+import org.apache.hadoop.yarn.api.records.LocalResource;
+import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.exceptions.YarnRemoteException;
+import org.apache.hadoop.yarn.ipc.YarnRPC;
+import org.apache.hadoop.yarn.util.Records;
+import org.apache.log4j.Logger;
+
+import com.google.common.base.Preconditions;
+
/**
 * Submits and kills the meta-manager application master on a YARN
 * ResourceManager. start() acquires a new application id, builds a
 * ContainerLaunchContext for the master process (command, 256 MB resource
 * limit, environment, dummy local resource) and submits it; stop()
 * force-kills the previously submitted application.
 */
public class YarnApplication {

    static final Logger log = Logger.getLogger(YarnApplication.class);

    // Environment variable names passed through to the application master.
    static final String ENV_CLUSTER_ADDRESS = "CLUSTER_ADDRESS";
    static final String ENV_CLUSTER_NAME = "CLUSTER_NAME";
    static final String ENV_PROVIDER_ADDRESS = "PROVIDER_ADDRESS";
    static final String ENV_PROVIDER_NAME = "PROVIDER_NAME";

    // Configuration key for the master launch command, and its default.
    // NOTE(review): the default embeds a developer-specific absolute path
    // (/home/apucher/...) — this only works on that machine; deployments
    // must override 'metamanager.master.command' in the configuration.
    static final String MASTER_COMMAND = "metamanager.master.command";
    static final String DEFAULT_MASTER_COMMAND = "/bin/sh /home/apucher/incubator-helix/recipes/meta-cluster-manager/target/meta-cluster-manager-pkg/bin/yarn-master-process.sh 1>%s/stdout 2>%s/stderr";

    Configuration conf;
    YarnRPC rpc;
    ClientRMProtocol rmClient;
    // Set by start(); required by stop().
    ApplicationId appId;

    final ApplicationConfig appConfig;

    public YarnApplication(ApplicationConfig appConfig) {
        this.appConfig = appConfig;
        configure(new YarnConfiguration());
    }

    /**
     * Connects to the ResourceManager, requests a new application id and
     * submits the application master container spec.
     *
     * @throws Exception on any RM communication failure
     */
    public void start() throws Exception {
        connect();

        // stdout/stderr of the master are redirected under /tmp/<provider>.
        String command = String.format(conf.get(MASTER_COMMAND, DEFAULT_MASTER_COMMAND), "/tmp/" + appConfig.providerName, "/tmp/" + appConfig.providerName);
        //ApplicationConstants.LOG_DIR_EXPANSION_VAR, ApplicationConstants.LOG_DIR_EXPANSION_VAR);

        log.info(String.format("Starting application '%s/%s' (masterCommand='%s')", appConfig.providerAddress, appConfig.providerName, command));

        // app id
        GetNewApplicationRequest appRequest = Records.newRecord(GetNewApplicationRequest.class);
        GetNewApplicationResponse appResponse = rmClient.getNewApplication(appRequest);

        this.appId = appResponse.getApplicationId();

        log.info(String.format("Acquired app id '%s' for '%s/%s'", appId.toString(), appConfig.providerAddress, appConfig.providerName));

        // command
        ContainerLaunchContext launchContext = Records.newRecord(ContainerLaunchContext.class);
        launchContext.setCommands(Collections.singletonList(command));

        // resource limit
        Resource resource = Records.newRecord(Resource.class);
        resource.setMemory(256); // TODO make dynamic
        launchContext.setResource(resource);

        // environment: cluster and provider coordinates for the master
        Map<String, String> env = new HashMap<String, String>();
        env.put(ENV_CLUSTER_ADDRESS, appConfig.clusterAddress);
        env.put(ENV_CLUSTER_NAME, appConfig.clusterName);
        env.put(ENV_PROVIDER_ADDRESS, appConfig.providerAddress);
        env.put(ENV_PROVIDER_NAME, appConfig.providerName);
        launchContext.setEnvironment(env);

        // local resources
        // YARN workaround: create dummy resource
        Map<String, LocalResource> localResources = Utils.getDummyResources();
        launchContext.setLocalResources(localResources);

        // app submission
        ApplicationSubmissionContext subContext = Records.newRecord(ApplicationSubmissionContext.class);
        subContext.setApplicationId(appId);
        subContext.setApplicationName(appConfig.providerName);
        subContext.setAMContainerSpec(launchContext);

        SubmitApplicationRequest subRequest = Records.newRecord(SubmitApplicationRequest.class);
        subRequest.setApplicationSubmissionContext(subContext);

        log.info(String.format("Starting app id '%s'", appId.toString()));

        rmClient.submitApplication(subRequest);

    }

    /**
     * Force-kills the submitted application. Must only be called after
     * start() has set appId.
     */
    public void stop() throws YarnRemoteException {
        log.info(String.format("Stopping app id '%s'", appId.toString()));
        KillApplicationRequest killRequest = Records.newRecord(KillApplicationRequest.class);
        killRequest.setApplicationId(appId);

        rmClient.forceKillApplication(killRequest);
    }

    /** Stores the configuration and creates the RPC factory from it. */
    void configure(Configuration conf) {
        this.conf = Preconditions.checkNotNull(conf);
        this.rpc = YarnRPC.create(conf);
    }

    /** Opens a ClientRMProtocol proxy to the configured ResourceManager. */
    void connect() {
        YarnConfiguration yarnConf = new YarnConfiguration(conf);
        InetSocketAddress rmAddress = NetUtils.createSocketAddr(yarnConf.get(
                YarnConfiguration.RM_ADDRESS,
                YarnConfiguration.DEFAULT_RM_ADDRESS));
        log.info("Connecting to ResourceManager at: " + rmAddress);
        this.rmClient = ((ClientRMProtocol) rpc.getProxy(ClientRMProtocol.class, rmAddress, conf));
    }
}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/YarnClient.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/YarnClient.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/YarnClient.java
new file mode 100644
index 0000000..3447661
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/YarnClient.java
@@ -0,0 +1,5 @@
+package org.apache.helix.metamanager.yarn;
+
/**
 * Placeholder for a YARN client abstraction. Intentionally empty — no
 * behavior has been implemented yet.
 */
public class YarnClient {
}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/YarnContainer.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/YarnContainer.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/YarnContainer.java
new file mode 100644
index 0000000..d36eee9
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/YarnContainer.java
@@ -0,0 +1,14 @@
+package org.apache.helix.metamanager.yarn;
+
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.util.Tool;
+
/**
 * Stub entry point for a YARN-launched container process, wired into the
 * Hadoop {@link Tool} interface. The run() method is not yet implemented
 * and currently always returns 0 (success).
 */
public class YarnContainer extends Configured implements Tool {

    @Override
    public int run(String[] args) throws Exception {
        // TODO Auto-generated method stub
        return 0;
    }

}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/YarnContainerProvider.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/YarnContainerProvider.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/YarnContainerProvider.java
new file mode 100644
index 0000000..34a6b61
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/YarnContainerProvider.java
@@ -0,0 +1,90 @@
+package org.apache.helix.metamanager.yarn;
+
+import org.apache.helix.metamanager.ClusterContainerProvider;
+import org.apache.helix.metamanager.yarn.ContainerMetadata.ContainerState;
+import org.apache.log4j.Logger;
+
/**
 * Container provider backed by YARN. Rather than talking to YARN directly,
 * it writes desired-state {@link ContainerMetadata} records through
 * {@link MetadataService}; the application master picks them up and drives
 * the actual YARN containers. Each create/destroy call blocks (up to
 * CONTAINER_TIMEOUT ms) until the container reaches the expected state.
 * <p>
 * startService() must be called before create/destroy/destroyAll —
 * metaService is null until then.
 */
public class YarnContainerProvider implements ClusterContainerProvider {

    static final Logger log = Logger.getLogger(YarnContainerProvider.class);

    /** Only this container type is supported by the YARN provider. */
    static final String REQUIRED_TYPE = "container";

    static final long LOCK_TIMEOUT = 1000;
    /** Max time (ms) to wait for a container state transition. */
    static final long CONTAINER_TIMEOUT = 10000;

    /*
     * CONTAINERS
     *   A (A, READY)
     *   B (B, RUNNING)
     */

    final ApplicationConfig appConfig;
    final String command;

    final Object notifier = new Object();

    MetadataService metaService;

    public YarnContainerProvider(ApplicationConfig appConfig, String command) {
        this.appConfig = appConfig;
        this.command = command;
    }

    /**
     * Requests a new container by creating its metadata record, then blocks
     * until the master reports it ACTIVE.
     *
     * @throws IllegalArgumentException if type is not "container"
     */
    @Override
    public void create(final String id, final String type) throws Exception {
        if(!REQUIRED_TYPE.equals(type)) {
            throw new IllegalArgumentException(String.format("Type '%s' not supported", type));
        }

        metaService.create(new ContainerMetadata(id, command, appConfig.providerName));
        metaService.waitForState(id, ContainerState.ACTIVE, CONTAINER_TIMEOUT);
    }

    /**
     * Requests teardown of a container. Depending on its current state the
     * record is moved to TEARDOWN (active), HALTED (failed), or left as-is
     * (already finalized); then blocks until FINALIZE and deletes the record.
     *
     * @throws IllegalStateException if the container is in any other state
     */
    @Override
    public void destroy(final String id) throws Exception {
        ContainerMetadata meta = metaService.read(id);

        if(meta.state == ContainerState.ACTIVE) {
            log.info(String.format("Destroying active container, going to teardown"));
            metaService.update(new ContainerMetadata(meta, ContainerState.TEARDOWN));

        } else if(meta.state == ContainerState.FAILED) {
            log.info(String.format("Destroying failed container, going to halted"));
            metaService.update(new ContainerMetadata(meta, ContainerState.HALTED));

        } else if(meta.state == ContainerState.FINALIZE) {
            log.info(String.format("Destroying finalized container, skipping"));

        } else {
            throw new IllegalStateException(String.format("Container '%s' must be active, failed or finalized", id));
        }

        metaService.waitForState(id, ContainerState.FINALIZE, CONTAINER_TIMEOUT);
        metaService.delete(id);
    }

    /**
     * Best-effort destroy of every known container; individual failures are
     * deliberately swallowed so one bad container does not stop the sweep.
     */
    @Override
    public void destroyAll() {
        try {
            for(ContainerMetadata meta : metaService.readAll()) {
                try { destroy(meta.id); } catch (Exception ignore) {}
            }
        } catch (Exception ignore) {
            // ignore
        }
    }

    /** Creates and starts the backing metadata service. Call before use. */
    public void startService() {
        metaService = new MetadataService(appConfig);
        metaService.start();
    }

    /** Stops and discards the backing metadata service; safe to repeat. */
    public void stopService() {
        if(metaService != null) {
            metaService.stop();
            metaService = null;
        }
    }

}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/YarnContainerService.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/YarnContainerService.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/YarnContainerService.java
new file mode 100644
index 0000000..855dddd
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/YarnContainerService.java
@@ -0,0 +1,370 @@
+package org.apache.helix.metamanager.yarn;
+
+import java.io.File;
+import java.io.IOException;
+import java.net.InetSocketAddress;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.Executors;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.net.NetUtils;
+import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.hadoop.yarn.api.AMRMProtocol;
+import org.apache.hadoop.yarn.api.ContainerManager;
+import org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
+import org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest;
+import org.apache.hadoop.yarn.api.records.AMResponse;
+import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
+import org.apache.hadoop.yarn.api.records.Container;
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
+import org.apache.hadoop.yarn.api.records.ContainerStatus;
+import org.apache.hadoop.yarn.api.records.NodeId;
+import org.apache.hadoop.yarn.api.records.Priority;
+import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.api.records.ResourceRequest;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.exceptions.YarnRemoteException;
+import org.apache.hadoop.yarn.ipc.YarnRPC;
+import org.apache.hadoop.yarn.util.Records;
+import org.apache.helix.metamanager.yarn.ContainerMetadata.ContainerState;
+import org.apache.helix.metamanager.yarn.MetadataService.IllegalMetadataStateException;
+import org.apache.log4j.Logger;
+
+import com.google.common.collect.Maps;
+
/**
 * Application-master-side reconciliation loop between the desired container
 * state in the metadata store and actual YARN containers. A scheduled
 * {@link YarnService} task runs every YARNSERVICE_INTERVAL ms: it reads the
 * metadata records owned by this provider, asks the ResourceManager for as
 * many additional containers as there are ACQUIRE records, releases
 * containers for HALTED records (and any unneeded surplus), launches the
 * configured command in newly allocated containers, and marks records
 * FINALIZE when YARN reports their container completed.
 * <p>
 * startService() must be called to begin the loop; stopService() shuts it
 * down and closes the metadata connection. The container bookkeeping maps
 * are only touched from the single scheduler thread.
 */
public class YarnContainerService {

    static final Logger log = Logger.getLogger(YarnContainerService.class);

    static final String REQUIRED_TYPE = "container";

    static final long ZOOKEEPER_TIMEOUT = 5000;

    /** Period (ms) of the reconciliation cycle. */
    static final long YARNSERVICE_INTERVAL = 1000;

    static final String CONTAINERS = "CONTAINERS";

    // Launch command template: script, cluster address/name, provider
    // address/name, container id, then stdout/stderr redirect directories.
    static final String CONTAINER_COMMAND = "/bin/sh %s %s %s %s %s %s 1>%s/stdout 2>%s/stderr";

    /*
     * CONTAINERS
     *   A (A, READY)
     *   B (B, RUNNING)
     */

    final ApplicationConfig appConfig;
    final AMRMProtocol yarnClient;
    final ApplicationAttemptId appAtemptId;

    final Configuration yarnConfig;

    final File dummy = new File("/tmp/dummy");

    // YARN containers allocated but not yet bound to a metadata record.
    final Map<ContainerId, Container> unassignedContainers = new HashMap<ContainerId, Container>();
    // YARN containers bound to a record and started.
    final Map<ContainerId, Container> activeContainers = new HashMap<ContainerId, Container>();
    // Terminal statuses reported by YARN, kept for bookkeeping.
    final Map<ContainerId, ContainerStatus> completedContainers = new HashMap<ContainerId, ContainerStatus>();
    // Reverse index: YARN container id -> metadata record id.
    final Map<ContainerId, String> yarn2meta = new HashMap<ContainerId, String>();

    // Containers asked for in the previous cycle (logging only).
    int numRequestedLast = 0;

    MetadataService metaService;

    ScheduledExecutorService executor;

    public YarnContainerService(AMRMProtocol yarnClient, Configuration conf, ApplicationAttemptId appAttemptId, ApplicationConfig appConfig) {
        this.appConfig = appConfig;
        this.yarnClient = yarnClient;
        this.appAtemptId = appAttemptId;
        this.yarnConfig = conf;
    }

    /** Starts the metadata connection and the periodic reconciliation task. */
    public void startService() {
        log.debug("starting container service");

        metaService = new MetadataService(appConfig);
        metaService.start();

        executor = Executors.newSingleThreadScheduledExecutor();
        executor.scheduleAtFixedRate(new YarnService(), 0, YARNSERVICE_INTERVAL, TimeUnit.MILLISECONDS);
    }

    /**
     * Stops the reconciliation task (spin-waiting for termination) and the
     * metadata connection.
     * NOTE(review): the sleep loop could be replaced by
     * executor.awaitTermination() — confirm no caller relies on the
     * interrupt-swallowing behavior here.
     */
    public void stopService() {
        log.debug("stopping container service");

        if(executor != null) {
            executor.shutdown();
            while(!executor.isTerminated()) {
                try {
                    Thread.sleep(100);
                } catch (InterruptedException e) {
                    // ignore
                }
            }
            executor = null;
        }

        if(metaService != null) {
            metaService.stop();
            metaService = null;
        }
    }

    /** Returns only the metadata records whose owner is this provider. */
    Collection<ContainerMetadata> readOwnedMetadata() throws IllegalMetadataStateException {
        log.debug("reading container data");

        Collection<ContainerMetadata> containers = new ArrayList<ContainerMetadata>();
        for(ContainerMetadata meta : metaService.readAll()) {
            if(meta.owner.equals(appConfig.providerName)) {
                containers.add(meta);
                log.debug(String.format("found container node '%s' (state=%s, yarnId=%s, command=%s, owner=%s)",
                        meta.id, meta.state, meta.yarnId, meta.command, meta.owner));
            }
        }
        return containers;
    }

    /**
     * One reconciliation cycle, run on the scheduler thread. All errors are
     * caught and logged so a failed cycle does not kill the schedule.
     */
    class YarnService implements Runnable {
        // Monotonic id required by the AMRMProtocol allocate call.
        int responseId = 0;

        @Override
        public void run() {
            try {
                log.debug("running yarn service update cycle");

                Collection<ContainerMetadata> metadata = readOwnedMetadata();

                // active meta containers
                int numMetaActive = countActiveMeta(metadata);

                // newly acquired meta containers
                int numMetaAcquire = countAcquireMeta(metadata);

                // destroyed meta containers
                List<ContainerId> destroyedReleasedIds = createDestroyedReleaseList(metadata);
                int numMetaCompleted = destroyedReleasedIds.size();

                int numMeta = numMetaAcquire + numMetaActive + numMetaCompleted;

                // yarn containers
                int numYarnUnassigned = unassignedContainers.size();
                int numYarnActive = activeContainers.size();
                int numYarnCompleted = completedContainers.size();
                int numYarn = numYarnUnassigned + numYarnActive + numYarnCompleted;

                // negative value means surplus unassigned containers
                int numYarnRequired = numMetaAcquire - numYarnUnassigned;

                // additionally required containers
                int numRequestAdditional = Math.max(0, numYarnRequired);

                // overstock containers
                List<ContainerId> unneededReleasedIds = createOverstockReleaseList(numYarnRequired);

                log.debug(String.format("meta containers (total=%d, acquire=%d, active=%d, completed=%d)", numMeta, numMetaAcquire, numMetaActive, numMetaCompleted));
                log.debug(String.format("yarn containers (total=%d, unassigned=%d, active=%d, completed=%d)", numYarn, numYarnUnassigned, numYarnActive, numYarnCompleted));
                log.debug(String.format("requesting %d new containers (%d requested last), releasing %d", numRequestAdditional, numRequestedLast, destroyedReleasedIds.size()));

                Priority priority = Records.newRecord(Priority.class);
                priority.setPriority(0);

                Resource resource = Records.newRecord(Resource.class);
                resource.setMemory(256); // TODO make dynamic

                // "*" means any host is acceptable
                ResourceRequest resourceRequest = Records.newRecord(ResourceRequest.class);
                resourceRequest.setHostName("*");
                resourceRequest.setNumContainers(numRequestAdditional);
                resourceRequest.setPriority(priority);
                resourceRequest.setCapability(resource);

                AllocateRequest request = Records.newRecord(AllocateRequest.class);
                request.setResponseId(responseId);
                request.setApplicationAttemptId(appAtemptId);
                request.addAsk(resourceRequest);
                request.addAllReleases(destroyedReleasedIds);
                request.addAllReleases(unneededReleasedIds);

                responseId++;

                AllocateResponse allocateResponse = null;
                try {
                    allocateResponse = yarnClient.allocate(request);
                } catch (YarnRemoteException e) {
                    // abort this cycle; the next scheduled run retries
                    log.error("Error allocating containers", e);
                    return;
                }

                numRequestedLast = numRequestAdditional;

                AMResponse response = allocateResponse.getAMResponse();

                // newly added containers
                for(Container container : response.getAllocatedContainers()) {
                    unassignedContainers.put(container.getId(), container);
                }

                log.info(String.format("%d new containers available, %d required", unassignedContainers.size(), numMetaAcquire));

                // pair unassigned YARN containers with metadata records that
                // have no YARN id yet (yarnId < 0), launching the command in each
                Iterator<Container> itYarn = unassignedContainers.values().iterator();
                Iterator<ContainerMetadata> itMeta = metadata.iterator();
                while(itYarn.hasNext() && itMeta.hasNext()) {
                    ContainerMetadata meta = itMeta.next();

                    if(meta.yarnId >= 0)
                        continue;

                    Container containerYarn = itYarn.next();

                    log.debug(String.format("assigning yarn container '%s' to container node '%s'", containerYarn.getId(), meta.id));

                    String command = String.format(CONTAINER_COMMAND, meta.command,
                            appConfig.clusterAddress, appConfig.clusterName, appConfig.providerAddress, appConfig.providerName,
                            meta.id, "/tmp/" + meta.id, "/tmp/" + meta.id);
                    //ApplicationConstants.LOG_DIR_EXPANSION_VAR, ApplicationConstants.LOG_DIR_EXPANSION_VAR);

                    ContainerLaunchContext context = Records.newRecord(ContainerLaunchContext.class);
                    context.setContainerId(containerYarn.getId());
                    context.setResource(containerYarn.getResource());
                    context.setEnvironment(Maps.<String, String>newHashMap());
                    context.setCommands(Collections.singletonList(command));
                    context.setLocalResources(Utils.getDummyResources());
                    try {
                        context.setUser(UserGroupInformation.getCurrentUser().getShortUserName());
                    } catch (IOException e) {
                        // abort the whole cycle; remaining assignments retry next run
                        log.error(String.format("failed setting up container '%s' user information", meta.id));
                        return;
                    }

                    log.debug(String.format("container '%s' executing command '%s'", meta.id, command));

                    StartContainerRequest startReq = Records.newRecord(StartContainerRequest.class);
                    startReq.setContainerLaunchContext(context);

                    try {
                        getContainerManager(containerYarn).startContainer(startReq);

                    } catch (YarnRemoteException e) {
                        log.error(String.format("Error starting container '%s'", meta.id), e);
                        return;
                    }

                    log.debug(String.format("container '%s' started, updating container node", meta.id));

                    metaService.update(new ContainerMetadata(meta, ContainerState.CONNECTING, containerYarn.getId().getId()));
                    yarn2meta.put(containerYarn.getId(), meta.id);

                    log.debug(String.format("removing '%s' from unassigned yarn containers and adding to active list", containerYarn.getId()));

                    itYarn.remove();
                    activeContainers.put(containerYarn.getId(), containerYarn);

                }

                // process completion notifications from the ResourceManager
                for(ContainerStatus status : response.getCompletedContainersStatuses()) {
                    ContainerId id = status.getContainerId();

                    log.info(String.format("Container '%s' completed", id));

                    if(unassignedContainers.containsKey(id)) {
                        log.info(String.format("Unassigned container '%s' terminated, removing", id));
                        unassignedContainers.remove(id);
                        // TODO handle
                    }

                    if(activeContainers.containsKey(id)) {
                        log.info(String.format("Active container '%s' terminated, removing", id));
                        activeContainers.remove(id);

                        String metaId = yarn2meta.get(id);
                        ContainerMetadata meta = metaService.read(metaId);

                        log.debug(String.format("container '%s' finalized, updating container node", meta.id));

                        metaService.update(new ContainerMetadata(meta, ContainerState.FINALIZE));
                    }

                    completedContainers.put(id, status);
                }

                log.debug("yarn service update cycle complete");

            } catch (Exception e) {
                // never propagate: scheduleAtFixedRate cancels on uncaught exceptions
                log.error("Error while executing yarn update cycle", e);
            }
        }

        /**
         * When more containers are allocated than ACQUIRE records exist
         * (numYarnRequired < 0), selects up to that many unassigned
         * containers for release and removes them from the local map.
         */
        private List<ContainerId> createOverstockReleaseList(int numYarnRequired) {
            List<ContainerId> unneededReleasedIds = new ArrayList<ContainerId>();
            Iterator<Container> itUnassigned = unassignedContainers.values().iterator();
            if(numYarnRequired < 0) {
                for(int i=0; i<-numYarnRequired && itUnassigned.hasNext(); i++) {
                    Container container = itUnassigned.next();
                    unneededReleasedIds.add(container.getId());
                    log.debug(String.format("Container '%s' no longer required, removing", container.getId()));
                    itUnassigned.remove();
                }
            }
            return unneededReleasedIds;
        }

        /** Builds release requests for every metadata record in HALTED state. */
        private List<ContainerId> createDestroyedReleaseList(
                Collection<ContainerMetadata> metadata) {
            List<ContainerId> releasedIds = new ArrayList<ContainerId>();
            for(ContainerMetadata meta : metadata) {
                if(meta.state == ContainerState.HALTED) {
                    ContainerId containerId = Records.newRecord(ContainerId.class);
                    containerId.setApplicationAttemptId(appAtemptId);
                    containerId.setId(meta.yarnId);
                    releasedIds.add(containerId);
                    log.debug(String.format("releasing container '%s'", containerId));
                }
            }
            return releasedIds;
        }

        /** Counts records waiting for a YARN container (ACQUIRE state). */
        private int countAcquireMeta(Collection<ContainerMetadata> metadata) {
            int numMetaAcquire = 0;
            for(ContainerMetadata meta : metadata) {
                if(meta.state == ContainerState.ACQUIRE) {
                    numMetaAcquire++;
                }
            }
            return numMetaAcquire;
        }

        /** Counts records in any state other than ACQUIRE, HALTED or FINALIZE. */
        private int countActiveMeta(Collection<ContainerMetadata> metadata) {
            int numMetaActive = 0;
            for(ContainerMetadata meta : metadata) {
                if(meta.state != ContainerState.ACQUIRE &&
                   meta.state != ContainerState.HALTED &&
                   meta.state != ContainerState.FINALIZE) {
                    numMetaActive++;
                }
            }
            return numMetaActive;
        }
    }

    /**
     * Opens a ContainerManager RPC proxy to the NodeManager hosting the
     * given container. A new proxy is created per call.
     */
    private ContainerManager getContainerManager(Container container) {
        YarnConfiguration yarnConf = new YarnConfiguration(yarnConfig);
        YarnRPC rpc = YarnRPC.create(yarnConf);
        NodeId nodeId = container.getNodeId();
        String containerIpPort = String.format("%s:%d", nodeId.getHost(),
                nodeId.getPort());
        log.info("Connecting to ContainerManager at: " + containerIpPort);
        InetSocketAddress addr = NetUtils.createSocketAddr(containerIpPort);
        ContainerManager cm = (ContainerManager) rpc.getProxy(
                ContainerManager.class, addr, yarnConfig);
        return cm;
    }

}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/YarnHelper.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/YarnHelper.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/YarnHelper.java
new file mode 100644
index 0000000..4314bdc
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/YarnHelper.java
@@ -0,0 +1,5 @@
+package org.apache.helix.metamanager.yarn;
+
/**
 * Placeholder for YARN-related helper utilities; intentionally empty in this
 * revision.
 */
public class YarnHelper {

}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/YarnMaster.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/YarnMaster.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/YarnMaster.java
new file mode 100644
index 0000000..a2aef0e
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/YarnMaster.java
@@ -0,0 +1,134 @@
+package org.apache.helix.metamanager.yarn;
+
+import java.net.InetSocketAddress;
+import java.util.Map;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.net.NetUtils;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+import org.apache.hadoop.yarn.api.AMRMProtocol;
+import org.apache.hadoop.yarn.api.ApplicationConstants;
+import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationMasterRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterRequest;
+import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.ipc.YarnRPC;
+import org.apache.hadoop.yarn.util.ConverterUtils;
+import org.apache.hadoop.yarn.util.Records;
+import org.apache.log4j.Logger;
+
+public class YarnMaster extends Configured implements Tool {
+
+ static final Logger log = Logger.getLogger(YarnMaster.class);
+
+ AMRMProtocol resourceManager;
+ ApplicationAttemptId appAttemptId;
+
+ YarnContainerService service;
+
+ @Override
+ public int run(String[] args) throws Exception {
+ log.trace("BEGIN YarnMaster.run()");
+
+ Configuration conf = getConf();
+
+ this.appAttemptId = getApplicationAttemptId();
+ log.info(String.format("Got application attempt id '%s'", appAttemptId.toString()));
+
+ log.debug("Getting resource manager");
+ this.resourceManager = getResourceManager(conf);
+
+ // register the AM with the RM
+ log.debug("Registering application master");
+ RegisterApplicationMasterRequest appMasterRequest =
+ Records.newRecord(RegisterApplicationMasterRequest.class);
+ appMasterRequest.setApplicationAttemptId(appAttemptId);
+ appMasterRequest.setHost("");
+ appMasterRequest.setRpcPort(0);
+ appMasterRequest.setTrackingUrl("");
+
+ resourceManager.registerApplicationMaster(appMasterRequest);
+
+ String clusterAddress = getEnv(YarnApplication.ENV_CLUSTER_ADDRESS);
+ String clusterName = getEnv(YarnApplication.ENV_CLUSTER_NAME);
+ String providerAddress = getEnv(YarnApplication.ENV_PROVIDER_ADDRESS);
+ String providerName = getEnv(YarnApplication.ENV_PROVIDER_NAME);
+ ApplicationConfig appConfig = new ApplicationConfig(clusterAddress, clusterName, providerAddress, providerName);
+
+ service = new YarnContainerService(resourceManager, conf, appAttemptId, appConfig);
+ service.startService();
+
+ Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() {
+ @Override
+ public void run() {
+
+ service.stopService();
+
+ // finish application
+ log.debug("Sending finish request");
+ FinishApplicationMasterRequest finishReq =
+ Records.newRecord(FinishApplicationMasterRequest.class);
+
+ finishReq.setAppAttemptId(getApplicationAttemptId());
+ finishReq.setFinishApplicationStatus(FinalApplicationStatus.SUCCEEDED);
+
+ try { resourceManager.finishApplicationMaster(finishReq); } catch(Exception ignore) {}
+ }
+ }));
+
+ try { Thread.currentThread().join(); } catch(Exception ignore) {}
+
+ log.trace("END YarnMaster.run()");
+
+ return 0;
+ }
+
+ private AMRMProtocol getResourceManager(Configuration conf) {
+ // Connect to the Scheduler of the ResourceManager.
+ YarnConfiguration yarnConf = new YarnConfiguration(conf);
+ YarnRPC rpc = YarnRPC.create(yarnConf);
+ InetSocketAddress rmAddress =
+ NetUtils.createSocketAddr(yarnConf.get(
+ YarnConfiguration.RM_SCHEDULER_ADDRESS,
+ YarnConfiguration.DEFAULT_RM_SCHEDULER_ADDRESS));
+ log.info("Connecting to ResourceManager at " + rmAddress);
+ AMRMProtocol resourceManager =
+ (AMRMProtocol) rpc.getProxy(AMRMProtocol.class, rmAddress, conf);
+ return resourceManager;
+ }
+
+ private ApplicationAttemptId getApplicationAttemptId() {
+ ContainerId containerId = ConverterUtils.toContainerId(getEnv(ApplicationConstants.AM_CONTAINER_ID_ENV));
+ ApplicationAttemptId appAttemptID = containerId.getApplicationAttemptId();
+ return appAttemptID;
+ }
+
+ private String getEnv(String key) {
+ Map<String, String> envs = System.getenv();
+ String clusterName = envs.get(key);
+ if (clusterName == null) {
+ // container id should always be set in the env by the framework
+ throw new IllegalArgumentException(
+ String.format("%s not set in the environment", key));
+ }
+ return clusterName;
+ }
+
+ public static void main(String[] args) throws Exception {
+ log.trace("BEGIN YarnMaster.main()");
+
+ try {
+ int rc = ToolRunner.run(new Configuration(), new YarnMaster(), args);
+ System.exit(rc);
+ } catch (Exception e) {
+ System.err.println(e);
+ System.exit(1);
+ }
+
+ log.trace("END YarnMaster.main()");
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/YarnProcess.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/YarnProcess.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/YarnProcess.java
new file mode 100644
index 0000000..7108d39
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/yarn/YarnProcess.java
@@ -0,0 +1,171 @@
+package org.apache.helix.metamanager.yarn;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import java.util.concurrent.Executors;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.helix.HelixManager;
+import org.apache.helix.HelixManagerFactory;
+import org.apache.helix.InstanceType;
+import org.apache.helix.metamanager.managed.ManagedFactory;
+import org.apache.helix.metamanager.yarn.ContainerMetadata.ContainerState;
+import org.apache.log4j.Logger;
+
+public class YarnProcess {
+ static final Logger log = Logger.getLogger(YarnProcess.class);
+
+ static final long CONTAINERSERVICE_INTERVAL = 1000;
+
+ final ApplicationConfig appConfig;
+ final String containerId;
+
+ HelixManager participantManager;
+
+ MetadataService metaService;
+ ScheduledExecutorService executor;
+
+
+ public YarnProcess(ApplicationConfig appConfig, String containerId) {
+ this.appConfig = appConfig;
+ this.containerId = containerId;
+ }
+
+ public void startService() {
+ log.info(String.format("start metadata service for '%s'", containerId));
+ metaService = new MetadataService(appConfig);
+ metaService.start();
+
+ executor = Executors.newSingleThreadScheduledExecutor();
+ executor.scheduleAtFixedRate(new ContainerService(), 0, CONTAINERSERVICE_INTERVAL, TimeUnit.MILLISECONDS);
+ }
+
+ public void stopService() {
+ log.info(String.format("stop metadata service for '%s'", containerId));
+ if (metaService != null) {
+ metaService.stop();
+ metaService = null;
+ }
+
+ if(executor != null) {
+ executor.shutdown();
+ }
+ }
+
+ public boolean isRunning() {
+ if(executor == null)
+ return false;
+ return !executor.isTerminated();
+ }
+
+ public void startParticipant() throws Exception {
+ log.info("STARTING " + containerId);
+ participantManager = HelixManagerFactory.getZKHelixManager(appConfig.clusterName,
+ containerId, InstanceType.PARTICIPANT, appConfig.clusterAddress);
+ participantManager.getStateMachineEngine().registerStateModelFactory(
+ "MasterSlave", new ManagedFactory());
+ participantManager.connect();
+ log.info("STARTED " + containerId);
+ }
+
+ public void stopParticipant() {
+ if (participantManager != null) {
+ participantManager.disconnect();
+ participantManager = null;
+ }
+ }
+
+ public void updateContainerStatus() {
+ log.info("updating container status");
+ try {
+ ContainerMetadata meta = metaService.read(containerId);
+
+ if(meta.state == ContainerState.CONNECTING) {
+ log.info("container connecting, going to active");
+ try {
+ startParticipant();
+ metaService.update(new ContainerMetadata(meta, ContainerState.ACTIVE));
+ } catch (Exception e) {
+ log.error("Failed to start participant, going to failed", e);
+ stopParticipant();
+ metaService.update(new ContainerMetadata(meta, ContainerState.FAILED));
+ }
+ }
+
+ if(meta.state == ContainerState.ACTIVE) {
+ // do something
+ // and go to failed on error
+ }
+
+ if(meta.state == ContainerState.TEARDOWN) {
+ log.info("container teardown, going to halted");
+ stopParticipant();
+ metaService.update(new ContainerMetadata(meta, ContainerState.HALTED));
+ stopService();
+ }
+
+ } catch(Exception e) {
+ log.warn(String.format("Container '%s' does not exist, stopping service", containerId));
+ stopService();
+ }
+ }
+
+ class ContainerService implements Runnable {
+ @Override
+ public void run() {
+ updateContainerStatus();
+ }
+ }
+
+ public static void main(String[] args) throws Exception
+ {
+ log.trace("BEGIN YarnProcess.main()");
+
+ final String clusterAddress = args[0];
+ final String clusterName = args[1];
+ final String providerAddress = args[2];
+ final String providerName = args[3];
+ final String containerId = args[4];
+
+ final ApplicationConfig appConfig = new ApplicationConfig(clusterAddress, clusterName, providerAddress, providerName);
+
+ final YarnProcess yarnProcess = new YarnProcess(appConfig, containerId);
+
+ yarnProcess.startService();
+
+ Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() {
+ @Override
+ public void run() {
+ yarnProcess.stopService();
+ }
+ }));
+
+ while(yarnProcess.isRunning()) {
+ try {
+ Thread.sleep(100);
+ } catch (InterruptedException e) {
+ // ignore
+ }
+ }
+
+ log.trace("END YarnProcess.main()");
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/resources/2by2local.properties
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/resources/2by2local.properties b/recipes/meta-cluster-manager/src/main/resources/2by2local.properties
new file mode 100644
index 0000000..ac7968a
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/resources/2by2local.properties
@@ -0,0 +1,52 @@
+#
+# Zookeeper (optional)
+#
+zookeeper.datadir=/tmp/meta/zk/data
+zookeeper.logdir=/tmp/meta/zk/log
+zookeeper.port=2199
+
+#
+# Meta Cluster
+#
+meta.cluster=meta
+meta.managed=managed
+meta.address=localhost:2199
+
+meta.target.type=static
+meta.target.database=3
+meta.target.webserver=5
+
+meta.status.type=local
+
+meta.provider.type=local
+meta.provider.name=provider_0
+meta.provider.address=localhost:2199
+meta.provider.cluster=managed
+
+meta.provider.containers=database,webserver
+
+meta.provider.container.database.class=org.apache.helix.metamanager.impl.container.DummyMasterSlaveProcess
+meta.provider.container.database.prop1=foo
+meta.provider.container.database.prop2=bar
+
+meta.provider.container.webserver.class=org.apache.helix.metamanager.impl.container.DummyMasterSlaveProcess
+meta.provider.container.webserver.prop1=foo
+meta.provider.container.webserver.prop2=bar
+
+#
+# Managed Cluster
+#
+managed.cluster=managed
+managed.address=localhost:2199
+
+managed.resources=dbprod,wsprod
+
+managed.resource.dbprod.container=database
+managed.resource.dbprod.model=MasterSlave
+managed.resource.dbprod.partitions=8
+managed.resource.dbprod.replica=3
+
+managed.resource.wsprod.container=webserver
+managed.resource.wsprod.model=MasterSlave
+managed.resource.wsprod.partitions=15
+managed.resource.wsprod.replica=1
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/resources/2by2localMixedModels.properties
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/resources/2by2localMixedModels.properties b/recipes/meta-cluster-manager/src/main/resources/2by2localMixedModels.properties
new file mode 100644
index 0000000..3971375
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/resources/2by2localMixedModels.properties
@@ -0,0 +1,52 @@
+#
+# Zookeeper (optional)
+#
+zookeeper.datadir=/tmp/meta/zk/data
+zookeeper.logdir=/tmp/meta/zk/log
+zookeeper.port=2199
+
+#
+# Meta Cluster
+#
+meta.cluster=meta
+meta.managed=managed
+meta.address=localhost:2199
+
+meta.target.type=static
+meta.target.database=3
+meta.target.webserver=5
+
+meta.status.type=local
+
+meta.provider.type=local
+meta.provider.name=provider_0
+meta.provider.address=localhost:2199
+meta.provider.cluster=managed
+
+meta.provider.containers=database,webserver
+
+meta.provider.container.database.class=org.apache.helix.metamanager.impl.container.DummyMasterSlaveProcess
+meta.provider.container.database.prop1=foo
+meta.provider.container.database.prop2=bar
+
+meta.provider.container.webserver.class=org.apache.helix.metamanager.impl.container.DummyOnlineOfflineProcess
+meta.provider.container.webserver.prop1=foo
+meta.provider.container.webserver.prop2=bar
+
+#
+# Managed Cluster
+#
+managed.cluster=managed
+managed.address=localhost:2199
+
+managed.resources=dbprod,wsprod
+
+managed.resource.dbprod.container=database
+managed.resource.dbprod.model=MasterSlave
+managed.resource.dbprod.partitions=8
+managed.resource.dbprod.replica=3
+
+managed.resource.wsprod.container=webserver
+managed.resource.wsprod.model=OnlineOffline
+managed.resource.wsprod.partitions=15
+managed.resource.wsprod.replica=1
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/resources/2by2shell.properties
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/resources/2by2shell.properties b/recipes/meta-cluster-manager/src/main/resources/2by2shell.properties
new file mode 100644
index 0000000..a26f250
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/resources/2by2shell.properties
@@ -0,0 +1,52 @@
+#
+# Zookeeper (optional)
+#
+zookeeper.datadir=/tmp/meta/zk/data
+zookeeper.logdir=/tmp/meta/zk/log
+zookeeper.port=2199
+
+#
+# Meta Cluster
+#
+meta.cluster=meta
+meta.managed=managed
+meta.address=localhost:2199
+
+meta.target.type=static
+meta.target.database=3
+meta.target.webserver=5
+
+meta.status.type=shell
+
+meta.provider.type=shell
+meta.provider.name=provider_0
+meta.provider.address=localhost:2199
+meta.provider.cluster=managed
+
+meta.provider.containers=database,webserver
+
+meta.provider.container.database.class=org.apache.helix.metamanager.impl.container.DummyMasterSlaveProcess
+meta.provider.container.database.prop1=foo
+meta.provider.container.database.prop2=bar
+
+meta.provider.container.webserver.class=org.apache.helix.metamanager.impl.container.DummyMasterSlaveProcess
+meta.provider.container.webserver.prop1=foo
+meta.provider.container.webserver.prop2=bar
+
+#
+# Managed Cluster
+#
+managed.cluster=managed
+managed.address=localhost:2199
+
+managed.resources=dbprod,wsprod
+
+managed.resource.dbprod.container=database
+managed.resource.dbprod.model=MasterSlave
+managed.resource.dbprod.partitions=8
+managed.resource.dbprod.replica=3
+
+managed.resource.wsprod.container=webserver
+managed.resource.wsprod.model=MasterSlave
+managed.resource.wsprod.partitions=15
+managed.resource.wsprod.replica=1
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/resources/2by2yarn.properties
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/resources/2by2yarn.properties b/recipes/meta-cluster-manager/src/main/resources/2by2yarn.properties
new file mode 100644
index 0000000..6afd2c6
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/resources/2by2yarn.properties
@@ -0,0 +1,58 @@
+#
+# Zookeeper (optional)
+#
+zookeeper.datadir=/tmp/meta/zk/data
+zookeeper.logdir=/tmp/meta/zk/log
+zookeeper.port=2199
+
+#
+# Meta Cluster
+#
+meta.cluster=meta
+meta.managed=managed
+meta.address=rm:2199
+
+meta.target.type=static
+meta.target.database=3
+meta.target.webserver=5
+
+meta.status.type=yarn
+meta.status.metadata=rm:2199
+
+meta.provider.type=yarn
+meta.provider.name=provider_0
+meta.provider.address=rm:2199
+meta.provider.cluster=managed
+meta.provider.metadata=rm:2199
+meta.provider.resourcemananger=rm:8032
+meta.provider.scheduler=rm:8030
+meta.provider.user=yarn
+meta.provider.hdfs=hdfs://rm:9000/
+
+meta.provider.containers=database,webserver
+
+meta.provider.container.database.class=org.apache.helix.metamanager.impl.container.DummyMasterSlaveProcess
+meta.provider.container.database.prop1=foo
+meta.provider.container.database.prop2=bar
+
+meta.provider.container.webserver.class=org.apache.helix.metamanager.impl.container.DummyMasterSlaveProcess
+meta.provider.container.webserver.prop1=foo
+meta.provider.container.webserver.prop2=bar
+
+#
+# Managed Cluster
+#
+managed.cluster=managed
+managed.address=rm:2199
+
+managed.resources=dbprod,wsprod
+
+managed.resource.dbprod.container=database
+managed.resource.dbprod.model=MasterSlave
+managed.resource.dbprod.partitions=8
+managed.resource.dbprod.replica=3
+
+managed.resource.wsprod.container=webserver
+managed.resource.wsprod.model=MasterSlave
+managed.resource.wsprod.partitions=15
+managed.resource.wsprod.replica=1
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/resources/2by2yarnZookeeper.properties
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/resources/2by2yarnZookeeper.properties b/recipes/meta-cluster-manager/src/main/resources/2by2yarnZookeeper.properties
new file mode 100644
index 0000000..66f3637
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/resources/2by2yarnZookeeper.properties
@@ -0,0 +1,58 @@
+#
+# Zookeeper (optional)
+#
+zookeeper.datadir=/tmp/meta/zk/data
+zookeeper.logdir=/tmp/meta/zk/log
+zookeeper.port=2199
+
+#
+# Meta Cluster
+#
+meta.cluster=meta
+meta.managed=managed
+meta.address=rm:2199
+
+meta.target.type=static
+meta.target.database=3
+meta.target.webserver=5
+
+meta.status.type=yarn
+meta.status.metadata=rm:2199
+
+meta.provider.type=yarn
+meta.provider.name=provider_0
+meta.provider.address=rm:2199
+meta.provider.cluster=managed
+meta.provider.metadata=rm:2199
+meta.provider.resourcemananger=rm:8032
+meta.provider.scheduler=rm:8030
+meta.provider.user=yarn
+meta.provider.hdfs=hdfs://rm:9000/
+
+meta.provider.containers=database,webserver
+
+meta.provider.container.database.class=org.apache.helix.metamanager.impl.container.ZookeeperMasterSlaveProcess
+meta.provider.container.database.address=rm:2199
+meta.provider.container.database.root=mydatabase
+
+meta.provider.container.webserver.class=org.apache.helix.metamanager.impl.container.ZookeeperMasterSlaveProcess
+meta.provider.container.webserver.address=rm:2199
+meta.provider.container.webserver.root=mywebserver
+
+#
+# Managed Cluster
+#
+managed.cluster=managed
+managed.address=rm:2199
+
+managed.resources=dbprod,wsprod
+
+managed.resource.dbprod.container=database
+managed.resource.dbprod.model=MasterSlave
+managed.resource.dbprod.partitions=8
+managed.resource.dbprod.replica=3
+
+managed.resource.wsprod.container=webserver
+managed.resource.wsprod.model=MasterSlave
+managed.resource.wsprod.partitions=15
+managed.resource.wsprod.replica=1
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/resources/2meta2managed.properties
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/resources/2meta2managed.properties b/recipes/meta-cluster-manager/src/main/resources/2meta2managed.properties
new file mode 100644
index 0000000..b719620
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/resources/2meta2managed.properties
@@ -0,0 +1,52 @@
+#
+# Zookeeper (optional)
+#
+zookeeper.datadir=/tmp/meta/zk/data
+zookeeper.logdir=/tmp/meta/zk/log
+zookeeper.port=2199
+
+#
+# Meta Cluster
+#
+meta.cluster=meta
+meta.managed=managed
+meta.address=localhost:2199
+
+meta.target.type=static
+meta.target.database=3
+meta.target.webserver=5
+
+meta.status.type=local
+
+meta.provider.type=local
+meta.provider.name=provider_0
+meta.provider.address=localhost:2199
+meta.provider.cluster=managed
+
+meta.provider.containers=database,webserver
+
+meta.provider.container.database.class=org.apache.helix.metamanager.container.impl.DummyProcess
+meta.provider.container.database.prop1=foo
+meta.provider.container.database.prop2=bar
+
+meta.provider.container.webserver.class=org.apache.helix.metamanager.container.impl.DummyProcess
+meta.provider.container.webserver.prop1=foo
+meta.provider.container.webserver.prop2=bar
+
+#
+# Managed Cluster
+#
+managed.cluster=managed
+managed.address=localhost:2199
+
+managed.resources=dbprod,wsprod
+
+managed.resource.dbprod.container=database
+managed.resource.dbprod.model=MasterSlave
+managed.resource.dbprod.partitions=8
+managed.resource.dbprod.replica=3
+
+managed.resource.wsprod.container=webserver
+managed.resource.wsprod.model=MasterSlave
+managed.resource.wsprod.partitions=15
+managed.resource.wsprod.replica=1
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/resources/Boot2By2Local.properties
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/resources/Boot2By2Local.properties b/recipes/meta-cluster-manager/src/main/resources/Boot2By2Local.properties
new file mode 100644
index 0000000..4eb07bd
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/resources/Boot2By2Local.properties
@@ -0,0 +1,87 @@
+#
+# Zookeeper (optional)
+#
+zookeeper.datadir=/tmp/meta/zk/data
+zookeeper.logdir=/tmp/meta/zk/log
+zookeeper.port=2199
+
+#
+# Cluster
+#
+cluster.name=cluster
+cluster.address=localhost:2199
+
+#
+# Resource
+#
+resource.0.name=wsprod
+resource.0.cluster=cluster
+resource.0.address=localhost:2199
+resource.0.container=webserver
+resource.0.model=MasterSlave
+resource.0.partitions=15
+resource.0.replica=1
+
+resource.1.name=dbprod
+resource.1.cluster=cluster
+resource.1.address=localhost:2199
+resource.1.container=database
+resource.1.model=MasterSlave
+resource.1.partitions=8
+resource.1.replica=3
+
+#
+# Controller
+#
+controller.name=controller
+controller.cluster=cluster
+controller.address=localhost:2199
+controller.autorefresh=5000
+
+#
+# Metacluster
+#
+metacluster.name=meta
+metacluster.address=localhost:2199
+metacluster.managedcluster=cluster
+metacluster.managedaddress=localhost:2199
+
+#
+# Metaresource
+#
+metaresource.0.name=webserver
+metaresource.0.metacluster=meta
+metaresource.0.metaaddress=localhost:2199
+metaresource.0.class=org.apache.helix.metamanager.impl.container.DummyMasterSlaveProcess
+
+metaresource.1.name=database
+metaresource.1.metacluster=meta
+metaresource.1.metaaddress=localhost:2199
+metaresource.1.class=org.apache.helix.metamanager.impl.container.DummyMasterSlaveProcess
+
+#
+# Metaprovider
+#
+metaprovider.0.name=provider0
+metaprovider.0.metacluster=meta
+metaprovider.0.metaaddress=localhost:2199
+metaprovider.0.class=org.apache.helix.metamanager.impl.local.LocalContainerProviderProcess
+
+metaprovider.1.name=provider1
+metaprovider.1.metacluster=meta
+metaprovider.1.metaaddress=localhost:2199
+metaprovider.1.class=org.apache.helix.metamanager.impl.local.LocalContainerProviderProcess
+
+#
+# Metacontroller
+#
+metacontroller.name=metacontroller
+metacontroller.metacluster=meta
+metacontroller.metaaddress=localhost:2199
+metacontroller.autorefresh=5000
+
+metacontroller.status.class=org.apache.helix.metamanager.impl.local.LocalStatusProvider
+
+metacontroller.target.class=org.apache.helix.metamanager.impl.StaticTargetProvider
+metacontroller.target.webserver=5
+metacontroller.target.database=3
[12/15] Adding Helix-task-framework and Yarn integration modules
Posted by ki...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/bootstrapper/ZookeeperService.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/bootstrapper/ZookeeperService.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/bootstrapper/ZookeeperService.java
new file mode 100644
index 0000000..8d78f9b
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/bootstrapper/ZookeeperService.java
@@ -0,0 +1,64 @@
+package org.apache.helix.autoscale.bootstrapper;
+
+import java.io.File;
+import java.util.Properties;
+
+import org.I0Itec.zkclient.IDefaultNameSpace;
+import org.I0Itec.zkclient.ZkClient;
+import org.I0Itec.zkclient.ZkServer;
+import org.apache.commons.io.FileUtils;
+import org.apache.helix.autoscale.Service;
+import org.apache.log4j.Logger;
+
+/**
+ * Bootstrapping zookeeper. Convenience tool for creating standalone zookeeper
+ * instance for test deployments. For production use a separate zookeeper
+ * cluster is strongly recommended.
+ *
+ */
+public class ZookeeperService implements Service {
+
+ static final Logger log = Logger.getLogger(ZookeeperService.class);
+
+ String dataDir;
+ String logDir;
+ int port;
+
+ ZkServer server;
+
+ @Override
+ public void configure(Properties properties) throws Exception {
+ dataDir = properties.getProperty("datadir", "/tmp/zk/data");
+ logDir = properties.getProperty("logdir", "/tmp/zk/log");
+ port = Integer.parseInt(properties.getProperty("port", "2199"));
+ }
+
+ @Override
+ public void start() {
+ log.info(String.format("starting zookeeper service (dataDir='%s', logDir='%s', port=%d)", dataDir, logDir, port));
+
+ FileUtils.deleteQuietly(new File(dataDir));
+ FileUtils.deleteQuietly(new File(logDir));
+
+ IDefaultNameSpace defaultNameSpace = new IDefaultNameSpace() {
+ @Override
+ public void createDefaultNameSpace(ZkClient zkClient) {
+ // left blank
+ }
+ };
+
+ server = new ZkServer(dataDir, logDir, defaultNameSpace, port);
+ server.start();
+ }
+
+ @Override
+ public void stop() {
+ log.info("stopping zookeeper service");
+
+ if (server != null) {
+ server.shutdown();
+ server = null;
+ }
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/container/ContainerProcess.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/container/ContainerProcess.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/container/ContainerProcess.java
new file mode 100644
index 0000000..343e426
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/container/ContainerProcess.java
@@ -0,0 +1,133 @@
+package org.apache.helix.autoscale.container;
+
+import java.util.Properties;
+
+import org.apache.helix.HelixManager;
+import org.apache.helix.HelixManagerFactory;
+import org.apache.helix.InstanceType;
+import org.apache.helix.autoscale.Service;
+import org.apache.helix.participant.statemachine.StateModel;
+import org.apache.helix.participant.statemachine.StateModelFactory;
+import org.apache.log4j.Logger;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * Base service for spawn-able container types. Configure from Properties and
+ * instantiates Helix participant to managed cluster.
+ *
+ */
+public abstract class ContainerProcess implements Service {
+ static final Logger log = Logger.getLogger(ContainerProcess.class);
+
+ private ContainerProcessProperties properties;
+ private HelixManager participantManager;
+
+ private String modelName;
+ private StateModelFactory<? extends StateModel> modelFactory;
+
+ private String instanceName;
+ private String clusterName;
+ private String zookeeperAddress;
+
+ private boolean active = false;
+ private boolean failed = false;
+
+ public final void setModelName(String modelName) {
+ this.modelName = modelName;
+ }
+
+ public final void setModelFactory(StateModelFactory<? extends StateModel> modelFactory) {
+ this.modelFactory = modelFactory;
+ }
+
+ @Override
+ public void configure(Properties properties) throws Exception {
+ ContainerProcessProperties containerProps = new ContainerProcessProperties();
+ containerProps.putAll(properties);
+ Preconditions.checkArgument(containerProps.isValid());
+
+ this.properties = containerProps;
+ this.instanceName = containerProps.getName();
+ this.clusterName = containerProps.getCluster();
+ this.zookeeperAddress = containerProps.getAddress();
+ }
+
+ @Override
+ public final void start() {
+ try {
+ Preconditions.checkNotNull(modelName, "state model name not set");
+ Preconditions.checkNotNull(modelFactory, "state model factory not set");
+ Preconditions.checkState(properties.isValid(), "process properties not valid: %s", properties.toString());
+
+ log.info(String.format("starting container '%s'", instanceName));
+ startContainer();
+
+ log.info(String.format("starting helix participant '%s'", instanceName));
+ startParticipant();
+
+ active = true;
+
+ } catch (Exception e) {
+ log.error(String.format("starting container '%s' failed", instanceName), e);
+ fail();
+ }
+ }
+
+ protected abstract void startContainer() throws Exception;
+
+ private final void startParticipant() throws Exception {
+ participantManager = HelixManagerFactory.getZKHelixManager(clusterName, instanceName, InstanceType.PARTICIPANT, zookeeperAddress);
+ participantManager.getStateMachineEngine().registerStateModelFactory(modelName, modelFactory);
+ participantManager.connect();
+ }
+
+ @Override
+ public final void stop() {
+ try {
+ log.info(String.format("stopping helix participant '%s'", instanceName));
+ stopParticipant();
+
+ log.info(String.format("stopping container '%s'", instanceName));
+ stopContainer();
+
+ active = false;
+
+ } catch (Exception e) {
+ log.warn(String.format("stopping container '%s' failed", instanceName), e);
+ }
+ }
+
+ protected abstract void stopContainer() throws Exception;
+
+ private final void stopParticipant() {
+ if (participantManager != null) {
+ participantManager.disconnect();
+ }
+ }
+
+ public final void fail() {
+ failed = true;
+ }
+
+ public final boolean isActive() {
+ return active && !failed;
+ }
+
+ public final boolean isFailed() {
+ return failed;
+ }
+
+ public final ContainerProcessProperties getProperties() {
+ return properties;
+ }
+
+ String getModelName() {
+ return modelName;
+ }
+
+ StateModelFactory<? extends StateModel> getModelFactory() {
+ return modelFactory;
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/container/ContainerProcessProperties.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/container/ContainerProcessProperties.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/container/ContainerProcessProperties.java
new file mode 100644
index 0000000..1096174
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/container/ContainerProcessProperties.java
@@ -0,0 +1,66 @@
+package org.apache.helix.autoscale.container;
+
+import java.util.Properties;
+
+import com.google.common.base.Preconditions;
+
/**
 * Base configuration for ContainerProcess. A thin {@link Properties} wrapper
 * that fails fast: reading a key that is not present throws
 * {@link IllegalStateException} instead of returning null.
 */
public class ContainerProcessProperties extends Properties {

    private static final long serialVersionUID = 5754863079470995536L;

    /** Helix cluster name the container joins. */
    public static final String CLUSTER         = "cluster";
    /** Zookeeper connect string. */
    public static final String ADDRESS         = "address";
    /** Unique instance name of the container. */
    public static final String NAME            = "name";
    /** Fully-qualified class name of the ContainerProcess implementation. */
    public static final String CONTAINER_CLASS = "class";

    public ContainerProcessProperties() {
        // left blank
    }

    /**
     * Copy constructor.
     *
     * @param properties source properties, must not be null
     * @throws NullPointerException if properties is null
     */
    public ContainerProcessProperties(Properties properties) {
        if (properties == null)
            throw new NullPointerException("properties must not be null");
        putAll(properties);
    }

    /** @return true if all required keys (cluster, name, address, class) are present */
    public boolean isValid() {
        return containsKey(CLUSTER) &&
               containsKey(NAME) &&
               containsKey(ADDRESS) &&
               containsKey(CONTAINER_CLASS);
    }

    public String getCluster() {
        return getProperty(CLUSTER);
    }

    public String getAddress() {
        return getProperty(ADDRESS);
    }

    public String getName() {
        return getProperty(NAME);
    }

    public String getContainerClass() {
        return getProperty(CONTAINER_CLASS);
    }

    /** @throws IllegalStateException if the key is not present */
    @Override
    public synchronized Object get(Object key) {
        if (!containsKey(key))
            throw new IllegalStateException(String.format("property '%s' not set", key));
        return super.get(key);
    }

    /** @throws IllegalStateException if the key is not present */
    @Override
    public String getProperty(String key) {
        if (!containsKey(key))
            throw new IllegalStateException(String.format("property '%s' not set", key));
        return super.getProperty(key);
    }

    /**
     * BUGFIX: the inherited {@code Properties.getProperty(key, defaultValue)}
     * delegates to {@code getProperty(key)}, which this class overrides to
     * throw on missing keys — so the default value could never be returned.
     * This override restores the documented Properties contract for the
     * two-argument form.
     */
    @Override
    public String getProperty(String key, String defaultValue) {
        if (!containsKey(key))
            return defaultValue;
        return super.getProperty(key);
    }

}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/container/ContainerUtils.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/container/ContainerUtils.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/container/ContainerUtils.java
new file mode 100644
index 0000000..8bab01e
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/container/ContainerUtils.java
@@ -0,0 +1,46 @@
+package org.apache.helix.autoscale.container;
+
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.lang.reflect.Constructor;
+
+import org.apache.log4j.Logger;
+
+/**
+ * Utility for loading ContainerProperties and spawning ContainerProcess.
+ *
+ */
+public class ContainerUtils {
+
+ static final Logger log = Logger.getLogger(ContainerUtils.class);
+
+ private ContainerUtils() {
+ // left blank
+ }
+
+ public static ContainerProcess createProcess(ContainerProcessProperties properties) throws Exception {
+ String containerClassName = properties.getContainerClass();
+
+ Class<?> containerClass = Class.forName(containerClassName);
+
+ log.debug(String.format("checking for properties constructor in class '%s'", containerClassName));
+
+ Constructor<?> constructor = containerClass.getConstructor(ContainerProcessProperties.class);
+
+ return (ContainerProcess) constructor.newInstance(properties);
+ }
+
+ public static ContainerProcessProperties getPropertiesFromResource(String resourceName) throws IOException {
+ ContainerProcessProperties properties = new ContainerProcessProperties();
+ properties.load(ClassLoader.getSystemResourceAsStream(resourceName));
+ return properties;
+ }
+
+ public static ContainerProcessProperties getPropertiesFromPath(String filePath) throws IOException {
+ ContainerProcessProperties properties = new ContainerProcessProperties();
+ properties.load(new InputStreamReader(new FileInputStream(filePath)));
+ return properties;
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/FileTargetProvider.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/FileTargetProvider.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/FileTargetProvider.java
new file mode 100644
index 0000000..ebbf4b6
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/FileTargetProvider.java
@@ -0,0 +1,51 @@
+package org.apache.helix.autoscale.impl;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.Properties;
+
+import org.apache.helix.autoscale.TargetProviderService;
+
+/**
+ * File-based target model. Container count is extracted from properties file. Count may change dynamically.
+ *
+ */
+public class FileTargetProvider implements TargetProviderService {
+
+ File file;
+
+ public FileTargetProvider() {
+ // left blank
+ }
+
+ public FileTargetProvider(String path) {
+ this.file = new File(path);
+ }
+
+ @Override
+ public int getTargetContainerCount(String containerType) throws FileNotFoundException, IOException, IllegalArgumentException {
+ Properties properties = new Properties();
+ properties.load(new FileReader(file));
+ if (!properties.contains(containerType))
+ throw new IllegalArgumentException(String.format("container type '%s' not found in '%s'", containerType, file.getCanonicalPath()));
+ return Integer.parseInt((String) properties.get(containerType));
+ }
+
+ @Override
+ public void configure(Properties properties) throws Exception {
+ this.file = new File(properties.getProperty("path"));
+ }
+
+ @Override
+ public void start() throws Exception {
+ // left blank
+ }
+
+ @Override
+ public void stop() throws Exception {
+ // left blank
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/RedisTargetProvider.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/RedisTargetProvider.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/RedisTargetProvider.java
new file mode 100644
index 0000000..723ac4d
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/RedisTargetProvider.java
@@ -0,0 +1,356 @@
+package org.apache.helix.autoscale.impl;
+
+import java.io.BufferedReader;
+import java.io.InputStreamReader;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.Map.Entry;
+import java.util.Properties;
+import java.util.concurrent.Callable;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.TimeUnit;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.I0Itec.zkclient.ZkClient;
+import org.apache.helix.autoscale.TargetProviderService;
+import org.apache.log4j.Logger;
+
+import com.google.common.base.Preconditions;
+
/**
 * Redis-specific target model based on recurring Tps benchmarking. Tps target
 * and probed redis-server instances are configured via zookeeper. Tps target
 * may change dynamically.
 *
 * Lifecycle: configure() reads static settings, start() seeds the zookeeper
 * targets and schedules a recurring benchmark, stop() tears both down.
 * getTargetContainerCount() returns the count computed by the last benchmark
 * run.
 *
 * NOTE(review): targetCount and the target/average fields are written by the
 * scheduler thread and read by callers without synchronization — presumably
 * acceptable staleness, but confirm; consider volatile.
 */
public class RedisTargetProvider implements TargetProviderService {

    static final Logger log = Logger.getLogger(RedisTargetProvider.class);

    // external command invoked per probed server; must be on the PATH
    public static final String BENCHMARK_COMMAND = "redis-benchmark";
    public static final String BENCHMARK_TESTS = "GET,SET";

    public static final String DEFAULT_RECORDS = "100000";
    public static final String DEFAULT_CLIENTS = "20";
    public static final String DEFAULT_REQUESTS = "100000";
    public static final String DEFAULT_TIMEOUT = "8000";
    public static final String DEFAULT_INTERVAL = "10000";
    public static final String DEFAULT_ALPHA = "0.25";

    ZkClient zookeeper;

    String address;   // zookeeper connect string
    String root;      // zookeeper root node holding targets and server registrations

    int records;      // -r keyspace size passed to redis-benchmark
    int clients;      // -c parallel clients
    int requests;     // -n total requests
    int timeout;      // ms budget for collecting all benchmark results
    int interval;     // ms between scheduled benchmark runs

    int targetTpsGet; // desired aggregate GET throughput
    int targetTpsSet; // desired aggregate SET throughput

    int targetCountMin; // clamp bounds for computed count; <=0 disables
    int targetCountMax;
    int targetCount;    // last computed container count, served to callers

    double alpha;       // smoothing factor for the exponential moving averages
    double averageTpsGet;
    double averageTpsSet;
    double averageCount;

    ScheduledExecutorService executor;

    /**
     * Reads connection and benchmark settings; values not present fall back
     * to the DEFAULT_* constants (min/max default to -1 = unbounded).
     */
    @Override
    public void configure(Properties properties) {
        address = properties.getProperty("address");
        root = properties.getProperty("root");
        targetTpsGet = Integer.valueOf(properties.getProperty("get", "0"));
        targetTpsSet = Integer.valueOf(properties.getProperty("set", "0"));
        targetCountMin = Integer.valueOf(properties.getProperty("min", "-1"));
        targetCountMax = Integer.valueOf(properties.getProperty("max", "-1"));
        records = Integer.valueOf(properties.getProperty("records", DEFAULT_RECORDS));
        clients = Integer.valueOf(properties.getProperty("clients", DEFAULT_CLIENTS));
        requests = Integer.valueOf(properties.getProperty("requests", DEFAULT_REQUESTS));
        timeout = Integer.valueOf(properties.getProperty("timeout", DEFAULT_TIMEOUT));
        interval = Integer.valueOf(properties.getProperty("interval", DEFAULT_INTERVAL));
        alpha = Double.valueOf(properties.getProperty("alpha", DEFAULT_ALPHA));
    }

    /**
     * Connects to zookeeper, seeds the four target nodes (existing nodes are
     * kept — creation failures are deliberately ignored) and schedules the
     * recurring benchmark.
     */
    @Override
    public void start() {
        log.debug("starting redis status service");
        zookeeper = new ZkClient(address);
        zookeeper.createPersistent("/" + root, true);

        // best-effort seeding: if the node already exists the current value wins
        try { zookeeper.createPersistent("/" + root + "/target.get", String.valueOf(targetTpsGet)); } catch (Exception ignore) {}
        try { zookeeper.createPersistent("/" + root + "/target.set", String.valueOf(targetTpsSet)); } catch (Exception ignore) {}
        try { zookeeper.createPersistent("/" + root + "/target.min", String.valueOf(targetCountMin)); } catch (Exception ignore) {}
        try { zookeeper.createPersistent("/" + root + "/target.max", String.valueOf(targetCountMax)); } catch (Exception ignore) {}

        executor = Executors.newSingleThreadScheduledExecutor();
        executor.scheduleAtFixedRate(new RedisBenchmarkRunnable(), 0, interval, TimeUnit.MILLISECONDS);
    }

    /**
     * Stops the benchmark scheduler (busy-waiting for termination) and closes
     * the zookeeper connection. Idempotent: both members are nulled out.
     */
    @Override
    public void stop() {
        log.debug("stopping redis status service");
        if (executor != null) {
            executor.shutdownNow();
            // spin until the in-flight benchmark run has actually stopped
            while (!executor.isTerminated()) {
                try {
                    Thread.sleep(100);
                } catch (InterruptedException e) {
                    // ignore
                }
            }
            executor = null;
        }
        if (zookeeper != null) {
            zookeeper.close();
            zookeeper = null;
        }
    }

    /**
     * @return the container count computed by the most recent benchmark run
     *         (containerType is ignored — this provider models a single type)
     */
    @Override
    public int getTargetContainerCount(String containerType) throws Exception {
        return targetCount;
    }

    /**
     * One scheduled benchmark cycle: probe every registered redis-server in
     * parallel, aggregate the per-server Tps, refresh targets from zookeeper,
     * then derive a smoothed container count.
     */
    private class RedisBenchmarkRunnable implements Runnable {
        ExecutorService executor = Executors.newCachedThreadPool();
        RedisResult aggregateResult;

        @Override
        public void run() {
            log.debug("running redis benchmark");

            aggregateResult = new RedisResult(0);
            Collection<Future<RedisResult>> futures = new ArrayList<Future<RedisResult>>();

            try {
                Collection<RedisTarget> targets = getTargets();

                // start benchmark
                for (RedisTarget target : targets) {
                    log.debug(String.format("submitting target '%s'", target));
                    Future<RedisResult> future = executor.submit(new RedisCallable(target));
                    futures.add(future);
                }

                // aggregate results
                try {
                    log.debug("waiting for results");

                    // shared deadline: every future gets whatever remains of 'timeout'
                    long limit = System.currentTimeMillis() + timeout;
                    for (Future<RedisResult> future : futures) {
                        try {
                            RedisResult result = future.get(limit - System.currentTimeMillis(), TimeUnit.MILLISECONDS);
                            log.debug(String.format("got result '%s'", result));
                            aggregate(result);
                        } catch (Exception e) {
                            // a slow/failed server is skipped; the aggregate just covers fewer servers
                            log.warn(String.format("failed to get result"));
                            future.cancel(true);
                        }
                    }
                } catch (Exception e) {
                    log.error("Error running redis benchmark", e);

                    for (Future<RedisResult> future : futures) {
                        future.cancel(true);
                    }

                    return;
                }

                // compare to thresholds
                log.debug(String.format("aggregate result is '%s'", aggregateResult));

                // get target from zookeeper; best-effort, keep previous values on failure
                try { targetTpsGet = Integer.valueOf(zookeeper.<String> readData("/" + root + "/target.get")); } catch (Exception ignore) {}
                try { targetTpsSet = Integer.valueOf(zookeeper.<String> readData("/" + root + "/target.set")); } catch (Exception ignore) {}
                try { targetCountMin = Integer.valueOf(zookeeper.<String> readData("/" + root + "/target.min")); } catch (Exception ignore) {}
                try { targetCountMax = Integer.valueOf(zookeeper.<String> readData("/" + root + "/target.max")); } catch (Exception ignore) {}

                // exponential moving average of the number of responding servers
                averageCount = alpha * aggregateResult.serverCount + (1.0 - alpha) * averageCount;

                // calculate counts: required servers = targetTps / avgTpsPerRun * avgServerCount
                int targetCountGet = -1;
                if (aggregateResult.containsKey("GET")) {
                    double tpsTarget = targetTpsGet;
                    double tps = aggregateResult.get("GET");

                    averageTpsGet = alpha * tps + (1.0 - alpha) * averageTpsGet;

                    targetCountGet = (int) Math.ceil(tpsTarget / averageTpsGet * averageCount);
                    log.debug(String.format("count.get=%d, target.get=%f, tps.get=%f, tps.avg.get=%f, count.avg=%f", targetCountGet, tpsTarget, tps,
                            averageTpsGet, averageCount));
                }

                int targetCountSet = -1;
                if (aggregateResult.containsKey("SET")) {
                    double tpsTarget = targetTpsSet;
                    double tps = aggregateResult.get("SET");

                    averageTpsSet = alpha * tps + (1.0 - alpha) * averageTpsSet;

                    targetCountSet = (int) Math.ceil(tpsTarget / averageTpsSet * averageCount);
                    log.debug(String.format("count.set=%d, target.set=%f, tps.set=%f, tps.avg.set=%f, count.avg=%f", targetCountSet, tpsTarget, tps,
                            averageTpsSet, averageCount));
                }

                // the more demanding of the two workloads wins, then clamp
                targetCount = Math.max(targetCountGet, targetCountSet);

                if (targetCountMin > 0)
                    targetCount = Math.max(targetCount, targetCountMin);
                if (targetCountMax > 0)
                    targetCount = Math.min(targetCount, targetCountMax);

                // never scale to zero
                targetCount = Math.max(targetCount, 1);

                log.debug(String.format("target count is %d", targetCount));
                RedisTargetProvider.this.targetCount = targetCount;

            } catch (Exception e) {
                log.error("Error running redis benchmark", e);

                for (Future<RedisResult> future : futures) {
                    future.cancel(true);
                }
            }

        }

        /**
         * Reads the registered redis-servers from zookeeper. The four
         * target.* bookkeeping nodes are filtered out; servers without a
         * (ephemeral) heartbeat node are considered dead and skipped.
         */
        Collection<RedisTarget> getTargets() {
            log.debug("fetching redis servers from zookeeper");
            Collection<RedisTarget> targets = new ArrayList<RedisTarget>();
            Collection<String> servers = zookeeper.getChildren("/" + root);

            servers.remove("target.get");
            servers.remove("target.set");
            servers.remove("target.min");
            servers.remove("target.max");

            for (String server : servers) {
                if (!zookeeper.exists("/" + root + "/" + server + "/heartbeat"))
                    continue;

                String hostname = zookeeper.readData("/" + root + "/" + server + "/hostname");
                int port = Integer.valueOf(zookeeper.<String> readData("/" + root + "/" + server + "/port"));

                targets.add(new RedisTarget(hostname, port));
            }

            log.debug(String.format("found %d servers: %s", targets.size(), targets));
            return targets;
        }

        /**
         * Folds one per-server result into the running aggregate: Tps values
         * are summed per test name, server counts are added.
         */
        void aggregate(RedisResult result) {
            RedisResult newResult = new RedisResult(aggregateResult.serverCount + result.serverCount);

            for (Entry<String, Double> entry : result.entrySet()) {
                double current = 0.0d;
                if (aggregateResult.containsKey(entry.getKey()))
                    current = aggregateResult.get(entry.getKey());

                current += entry.getValue();
                newResult.put(entry.getKey(), current);
            }

            aggregateResult = newResult;
        }
    }

    /** Immutable host:port pair of one probed redis-server. */
    private static class RedisTarget {
        final String hostname;
        final int port;

        public RedisTarget(String hostname, int port) {
            this.hostname = hostname;
            this.port = port;
        }

        @Override
        public String toString() {
            return String.format("%s:%d", hostname, port);
        }
    }

    /** Map of test name (e.g. "GET") to measured Tps, plus the number of servers covered. */
    private static class RedisResult extends HashMap<String, Double> {
        /**
         *
         */
        private static final long serialVersionUID = 4599748807597500952L;

        final int serverCount;

        public RedisResult(int serverCount) {
            this.serverCount = serverCount;
        }

        @Override
        public String toString() {
            return String.format("[serverCount=%d %s]", serverCount, super.toString());
        }
    }

    /**
     * Runs one redis-benchmark process against a single server and parses its
     * CSV output into a RedisResult.
     *
     * NOTE(review): waitFor() is called before stdout is consumed — if the
     * benchmark emits more output than the OS pipe buffer holds, the child
     * can block forever; consider reading the stream before/while waiting.
     * The BufferedReader is also never closed.
     */
    private class RedisCallable implements Callable<RedisResult> {
        final RedisTarget target;

        public RedisCallable(RedisTarget target) {
            this.target = target;
        }

        @Override
        public RedisResult call() throws Exception {
            log.debug(String.format("executing benchmark for '%s'", target));

            ProcessBuilder builder = new ProcessBuilder();
            builder.command(BENCHMARK_COMMAND, "-h", target.hostname, "-p", String.valueOf(target.port), "-r", String.valueOf(records), "-n",
                    String.valueOf(requests), "-c", String.valueOf(clients), "-t", BENCHMARK_TESTS, "--csv");
            Process process = builder.start();

            log.debug(String.format("running '%s'", builder.command()));

            RedisResult result = new RedisResult(1);

            int retVal;
            try {
                retVal = process.waitFor();
            } catch (InterruptedException e) {
                // cancellation: kill the child and hand back an empty result
                process.destroy();
                return result;
            }

            Preconditions.checkState(retVal == 0, "Benchmark process returned %s", retVal);

            // CSV lines look like: "GET","12345.67"
            Pattern pattern = Pattern.compile("\"([A-Z0-9_]+).*\",\"([0-9\\.]+)\"");

            log.debug("parsing output");
            BufferedReader reader = new BufferedReader(new InputStreamReader(process.getInputStream()));
            String line = null;
            while ((line = reader.readLine()) != null) {
                Matcher matcher = pattern.matcher(line);

                if (!matcher.find())
                    continue;

                String key = matcher.group(1);
                Double value = Double.valueOf(matcher.group(2));

                result.put(key, value);
            }

            log.debug(String.format("benchmark for '%s' returned '%s'", target, result));

            return result;
        }
    }

}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/StaticTargetProvider.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/StaticTargetProvider.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/StaticTargetProvider.java
new file mode 100644
index 0000000..346f0fe
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/StaticTargetProvider.java
@@ -0,0 +1,62 @@
+package org.apache.helix.autoscale.impl;
+
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Properties;
+
+import org.apache.helix.autoscale.TargetProviderService;
+import org.apache.log4j.Logger;
+
+/**
+ * Target model based on manually set count. Count may change dynamically.
+ *
+ */
+public class StaticTargetProvider implements TargetProviderService {
+ static final Logger log = Logger.getLogger(StaticTargetProvider.class);
+
+ final Map<String, Integer> targetCounts = new HashMap<String, Integer>();
+
+ public StaticTargetProvider() {
+ // left blank
+ }
+
+ public StaticTargetProvider(Map<String, Integer> targetCounts) {
+ this.targetCounts.putAll(targetCounts);
+ }
+
+ @Override
+ public int getTargetContainerCount(String containerType) {
+ return targetCounts.get(containerType);
+ }
+
+ public void setTargetContainerCount(String containerType, int targetCount) {
+ targetCounts.put(containerType, targetCount);
+ }
+
+ @Override
+ public void configure(Properties properties) throws Exception {
+ for (Entry<Object, Object> entry : properties.entrySet()) {
+ String key = (String) entry.getKey();
+
+ try {
+ int value = Integer.valueOf((String) entry.getValue());
+ log.debug(String.format("Inserting value '%s = %d'", key, value));
+ targetCounts.put(key, value);
+ } catch (NumberFormatException e) {
+ log.warn(String.format("Skipping '%s', not an integer (value='%s')", key, (String) entry.getValue()));
+ }
+ }
+ }
+
+ @Override
+ public void start() throws Exception {
+ // left blank
+ }
+
+ @Override
+ public void stop() throws Exception {
+ // left blank
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/container/DummyMasterSlaveProcess.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/container/DummyMasterSlaveProcess.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/container/DummyMasterSlaveProcess.java
new file mode 100644
index 0000000..683249d
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/container/DummyMasterSlaveProcess.java
@@ -0,0 +1,76 @@
+package org.apache.helix.autoscale.impl.container;
+
+import org.apache.helix.NotificationContext;
+import org.apache.helix.autoscale.container.ContainerProcess;
+import org.apache.helix.autoscale.container.ContainerProcessProperties;
+import org.apache.helix.model.Message;
+import org.apache.helix.participant.statemachine.StateModel;
+import org.apache.helix.participant.statemachine.StateModelFactory;
+import org.apache.helix.participant.statemachine.StateModelInfo;
+import org.apache.helix.participant.statemachine.Transition;
+import org.apache.log4j.Logger;
+
+/**
+ * Sample implementation of container with Helix participant for MasterSlave
+ * state model. Print state transitions only.
+ *
+ */
+public class DummyMasterSlaveProcess extends ContainerProcess {
+
+ static final Logger log = Logger.getLogger(DummyMasterSlaveProcess.class);
+
+ public DummyMasterSlaveProcess(ContainerProcessProperties properties) throws Exception {
+ configure(properties);
+ setModelName("MasterSlave");
+ setModelFactory(new DummyMasterSlaveModelFactory());
+ }
+
+ @Override
+ protected void startContainer() throws Exception {
+ log.info("starting dummy process container");
+ }
+
+ @Override
+ protected void stopContainer() throws Exception {
+ log.info("stopping dummy process container");
+ }
+
+ public static class DummyMasterSlaveModelFactory extends StateModelFactory<DummyMasterSlaveStateModel> {
+ @Override
+ public DummyMasterSlaveStateModel createNewStateModel(String partitionName) {
+ return new DummyMasterSlaveStateModel();
+ }
+ }
+
+ @StateModelInfo(initialState = "OFFLINE", states = { "OFFLINE", "SLAVE", "MASTER", "DROPPED" })
+ public static class DummyMasterSlaveStateModel extends StateModel {
+
+ static final Logger log = Logger.getLogger(DummyMasterSlaveStateModel.class);
+
+ @Transition(from = "OFFLINE", to = "SLAVE")
+ public void offlineToSlave(Message m, NotificationContext context) {
+ log.trace(String.format("%s transitioning from OFFLINE to SLAVE", context.getManager().getInstanceName()));
+ }
+
+ @Transition(from = "SLAVE", to = "OFFLINE")
+ public void slaveToOffline(Message m, NotificationContext context) {
+ log.trace(String.format("%s transitioning from SLAVE to OFFLINE", context.getManager().getInstanceName()));
+ }
+
+ @Transition(from = "SLAVE", to = "MASTER")
+ public void slaveToMaster(Message m, NotificationContext context) {
+ log.trace(String.format("%s transitioning from SLAVE to MASTER", context.getManager().getInstanceName()));
+ }
+
+ @Transition(from = "MASTER", to = "SLAVE")
+ public void masterToSlave(Message m, NotificationContext context) {
+ log.trace(String.format("%s transitioning from MASTER to SLAVE", context.getManager().getInstanceName()));
+ }
+
+ @Transition(from = "OFFLINE", to = "DROPPED")
+ public void offlineToDropped(Message m, NotificationContext context) {
+ log.trace(String.format("%s transitioning from OFFLINE to DROPPED", context.getManager().getInstanceName()));
+ }
+
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/container/DummyOnlineOfflineProcess.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/container/DummyOnlineOfflineProcess.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/container/DummyOnlineOfflineProcess.java
new file mode 100644
index 0000000..a0aad8e
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/container/DummyOnlineOfflineProcess.java
@@ -0,0 +1,66 @@
+package org.apache.helix.autoscale.impl.container;
+
+import org.apache.helix.NotificationContext;
+import org.apache.helix.autoscale.container.ContainerProcess;
+import org.apache.helix.autoscale.container.ContainerProcessProperties;
+import org.apache.helix.model.Message;
+import org.apache.helix.participant.statemachine.StateModel;
+import org.apache.helix.participant.statemachine.StateModelFactory;
+import org.apache.helix.participant.statemachine.StateModelInfo;
+import org.apache.helix.participant.statemachine.Transition;
+import org.apache.log4j.Logger;
+
+/**
+ * Sample implementation of container with Helix participant for OnlineOffline
+ * state model. Print state transitions only.
+ *
+ */
+public class DummyOnlineOfflineProcess extends ContainerProcess {
+
+ static final Logger log = Logger.getLogger(DummyOnlineOfflineProcess.class);
+
+ public DummyOnlineOfflineProcess(ContainerProcessProperties properties) throws Exception {
+ configure(properties);
+ setModelName("OnlineOffline");
+ setModelFactory(new DummyOnlineOfflineModelFactory());
+ }
+
+ @Override
+ protected void startContainer() throws Exception {
+ log.info("starting dummy online-offline process container");
+ }
+
+ @Override
+ protected void stopContainer() throws Exception {
+ log.info("stopping dummy online-offline process container");
+ }
+
+ public static class DummyOnlineOfflineModelFactory extends StateModelFactory<DummyOnlineOfflineStateModel> {
+ @Override
+ public DummyOnlineOfflineStateModel createNewStateModel(String partitionName) {
+ return new DummyOnlineOfflineStateModel();
+ }
+ }
+
+ @StateModelInfo(initialState = "OFFLINE", states = { "OFFLINE", "ONLINE", "DROPPED" })
+ public static class DummyOnlineOfflineStateModel extends StateModel {
+
+ static final Logger log = Logger.getLogger(DummyOnlineOfflineStateModel.class);
+
+ @Transition(from = "OFFLINE", to = "ONLINE")
+ public void offlineToOnline(Message m, NotificationContext context) {
+ log.trace(String.format("%s transitioning from OFFLINE to ONLINE", context.getManager().getInstanceName()));
+ }
+
+ @Transition(from = "ONLINE", to = "OFFLINE")
+ public void onlineToOffline(Message m, NotificationContext context) {
+ log.trace(String.format("%s transitioning from ONLINE to OFFLINE", context.getManager().getInstanceName()));
+ }
+
+ @Transition(from = "OFFLINE", to = "DROPPED")
+ public void offlineToDropped(Message m, NotificationContext context) {
+ log.trace(String.format("%s transitioning from OFFLINE to DROPPED", context.getManager().getInstanceName()));
+ }
+
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/container/RedisServerProcess.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/container/RedisServerProcess.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/container/RedisServerProcess.java
new file mode 100644
index 0000000..5f6f745
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/container/RedisServerProcess.java
@@ -0,0 +1,140 @@
+package org.apache.helix.autoscale.impl.container;
+
+import java.net.InetAddress;
+import java.util.concurrent.Executors;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.TimeUnit;
+
+import org.I0Itec.zkclient.ZkClient;
+import org.apache.helix.NotificationContext;
+import org.apache.helix.autoscale.container.ContainerProcess;
+import org.apache.helix.autoscale.container.ContainerProcessProperties;
+import org.apache.helix.model.Message;
+import org.apache.helix.participant.statemachine.StateModel;
+import org.apache.helix.participant.statemachine.StateModelFactory;
+import org.apache.helix.participant.statemachine.StateModelInfo;
+import org.apache.helix.participant.statemachine.Transition;
+import org.apache.log4j.Logger;
+
/**
 * Container implementation for redis-server. Uses OnlineOffline model, spawns
 * Redis as Shell process and writes metadata (hostname, port, heartbeat) to
 * zookeeper so that RedisTargetProvider can discover and probe it.
 *
 * NOTE(review): the spawned process' stdout/stderr are never consumed — if
 * redis-server logs enough to fill the OS pipe buffer it may block; consider
 * redirecting or draining the streams.
 */
public class RedisServerProcess extends ContainerProcess {

    static final Logger log = Logger.getLogger(RedisServerProcess.class);

    // external command; must be on the PATH
    public static final String REDIS_SERVER_COMMAND = "redis-server";

    // ms between liveness checks of the spawned process
    public static final long MONITOR_INTERVAL = 5000;

    ZkClient zookeeper;

    final String address;  // zookeeper connect string
    final String root;     // zookeeper root under which this server registers
    final String name;     // instance name; also used to derive the port
    final int basePort;

    Process process;       // the spawned redis-server

    ScheduledExecutorService executor;

    public RedisServerProcess(ContainerProcessProperties properties) throws Exception {
        configure(properties);
        setModelName("OnlineOffline");
        setModelFactory(new RedisServerModelFactory());

        address = properties.getProperty("address");
        root = properties.getProperty("root");
        basePort = Integer.valueOf(properties.getProperty("baseport"));
        name = properties.getProperty(ContainerProcessProperties.NAME);
    }

    /**
     * Spawns redis-server, registers hostname/port in zookeeper (heartbeat is
     * an ephemeral node, so it vanishes with the session) and starts the
     * process monitor.
     *
     * NOTE(review): the port is derived from the second '_'-separated token of
     * the instance name — assumes names of the form 'prefix_N'; confirm.
     */
    @Override
    protected void startContainer() throws Exception {
        log.info(String.format("starting redis server container for instance '%s'", name));

        String hostname = InetAddress.getLocalHost().getHostName();
        int port = basePort + Integer.valueOf(name.split("_")[1]);

        log.debug(String.format("Starting redis server at '%s:%d'", hostname, port));

        ProcessBuilder builder = new ProcessBuilder();
        builder.command(REDIS_SERVER_COMMAND, "--port", String.valueOf(port));
        process = builder.start();

        log.debug("Updating zookeeper");
        zookeeper = new ZkClient(address);
        // wipe any stale registration from a previous incarnation first
        zookeeper.deleteRecursive("/" + root + "/" + name);
        zookeeper.createPersistent("/" + root + "/" + name, true);
        zookeeper.createPersistent("/" + root + "/" + name + "/hostname", hostname);
        zookeeper.createPersistent("/" + root + "/" + name + "/port", String.valueOf(port));
        zookeeper.createEphemeral("/" + root + "/" + name + "/heartbeat");

        log.debug("Starting process monitor");
        executor = Executors.newSingleThreadScheduledExecutor();
        executor.scheduleAtFixedRate(new ProcessMonitor(), 0, MONITOR_INTERVAL, TimeUnit.MILLISECONDS);

    }

    /**
     * Reverse of startContainer(): stop the monitor, deregister from
     * zookeeper, then terminate the redis-server process and wait for it.
     */
    @Override
    protected void stopContainer() throws Exception {
        log.info("stopping redis server container");

        log.debug("Stopping process monitor");
        executor.shutdownNow();

        log.debug("Updating zookeeper");
        zookeeper.deleteRecursive("/" + root + "/" + name);
        zookeeper.close();

        log.debug("Stopping process");
        process.destroy();
        process.waitFor();
    }

    /** Creates one no-op state model per partition. */
    public class RedisServerModelFactory extends StateModelFactory<RedisServerModel> {
        @Override
        public RedisServerModel createNewStateModel(String partitionName) {
            return new RedisServerModel();
        }
    }

    /** State model that only traces transitions; redis itself is partition-agnostic here. */
    @StateModelInfo(initialState = "OFFLINE", states = { "OFFLINE", "ONLINE", "DROPPED" })
    public class RedisServerModel extends StateModel {

        @Transition(from = "OFFLINE", to = "ONLINE")
        public void offlineToSlave(Message m, NotificationContext context) {
            // left blank
            log.trace(String.format("%s transitioning from %s to %s", context.getManager().getInstanceName(), m.getFromState(), m.getToState()));
        }

        @Transition(from = "ONLINE", to = "OFFLINE")
        public void slaveToOffline(Message m, NotificationContext context) {
            // left blank
            log.trace(String.format("%s transitioning from %s to %s", context.getManager().getInstanceName(), m.getFromState(), m.getToState()));
        }

        @Transition(from = "OFFLINE", to = "DROPPED")
        public void offlineToDropped(Message m, NotificationContext context) {
            // left blank
            log.trace(String.format("%s transitioning from %s to %s", context.getManager().getInstanceName(), m.getFromState(), m.getToState()));
        }

    }

    /**
     * Liveness probe: Process.exitValue() throws IllegalThreadStateException
     * while the process is still running (the expected case); if it returns,
     * the process has died and the container is marked failed.
     */
    private class ProcessMonitor implements Runnable {
        @Override
        public void run() {
            try {
                process.exitValue();
                log.warn("detected process failure");
                fail();
            } catch (Exception e) {
                // expected: process is still alive
            }
        }
    }

}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/container/ZookeeperMasterSlaveProcess.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/container/ZookeeperMasterSlaveProcess.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/container/ZookeeperMasterSlaveProcess.java
new file mode 100644
index 0000000..0c1b728
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/container/ZookeeperMasterSlaveProcess.java
@@ -0,0 +1,108 @@
+package org.apache.helix.autoscale.impl.container;
+
+import org.I0Itec.zkclient.ZkClient;
+import org.apache.helix.NotificationContext;
+import org.apache.helix.autoscale.container.ContainerProcess;
+import org.apache.helix.autoscale.container.ContainerProcessProperties;
+import org.apache.helix.model.Message;
+import org.apache.helix.participant.statemachine.StateModel;
+import org.apache.helix.participant.statemachine.StateModelFactory;
+import org.apache.helix.participant.statemachine.StateModelInfo;
+import org.apache.helix.participant.statemachine.Transition;
+import org.apache.log4j.Logger;
+
+/**
+ * Sample implementation of container with Helix participant for MasterSlave
+ * state model. Writes current state to separate zookeeper domain.
+ *
+ */
+public class ZookeeperMasterSlaveProcess extends ContainerProcess {
+
+    static final Logger log = Logger.getLogger(ZookeeperMasterSlaveProcess.class);
+
+    ZkClient zookeeper;
+
+    final String address;
+    final String root;
+    final String name;
+
+    /**
+     * @param properties container configuration; must supply "address" (zookeeper
+     *            connect string), "root" (zookeeper domain for state nodes) and
+     *            the container name
+     */
+    public ZookeeperMasterSlaveProcess(ContainerProcessProperties properties) throws Exception {
+        configure(properties);
+        setModelName("MasterSlave");
+        setModelFactory(new ZookeeperMasterSlaveModelFactory());
+
+        address = properties.getProperty("address");
+        root = properties.getProperty("root");
+        name = properties.getProperty(ContainerProcessProperties.NAME);
+    }
+
+    @Override
+    protected void startContainer() throws Exception {
+        log.info("starting zookeeper process container");
+
+        zookeeper = new ZkClient(address);
+        // create the per-container parent node, including missing parents
+        zookeeper.createPersistent("/" + root + "/" + name, true);
+    }
+
+    @Override
+    protected void stopContainer() throws Exception {
+        log.info("stopping zookeeper process container");
+
+        // NOTE(review): the persistent parent node created in startContainer()
+        // is left behind intentionally or not -- confirm; ephemeral state nodes
+        // disappear with the session on close.
+        zookeeper.close();
+    }
+
+    /** Factory producing one {@link ZookeeperMasterSlaveModel} per partition. */
+    public class ZookeeperMasterSlaveModelFactory extends StateModelFactory<ZookeeperMasterSlaveModel> {
+        @Override
+        public ZookeeperMasterSlaveModel createNewStateModel(String partitionName) {
+            return new ZookeeperMasterSlaveModel();
+        }
+    }
+
+    /**
+     * MasterSlave state model that mirrors each partition's current state into
+     * an ephemeral zookeeper node under /&lt;root&gt;/&lt;name&gt;.
+     */
+    @StateModelInfo(initialState = "OFFLINE", states = { "OFFLINE", "SLAVE", "MASTER", "DROPPED" })
+    public class ZookeeperMasterSlaveModel extends StateModel {
+
+        @Transition(from = "OFFLINE", to = "SLAVE")
+        public void offlineToSlave(Message m, NotificationContext context) {
+            transition(m, context);
+        }
+
+        @Transition(from = "SLAVE", to = "OFFLINE")
+        public void slaveToOffline(Message m, NotificationContext context) {
+            transition(m, context);
+        }
+
+        @Transition(from = "SLAVE", to = "MASTER")
+        public void slaveToMaster(Message m, NotificationContext context) {
+            transition(m, context);
+        }
+
+        @Transition(from = "MASTER", to = "SLAVE")
+        public void masterToSlave(Message m, NotificationContext context) {
+            transition(m, context);
+        }
+
+        @Transition(from = "OFFLINE", to = "DROPPED")
+        public void offlineToDropped(Message m, NotificationContext context) {
+            log.trace(String.format("%s transitioning from %s to %s", context.getManager().getInstanceName(), m.getFromState(), m.getToState()));
+
+            // dropped partitions leave no state node behind
+            zookeeper.delete(getStatePath(m));
+        }
+
+        /** Records the new state by replacing the partition's ephemeral node. */
+        public void transition(Message m, NotificationContext context) {
+            log.trace(String.format("%s transitioning from %s to %s", context.getManager().getInstanceName(), m.getFromState(), m.getToState()));
+
+            String path = getStatePath(m);
+            zookeeper.delete(path);
+            zookeeper.createEphemeral(path, m.getToState());
+        }
+
+        /** Zookeeper path of the per-partition state node. */
+        String getStatePath(Message m) {
+            return "/" + root + "/" + name + "/" + m.getResourceName() + "_" + m.getPartitionName();
+        }
+    }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/local/LocalContainerProvider.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/local/LocalContainerProvider.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/local/LocalContainerProvider.java
new file mode 100644
index 0000000..7e5d553
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/local/LocalContainerProvider.java
@@ -0,0 +1,119 @@
+package org.apache.helix.autoscale.impl.local;
+
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Properties;
+
+import org.apache.helix.autoscale.ContainerProvider;
+import org.apache.helix.autoscale.ContainerProviderService;
+import org.apache.helix.autoscale.container.ContainerProcess;
+import org.apache.helix.autoscale.container.ContainerProcessProperties;
+import org.apache.helix.autoscale.container.ContainerUtils;
+import org.apache.helix.autoscale.impl.local.LocalContainerSingleton.LocalProcess;
+import org.apache.helix.autoscale.provider.ProviderProperties;
+import org.apache.log4j.Logger;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * {@link ContainerProvider} spawning VM-local containers. Only works in single-VM
+ * deployments as container metadata is managed via singleton.
+ *
+ * @see LocalContainerSingleton
+ */
+class LocalContainerProvider implements ContainerProviderService {
+
+    static final Logger log = Logger.getLogger(LocalContainerProvider.class);
+
+    /** registered container types, keyed by type name */
+    final Map<String, Properties> types = new HashMap<String, Properties>();
+
+    String address;
+    String cluster;
+    String name;
+
+    /**
+     * Reads "address", "cluster" and "name" plus the per-type container
+     * properties from the provider configuration.
+     */
+    @Override
+    public void configure(Properties properties) throws Exception {
+        ProviderProperties providerProperties = new ProviderProperties();
+        providerProperties.putAll(properties);
+        Preconditions.checkArgument(providerProperties.isValid());
+
+        this.address = providerProperties.getProperty("address");
+        this.cluster = providerProperties.getProperty("cluster");
+        this.name = providerProperties.getProperty("name");
+
+        for (String containerType : providerProperties.getContainers()) {
+            registerType(containerType, providerProperties.getContainer(containerType));
+        }
+    }
+
+    @Override
+    public void start() throws Exception {
+        // left blank
+    }
+
+    @Override
+    public void stop() throws Exception {
+        destroyAll();
+    }
+
+    /**
+     * Spawns a VM-local container of a registered type and records it in the
+     * shared singleton registry.
+     */
+    @Override
+    public void create(String id, String type) throws Exception {
+        Map<String, LocalProcess> processes = LocalContainerSingleton.getProcesses();
+
+        synchronized (processes) {
+            Preconditions.checkState(!processes.containsKey(id), "Process '%s' already exists", id);
+            Preconditions.checkState(types.containsKey(type), "Type '%s' is not registered", type);
+
+            ContainerProcessProperties properties = new ContainerProcessProperties(types.get(type));
+
+            properties.setProperty(ContainerProcessProperties.CLUSTER, cluster);
+            properties.setProperty(ContainerProcessProperties.NAME, id);
+            properties.setProperty(ContainerProcessProperties.ADDRESS, address);
+
+            log.info(String.format("Running container '%s' (properties='%s')", id, properties));
+
+            ContainerProcess process = ContainerUtils.createProcess(properties);
+            process.start();
+
+            processes.put(id, new LocalProcess(id, name, process));
+        }
+    }
+
+    /** Stops the given container and removes it from the registry. */
+    @Override
+    public void destroy(String id) throws Exception {
+        Map<String, LocalProcess> processes = LocalContainerSingleton.getProcesses();
+
+        synchronized (processes) {
+            // consistent with create(): precondition failure raises IllegalArgumentException
+            Preconditions.checkArgument(processes.containsKey(id), "Process '%s' does not exist", id);
+
+            log.info(String.format("Destroying container '%s'", id));
+
+            LocalProcess local = processes.remove(id);
+
+            local.process.stop();
+        }
+    }
+
+    /** Destroys only the containers owned by this provider instance. */
+    @Override
+    public void destroyAll() {
+        Map<String, LocalProcess> processes = LocalContainerSingleton.getProcesses();
+
+        synchronized (processes) {
+            log.info("Destroying all owned processes");
+            // iterate over a copy; destroy() mutates the underlying map
+            for (LocalProcess local : new HashSet<LocalProcess>(processes.values())) {
+                if (local.owner.equals(name)) {
+                    try { destroy(local.id); } catch (Exception ignore) {}
+                }
+            }
+        }
+    }
+
+    void registerType(String name, Properties properties) {
+        log.debug(String.format("Registering container type '%s' (properties='%s')", name, properties));
+        types.put(name, properties);
+    }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/local/LocalContainerProviderProcess.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/local/LocalContainerProviderProcess.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/local/LocalContainerProviderProcess.java
new file mode 100644
index 0000000..ca1047c
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/local/LocalContainerProviderProcess.java
@@ -0,0 +1,45 @@
+package org.apache.helix.autoscale.impl.local;
+
+import java.util.Properties;
+
+import org.apache.helix.autoscale.Service;
+import org.apache.helix.autoscale.provider.ProviderProcess;
+import org.apache.helix.autoscale.provider.ProviderProperties;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * Configurable and runnable service for {@link LocalContainerProvider}.
+ *
+ */
+/** Bundles a {@link LocalContainerProvider} with its Helix participant process. */
+public class LocalContainerProviderProcess implements Service {
+    LocalContainerProvider provider;
+    ProviderProcess process;
+
+    /** Validates the configuration, then wires provider and participant together. */
+    @Override
+    public void configure(Properties properties) throws Exception {
+        ProviderProperties props = new ProviderProperties();
+        props.putAll(properties);
+
+        Preconditions.checkArgument(props.isValid(), "provider properties not valid (properties='%s')", properties);
+
+        provider = new LocalContainerProvider();
+        provider.configure(properties);
+
+        process = new ProviderProcess();
+        process.configure(props);
+        // NOTE(review): "Conteiner" typo originates in the ProviderProcess API
+        process.setConteinerProvider(provider);
+    }
+
+    /** Provider first, then the participant that exposes it to Helix. */
+    @Override
+    public void start() throws Exception {
+        provider.start();
+        process.start();
+    }
+
+    /** Reverse order of start(): participant down before the provider. */
+    @Override
+    public void stop() throws Exception {
+        process.stop();
+        provider.stop();
+    }
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/local/LocalContainerSingleton.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/local/LocalContainerSingleton.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/local/LocalContainerSingleton.java
new file mode 100644
index 0000000..74f9279
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/local/LocalContainerSingleton.java
@@ -0,0 +1,56 @@
+package org.apache.helix.autoscale.impl.local;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.helix.autoscale.container.ContainerProcess;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * Singleton tracking metadata for VM-local containers spawned via
+ * {@link LocalContainerProvider}.
+ *
+ */
+/**
+ * Singleton tracking metadata for VM-local containers spawned via
+ * {@link LocalContainerProvider}. Callers must synchronize on the map returned
+ * by {@link #getProcesses()} while reading or mutating it.
+ */
+public class LocalContainerSingleton {
+    final static Map<String, LocalProcess> processes = new HashMap<String, LocalProcess>();
+
+    private LocalContainerSingleton() {
+        // left blank
+    }
+
+    /** Shared registry; synchronize on the returned map for all access. */
+    public static Map<String, LocalProcess> getProcesses() {
+        return processes;
+    }
+
+    /** Stops every tracked process (best-effort) and empties the registry. */
+    public static void reset() {
+        synchronized (processes) {
+            for (LocalProcess local : processes.values()) {
+                // best-effort: one failing stop() must not prevent the
+                // remaining processes from being stopped or the map cleared
+                try { local.process.stop(); } catch (Exception ignore) {}
+            }
+            processes.clear();
+        }
+    }
+
+    /** Stops and removes a single process; fails if the id is unknown. */
+    public static void killProcess(String id) throws InterruptedException {
+        synchronized (processes) {
+            Preconditions.checkArgument(processes.containsKey(id), "Process '%s' does not exist", id);
+            ContainerProcess process = processes.get(id).process;
+            process.stop();
+            processes.remove(id);
+        }
+    }
+
+    /** Immutable record of a spawned local process and its owning provider. */
+    static class LocalProcess {
+        final String id;
+        final String owner;
+        final ContainerProcess process;
+
+        public LocalProcess(String id, String owner, ContainerProcess process) {
+            this.id = id;
+            this.owner = owner;
+            this.process = process;
+        }
+    }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/local/LocalStatusProvider.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/local/LocalStatusProvider.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/local/LocalStatusProvider.java
new file mode 100644
index 0000000..7cb02bb
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/local/LocalStatusProvider.java
@@ -0,0 +1,53 @@
+package org.apache.helix.autoscale.impl.local;
+
+import java.util.Map;
+import java.util.Properties;
+
+import org.apache.helix.autoscale.StatusProviderService;
+import org.apache.helix.autoscale.impl.local.LocalContainerSingleton.LocalProcess;
+
+/**
+ * StatusProvider for VM-local containers spawned via
+ * {@link LocalContainerProvider}. Runnable and configurable service.
+ *
+ */
+/**
+ * StatusProvider for VM-local containers spawned via
+ * {@link LocalContainerProvider}. Runnable and configurable service.
+ */
+public class LocalStatusProvider implements StatusProviderService {
+
+    /** True when the registry knows a container under this id. */
+    @Override
+    public boolean exists(String id) {
+        Map<String, LocalProcess> registry = LocalContainerSingleton.getProcesses();
+
+        synchronized (registry) {
+            return registry.containsKey(id);
+        }
+    }
+
+    /** True when the container is registered and its process reports active. */
+    @Override
+    public boolean isHealthy(String id) {
+        Map<String, LocalProcess> registry = LocalContainerSingleton.getProcesses();
+
+        synchronized (registry) {
+            LocalProcess entry = registry.get(id);
+            return entry != null && entry.process.isActive();
+        }
+    }
+
+    @Override
+    public void configure(Properties properties) throws Exception {
+        // left blank
+    }
+
+    @Override
+    public void start() throws Exception {
+        // left blank
+    }
+
+    @Override
+    public void stop() throws Exception {
+        // left blank
+    }
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/shell/ShellContainerProcess.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/shell/ShellContainerProcess.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/shell/ShellContainerProcess.java
new file mode 100644
index 0000000..6847ac5
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/shell/ShellContainerProcess.java
@@ -0,0 +1,93 @@
+package org.apache.helix.autoscale.impl.shell;
+
+import java.io.File;
+import java.util.concurrent.Executors;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.helix.autoscale.container.ContainerProcess;
+import org.apache.helix.autoscale.container.ContainerProcessProperties;
+import org.apache.helix.autoscale.container.ContainerUtils;
+import org.apache.log4j.Logger;
+
+/**
+ * Host process for Shell-based container. ContainerProcess configuration is
+ * read from path in first command-line argument. Status is maintained using
+ * temporary marker file. (Program entry point)
+ *
+ */
+/**
+ * Host process for Shell-based container. ContainerProcess configuration is
+ * read from the path in the first command-line argument, the marker directory
+ * from the second. Status is maintained using a temporary marker file.
+ * (Program entry point)
+ */
+class ShellContainerProcess {
+    static final Logger log = Logger.getLogger(ShellContainerProcess.class);
+
+    public static final long MONITOR_INTERVAL = 5000;
+
+    static String markerDir;
+    static ContainerProcess process;
+    static ScheduledExecutorService executor = Executors.newSingleThreadScheduledExecutor();
+
+    /**
+     * @param args args[0] path to container properties, args[1] marker directory
+     */
+    public static void main(String[] args) throws Exception {
+        final String propertiesPath = args[0];
+        markerDir = args[1];
+
+        ContainerProcessProperties properties = ContainerUtils.getPropertiesFromPath(propertiesPath);
+
+        process = ContainerUtils.createProcess(properties);
+
+        log.debug("Installing shutdown hooks");
+        Runtime.getRuntime().addShutdownHook(new Thread() {
+            @Override
+            public void run() {
+                log.debug("Running shutdown hook");
+                try {
+                    ShellContainerProcess.stop();
+                } catch (Exception ignore) {
+                }
+            }
+        });
+
+        log.debug("Launching shell container process");
+        process.start();
+
+        // marker signals "active" to the spawning ShellContainerProvider
+        ShellUtils.createMarker(new File(markerDir));
+
+        log.debug("Launching process monitor");
+        executor.scheduleAtFixedRate(new ProcessMonitor(), 0, MONITOR_INTERVAL, TimeUnit.MILLISECONDS);
+    }
+
+    /** Stops the container, removes the marker and shuts the monitor down. */
+    static void stop() throws InterruptedException {
+        log.debug("Shutting down shell process");
+        if (process != null) {
+            process.stop();
+            ShellUtils.destroyMarker(new File(markerDir));
+        }
+        if (executor != null) {
+            executor.shutdownNow();
+            // block until the monitor task has actually terminated
+            while (!executor.awaitTermination(100, TimeUnit.MILLISECONDS)) {
+                // keep waiting
+            }
+            executor = null;
+        }
+    }
+
+    /** Watches the container and exits/stops the VM on failure or shutdown. */
+    static class ProcessMonitor implements Runnable {
+        @Override
+        public void run() {
+            if (process.isFailed()) {
+                log.warn("detected process failure");
+                try {
+                    ShellContainerProcess.stop();
+                } catch (Exception ignore) {
+                }
+                System.exit(1);
+            }
+            if (!process.isActive()) {
+                log.warn("detected process shutdown");
+                try {
+                    ShellContainerProcess.stop();
+                } catch (Exception ignore) {
+                }
+            }
+        }
+    }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/shell/ShellContainerProvider.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/shell/ShellContainerProvider.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/shell/ShellContainerProvider.java
new file mode 100644
index 0000000..df4c6ef
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/shell/ShellContainerProvider.java
@@ -0,0 +1,151 @@
+package org.apache.helix.autoscale.impl.shell;
+
+import java.io.File;
+import java.io.FileWriter;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Properties;
+import java.util.concurrent.TimeoutException;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.helix.autoscale.ContainerProvider;
+import org.apache.helix.autoscale.ContainerProviderService;
+import org.apache.helix.autoscale.container.ContainerProcessProperties;
+import org.apache.helix.autoscale.impl.shell.ShellContainerSingleton.ShellProcess;
+import org.apache.helix.autoscale.provider.ProviderProperties;
+import org.apache.log4j.Logger;
+
+import com.google.common.base.Preconditions;
+import com.google.common.io.Files;
+
+/**
+ * {@link ContainerProvider} spawning shell-based containers. Only works in single-VM
+ * deployments as container metadata is managed via singleton.
+ *
+ * @see ShellContainerSingleton
+ */
+class ShellContainerProvider implements ContainerProviderService {
+
+    static final Logger log = Logger.getLogger(ShellContainerProvider.class);
+
+    static final String RUN_COMMAND = "/bin/sh";
+
+    static final long POLL_INTERVAL = 1000;
+    static final long CONTAINER_TIMEOUT = 60000;
+
+    // NOTE(review): unused -- all methods use ShellContainerSingleton.getProcesses();
+    // this field shadows that registry and can likely be removed
+    static final Map<String, ShellProcess> processes = new HashMap<String, ShellProcess>();
+
+    /** registered container types, keyed by type name */
+    final Map<String, Properties> types = new HashMap<String, Properties>();
+
+    String address;
+    String cluster;
+    String name;
+
+    @Override
+    public void configure(Properties properties) throws Exception {
+        Preconditions.checkNotNull(properties);
+        ProviderProperties providerProperties = new ProviderProperties();
+        providerProperties.putAll(properties);
+        Preconditions.checkArgument(providerProperties.isValid());
+
+        this.address = providerProperties.getProperty("address");
+        this.cluster = providerProperties.getProperty("cluster");
+        this.name = providerProperties.getProperty("name");
+
+        for (String containerType : providerProperties.getContainers()) {
+            registerType(containerType, providerProperties.getContainer(containerType));
+        }
+    }
+
+    @Override
+    public void start() throws Exception {
+        // left blank
+    }
+
+    @Override
+    public void stop() throws Exception {
+        destroyAll();
+    }
+
+    /**
+     * Spawns a shell-based container of a registered type and blocks (up to
+     * CONTAINER_TIMEOUT) until its marker file signals the active state.
+     *
+     * @throws TimeoutException if the container does not become active in time
+     */
+    @Override
+    public void create(String id, String type) throws Exception {
+        Map<String, ShellProcess> processes = ShellContainerSingleton.getProcesses();
+
+        synchronized (processes) {
+            Preconditions.checkState(!processes.containsKey(id), "Process '%s' already exists", id);
+            Preconditions.checkState(types.containsKey(type), "Type '%s' is not registered", type);
+
+            ContainerProcessProperties properties = new ContainerProcessProperties(types.get(type));
+
+            properties.setProperty(ContainerProcessProperties.CLUSTER, cluster);
+            properties.setProperty(ContainerProcessProperties.NAME, id);
+            properties.setProperty(ContainerProcessProperties.ADDRESS, address);
+
+            File tmpDir = Files.createTempDir();
+            File tmpProperties = new File(tmpDir.getCanonicalPath() + File.separator + ShellUtils.SHELL_CONTAINER_PROPERTIES);
+            File tmpMarker = new File(tmpDir.getCanonicalPath());
+
+            // close the writer explicitly; otherwise the properties file may be
+            // incomplete or the handle leaked when the store succeeds
+            FileWriter writer = new FileWriter(tmpProperties);
+            try {
+                properties.store(writer, id);
+            } finally {
+                writer.close();
+            }
+
+            log.info(String.format("Running container '%s' (properties='%s')", id, properties));
+
+            log.debug(String.format("Invoking command '%s %s %s %s'", RUN_COMMAND, ShellUtils.SHELL_CONTAINER_PATH, tmpProperties.getCanonicalPath(),
+                    tmpMarker.getCanonicalPath()));
+
+            ProcessBuilder builder = new ProcessBuilder();
+            builder.command(RUN_COMMAND, ShellUtils.SHELL_CONTAINER_PATH, tmpProperties.getCanonicalPath(), tmpMarker.getCanonicalPath());
+
+            Process process = builder.start();
+
+            processes.put(id, new ShellProcess(id, name, process, tmpDir));
+
+            long limit = System.currentTimeMillis() + CONTAINER_TIMEOUT;
+            while (!ShellUtils.hasMarker(tmpDir)) {
+                if (System.currentTimeMillis() >= limit) {
+                    throw new TimeoutException(String.format("Container '%s' failed to reach active state", id));
+                }
+                Thread.sleep(POLL_INTERVAL);
+            }
+        }
+    }
+
+    /** Terminates the container process and removes its temporary directory. */
+    @Override
+    public void destroy(String id) throws Exception {
+        Map<String, ShellProcess> processes = ShellContainerSingleton.getProcesses();
+
+        synchronized (processes) {
+            if (!processes.containsKey(id))
+                throw new IllegalArgumentException(String.format("Process '%s' does not exist", id));
+
+            log.info(String.format("Destroying container '%s'", id));
+
+            ShellProcess shell = processes.remove(id);
+            shell.process.destroy();
+            shell.process.waitFor();
+
+            FileUtils.deleteDirectory(shell.tmpDir);
+        }
+    }
+
+    /** Destroys only the containers owned by this provider instance. */
+    @Override
+    public void destroyAll() {
+        Map<String, ShellProcess> processes = ShellContainerSingleton.getProcesses();
+
+        synchronized (processes) {
+            log.info("Destroying all owned processes");
+            // iterate over a copy; destroy() mutates the underlying map
+            for (ShellProcess process : new HashSet<ShellProcess>(processes.values())) {
+                if (process.owner.equals(name)) {
+                    try { destroy(process.id); } catch (Exception ignore) {}
+                }
+            }
+        }
+    }
+
+    void registerType(String name, Properties properties) {
+        log.debug(String.format("Registering container type '%s' (properties='%s')", name, properties));
+        types.put(name, properties);
+    }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/shell/ShellContainerProviderProcess.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/shell/ShellContainerProviderProcess.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/shell/ShellContainerProviderProcess.java
new file mode 100644
index 0000000..1148b4e
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/shell/ShellContainerProviderProcess.java
@@ -0,0 +1,45 @@
+package org.apache.helix.autoscale.impl.shell;
+
+import java.util.Properties;
+
+import org.apache.helix.autoscale.Service;
+import org.apache.helix.autoscale.provider.ProviderProcess;
+import org.apache.helix.autoscale.provider.ProviderProperties;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * Configurable and runnable service for {@link ShellContainerProvider}.
+ *
+ */
+/** Bundles a {@link ShellContainerProvider} with its Helix participant process. */
+public class ShellContainerProviderProcess implements Service {
+    ShellContainerProvider provider;
+    ProviderProcess process;
+
+    /** Validates the configuration, then wires provider and participant together. */
+    @Override
+    public void configure(Properties properties) throws Exception {
+        ProviderProperties props = new ProviderProperties();
+        props.putAll(properties);
+
+        Preconditions.checkArgument(props.isValid(), "provider properties not valid (properties='%s')", properties);
+
+        provider = new ShellContainerProvider();
+        provider.configure(properties);
+
+        process = new ProviderProcess();
+        process.configure(props);
+        // NOTE(review): "Conteiner" typo originates in the ProviderProcess API
+        process.setConteinerProvider(provider);
+    }
+
+    /** Provider first, then the participant that exposes it to Helix. */
+    @Override
+    public void start() throws Exception {
+        provider.start();
+        process.start();
+    }
+
+    /** Reverse order of start(): participant down before the provider. */
+    @Override
+    public void stop() throws Exception {
+        process.stop();
+        provider.stop();
+    }
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/shell/ShellContainerSingleton.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/shell/ShellContainerSingleton.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/shell/ShellContainerSingleton.java
new file mode 100644
index 0000000..a82baea
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/shell/ShellContainerSingleton.java
@@ -0,0 +1,58 @@
+package org.apache.helix.autoscale.impl.shell;
+
+import java.io.File;
+import java.util.HashMap;
+import java.util.Map;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * Singleton tracking metadata for shell-based containers spawned via
+ * {@link ShellContainerProvider}.
+ *
+ */
+/**
+ * Singleton tracking metadata for shell-based containers spawned via
+ * {@link ShellContainerProvider}. Callers must synchronize on the map returned
+ * by {@link #getProcesses()} while reading or mutating it.
+ */
+public class ShellContainerSingleton {
+    static final Map<String, ShellProcess> processes = new HashMap<String, ShellProcess>();
+
+    private ShellContainerSingleton() {
+        // left blank
+    }
+
+    /** Shared registry; synchronize on the returned map for all access. */
+    public static Map<String, ShellProcess> getProcesses() {
+        return processes;
+    }
+
+    /** Destroys every tracked OS process (best-effort) and empties the registry. */
+    public static void reset() {
+        synchronized (processes) {
+            for (ShellProcess shell : processes.values()) {
+                shell.process.destroy();
+                // best-effort wait; interruption must not abort the cleanup loop
+                try { shell.process.waitFor(); } catch(Exception ignore) {}
+            }
+            processes.clear();
+        }
+    }
+
+    /** Destroys and removes a single process; fails if the id is unknown. */
+    public static void killProcess(String id) throws InterruptedException {
+        synchronized (processes) {
+            Preconditions.checkArgument(processes.containsKey(id), "Process '%s' does not exist", id);
+            Process process = processes.get(id).process;
+            process.destroy();
+            process.waitFor();
+            processes.remove(id);
+        }
+    }
+
+    /** Immutable record of a spawned shell process, its owner and temp dir. */
+    static class ShellProcess {
+        final String id;
+        final String owner;
+        final Process process;
+        // temp dir holding the container.properties file and the marker file
+        final File tmpDir;
+
+        public ShellProcess(String id, String owner, Process process, File tmpDir) {
+            this.id = id;
+            this.owner = owner;
+            this.process = process;
+            this.tmpDir = tmpDir;
+        }
+    }
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/shell/ShellStatusProvider.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/shell/ShellStatusProvider.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/shell/ShellStatusProvider.java
new file mode 100644
index 0000000..8094050
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/shell/ShellStatusProvider.java
@@ -0,0 +1,64 @@
+package org.apache.helix.autoscale.impl.shell;
+
+import java.util.Map;
+import java.util.Properties;
+
+import org.apache.helix.autoscale.StatusProviderService;
+import org.apache.helix.autoscale.impl.shell.ShellContainerSingleton.ShellProcess;
+
+/**
+ * StatusProvider for shell-based containers spawned via
+ * {@link ShellContainerProvider}. Runnable and configurable service.
+ *
+ */
+/**
+ * StatusProvider for shell-based containers spawned via
+ * {@link ShellContainerProvider}. Runnable and configurable service.
+ */
+public class ShellStatusProvider implements StatusProviderService {
+
+    /** True when the registry knows a container under this id. */
+    @Override
+    public boolean exists(String id) {
+        Map<String, ShellProcess> registry = ShellContainerSingleton.getProcesses();
+
+        synchronized (registry) {
+            return registry.containsKey(id);
+        }
+    }
+
+    /**
+     * True when the container is registered, its marker file is present and
+     * the underlying OS process has not terminated.
+     */
+    @Override
+    public boolean isHealthy(String id) {
+        Map<String, ShellProcess> registry = ShellContainerSingleton.getProcesses();
+
+        synchronized (registry) {
+            ShellProcess entry = registry.get(id);
+
+            if (entry == null || !ShellUtils.hasMarker(entry.tmpDir))
+                return false;
+
+            try {
+                // exitValue() only returns once the process has terminated
+                entry.process.exitValue();
+            } catch (IllegalThreadStateException stillRunning) {
+                return true;
+            }
+            return false;
+        }
+    }
+
+    @Override
+    public void configure(Properties properties) throws Exception {
+        // left blank
+    }
+
+    @Override
+    public void start() throws Exception {
+        // left blank
+    }
+
+    @Override
+    public void stop() throws Exception {
+        // left blank
+    }
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/shell/ShellUtils.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/shell/ShellUtils.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/shell/ShellUtils.java
new file mode 100644
index 0000000..02df0e0
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/shell/ShellUtils.java
@@ -0,0 +1,54 @@
+package org.apache.helix.autoscale.impl.shell;
+
+import java.io.File;
+import java.io.IOException;
+
+import org.apache.log4j.Logger;
+
+/**
+ * Utility for creating and destroying temporary marker files for shell-based
+ * containers.
+ *
+ */
+class ShellUtils {
+
+ static final Logger log = Logger.getLogger(ShellUtils.class);
+
+ static final String SHELL_CONTAINER_PATH = "target/metamanager-pkg/bin/shell-container-process.sh";
+ static final String SHELL_CONTAINER_PROPERTIES = "container.properties";
+ static final String SHELL_CONTAINER_MARKER = "active";
+
+ private ShellUtils() {
+ // left blank
+ }
+
+ public static boolean hasMarker(File processDir) {
+ try {
+ log.debug(String.format("checking for marker file '%s'", getMarkerFile(processDir)));
+ if (getMarkerFile(processDir).exists())
+ return true;
+ } catch (IOException e) {
+ // ignore
+ }
+ return false;
+ }
+
+ public static void createMarker(File processDir) throws IOException {
+ log.debug(String.format("creating marker file '%s'", getMarkerFile(processDir)));
+ getMarkerFile(processDir).createNewFile();
+ }
+
+ public static void destroyMarker(File processDir) {
+ try {
+ log.debug(String.format("destroying marker file '%s'", getMarkerFile(processDir)));
+ getMarkerFile(processDir).delete();
+ } catch (IOException e) {
+ // ignore
+ }
+ }
+
+ public static File getMarkerFile(File processDir) throws IOException {
+ return new File(processDir.getCanonicalPath() + File.separatorChar + SHELL_CONTAINER_MARKER);
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnContainerData.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnContainerData.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnContainerData.java
new file mode 100644
index 0000000..4ebfb5d
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnContainerData.java
@@ -0,0 +1,86 @@
+package org.apache.helix.autoscale.impl.yarn;
+
+/**
+ * Container meta data for YARN-based containers. Reflect lifecycle of container
+ * from requesting, to bootstrapping, active operation and shutdown. Read and
+ * written by {@link YarnMasterProcess}, {@link YarnContainerProvider} and
+ * {@link YarnContainerService}. Also read by {@link YarnStatusProvider}.
+ * Typically stored in zookeeper
+ *
+ */
+class YarnContainerData {
+
+ static enum ContainerState {
+ ACQUIRE,
+ CONNECTING,
+ ACTIVE,
+ TEARDOWN,
+ FAILED,
+ HALTED,
+ FINALIZE
+ }
+
+ String id;
+ ContainerState state;
+ int yarnId;
+ String owner;
+ YarnContainerProcessProperties properties;
+
+ public YarnContainerData() {
+ // left blank
+ }
+
+ public YarnContainerData(String id, String owner, YarnContainerProcessProperties properties) {
+ this.id = id;
+ this.state = ContainerState.ACQUIRE;
+ this.yarnId = -1;
+ this.owner = owner;
+ this.properties = properties;
+ }
+
+ public String getId() {
+ return id;
+ }
+
+ public YarnContainerData setId(String id) {
+ this.id = id;
+ return this;
+ }
+
+ public ContainerState getState() {
+ return state;
+ }
+
+ public YarnContainerData setState(ContainerState state) {
+ this.state = state;
+ return this;
+ }
+
+ public int getYarnId() {
+ return yarnId;
+ }
+
+ public YarnContainerData setYarnId(int yarnId) {
+ this.yarnId = yarnId;
+ return this;
+ }
+
+ public String getOwner() {
+ return owner;
+ }
+
+ public YarnContainerData setOwner(String owner) {
+ this.owner = owner;
+ return this;
+ }
+
+ public YarnContainerProcessProperties getProperties() {
+ return properties;
+ }
+
+ public YarnContainerData setProperties(YarnContainerProcessProperties properties) {
+ this.properties = properties;
+ return this;
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnContainerProcess.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnContainerProcess.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnContainerProcess.java
new file mode 100644
index 0000000..5f8d006
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnContainerProcess.java
@@ -0,0 +1,53 @@
+package org.apache.helix.autoscale.impl.yarn;
+
+import org.apache.helix.autoscale.container.ContainerProcess;
+import org.apache.log4j.Logger;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * Host process for {@link ContainerProcess}es spawned by
+ * {@link YarnContainerProvider}. Configured via *.properties file in working
+ * directory. Corresponds to regular container in YARN and is managed jointly by
+ * the application master and the Helix participant. (Program entry point)
+ *
+ */
+class YarnContainerProcess {
+ static final Logger log = Logger.getLogger(YarnContainerProcess.class);
+
+ public static void main(String[] args) throws Exception {
+ log.trace("BEGIN YarnProcess.main()");
+
+ final YarnContainerProcessProperties properties = YarnUtils.createContainerProcessProperties(YarnUtils
+ .getPropertiesFromPath(YarnUtils.YARN_CONTAINER_PROPERTIES));
+ Preconditions.checkArgument(properties.isValid(), "container properties not valid: %s", properties.toString());
+
+ log.debug("Launching yarndata service");
+ final ZookeeperYarnDataProvider metaService = new ZookeeperYarnDataProvider(properties.getYarnData());
+ metaService.start();
+
+ log.debug("Launching yarn container service");
+ final YarnContainerService yarnService = new YarnContainerService();
+ yarnService.configure(properties);
+ yarnService.setYarnDataProvider(metaService);
+ yarnService.start();
+
+ log.debug("Installing shutdown hooks");
+ Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() {
+ @Override
+ public void run() {
+ log.debug("Running shutdown hook");
+ yarnService.stop();
+ metaService.stop();
+ }
+ }));
+
+ System.out.println("Press ENTER to stop container process");
+ System.in.read();
+
+ log.debug("Stopping container services");
+ System.exit(0);
+
+ log.trace("END YarnProcess.main()");
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnContainerProcessProperties.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnContainerProcessProperties.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnContainerProcessProperties.java
new file mode 100644
index 0000000..5ad8f63
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnContainerProcessProperties.java
@@ -0,0 +1,40 @@
+package org.apache.helix.autoscale.impl.yarn;
+
+import org.apache.helix.autoscale.container.ContainerProcessProperties;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * Base configuration for {@link YarnContainerProcess}.
+ *
+ */
+public class YarnContainerProcessProperties extends ContainerProcessProperties {
+ /**
+ *
+ */
+ private static final long serialVersionUID = -2209509977839674160L;
+
+ public final static String YARNDATA = "yarndata";
+
+ public boolean isValid() {
+ return super.isValid() &&
+ containsKey(YARNDATA);
+ }
+
+ public String getYarnData() {
+ return getProperty(YARNDATA);
+ }
+
+ @Override
+ public Object get(Object key) {
+ Preconditions.checkState(containsKey(key));
+ return super.get(key);
+ }
+
+ @Override
+ public String getProperty(String key) {
+ Preconditions.checkState(containsKey(key));
+ return super.getProperty(key);
+ }
+
+}
[11/15] Adding Helix-task-framework and Yarn integration modules
Posted by ki...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnContainerProvider.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnContainerProvider.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnContainerProvider.java
new file mode 100644
index 0000000..d490edc
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnContainerProvider.java
@@ -0,0 +1,143 @@
+package org.apache.helix.autoscale.impl.yarn;
+
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Properties;
+import java.util.concurrent.TimeoutException;
+
+import org.apache.helix.autoscale.ContainerProvider;
+import org.apache.helix.autoscale.ContainerProviderService;
+import org.apache.helix.autoscale.impl.yarn.YarnContainerData.ContainerState;
+import org.apache.log4j.Logger;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * {@link ContainerProvider} spawning YARN-based containers. Reads and writes
+ * meta data using {@link YarnDataProvider}. Works in a distributed setting, but
+ * typically requires access to zookeeper.
+ *
+ */
+class YarnContainerProvider implements ContainerProviderService {
+
+ static final Logger log = Logger.getLogger(YarnContainerProvider.class);
+
+ static final long POLL_INTERVAL = 1000;
+ static final long CONTAINER_TIMEOUT = 60000;
+
+ /*
+ * CONTAINERS
+ * A (A, READY)
+ * B (B, RUNNING)
+ */
+
+ final Object notifier = new Object();
+ final Map<String, Properties> types = new HashMap<String, Properties>();
+
+ ZookeeperYarnDataProvider yarnDataService;
+ YarnContainerProviderProcess yarnApp;
+ YarnContainerProviderProperties properties;
+
+ @Override
+ public void configure(Properties properties) throws Exception {
+ YarnContainerProviderProperties yarnProps = new YarnContainerProviderProperties();
+ yarnProps.putAll(properties);
+ configure(yarnProps);
+ }
+
+ private void configure(YarnContainerProviderProperties properties) {
+ this.properties = properties;
+
+ for(String containerType : properties.getContainers()) {
+ registerType(containerType, properties.getContainer(containerType));
+ }
+ }
+
+ @Override
+ public void start() throws Exception {
+ Preconditions.checkNotNull(properties);
+ Preconditions.checkState(properties.isValid(), "provider properties not valid: %s", properties);
+
+ log.debug("Starting yarn container provider service");
+ yarnDataService = new ZookeeperYarnDataProvider();
+ yarnDataService.configure(properties);
+ yarnDataService.start();
+ }
+
+ @Override
+ public void stop() throws Exception {
+ log.debug("Stopping yarn container provider service");
+ destroyAll();
+
+ if(yarnDataService != null) {
+ yarnDataService.stop();
+ yarnDataService = null;
+ }
+ }
+
+ @Override
+ public void create(final String id, final String type) throws Exception {
+ Preconditions.checkArgument(types.containsKey(type), "Container type '%s' is not configured", type);
+
+ YarnContainerProcessProperties containerProperties = YarnUtils.createContainerProcessProperties(types.get(type));
+
+ log.info(String.format("Running container '%s' (properties='%s')", id, containerProperties));
+
+ yarnDataService.create(new YarnContainerData(id, properties.getName(), containerProperties));
+ waitForState(id, ContainerState.ACTIVE);
+ }
+
+ @Override
+ public void destroy(final String id) throws Exception {
+ YarnContainerData meta = yarnDataService.read(id);
+
+ if(meta.state == ContainerState.ACTIVE) {
+ log.info(String.format("Destroying active container, going to teardown"));
+ yarnDataService.update(meta.setState(ContainerState.TEARDOWN));
+
+ } else if(meta.state == ContainerState.FAILED) {
+ log.info(String.format("Destroying failed container, going to teardown"));
+ yarnDataService.update(meta.setState(ContainerState.TEARDOWN));
+
+ } else if(meta.state == ContainerState.FINALIZE) {
+ log.info(String.format("Destroying finalized container, skipping"));
+
+ } else {
+ throw new IllegalStateException(String.format("Container '%s' must be active, failed or finalized", id));
+ }
+
+ waitForState(id, ContainerState.FINALIZE);
+ yarnDataService.delete(id);
+ }
+
+ @Override
+ public void destroyAll() {
+ try {
+ for(YarnContainerData meta : yarnDataService.readAll()) {
+ if(meta.owner.equals(properties.getName())) {
+ try { destroy(meta.id); } catch (Exception ignore) {}
+ }
+ }
+ } catch (Exception ignore) {
+ // ignore
+ }
+ }
+
+ void waitForState(String id, ContainerState state) throws Exception, InterruptedException, TimeoutException {
+ long limit = System.currentTimeMillis() + CONTAINER_TIMEOUT;
+ YarnContainerData meta = yarnDataService.read(id);
+ while(meta.state != state) {
+ if(System.currentTimeMillis() >= limit) {
+ throw new TimeoutException(String.format("Container '%s' failed to reach state '%s' (currently is '%s')", id, state, meta.state));
+ }
+ Thread.sleep(POLL_INTERVAL);
+ meta = yarnDataService.read(id);
+ }
+ }
+
+ void registerType(String name, Properties properties) {
+ log.debug(String.format("Registering container type '%s' (properties='%s')", name, properties));
+ types.put(name, properties);
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnContainerProviderProcess.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnContainerProviderProcess.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnContainerProviderProcess.java
new file mode 100644
index 0000000..20a8b92
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnContainerProviderProcess.java
@@ -0,0 +1,158 @@
+package org.apache.helix.autoscale.impl.yarn;
+
+import java.io.File;
+import java.net.InetSocketAddress;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Properties;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.net.NetUtils;
+import org.apache.hadoop.yarn.api.ApplicationConstants;
+import org.apache.hadoop.yarn.api.ClientRMProtocol;
+import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationResponse;
+import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationRequest;
+import org.apache.hadoop.yarn.api.records.ApplicationId;
+import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
+import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
+import org.apache.hadoop.yarn.api.records.LocalResource;
+import org.apache.hadoop.yarn.api.records.LocalResourceType;
+import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.exceptions.YarnRemoteException;
+import org.apache.hadoop.yarn.ipc.YarnRPC;
+import org.apache.hadoop.yarn.util.Records;
+import org.apache.helix.autoscale.Service;
+import org.apache.log4j.Logger;
+
+import com.google.common.base.Preconditions;
+
/**
 * Configurable and runnable service for {@link YarnContainerProvider}.
 * Assembles a YARN application submission (command, resources, HDFS-staged
 * archives) for the application master and submits it to the resource manager;
 * {@link #stop()} force-kills the application again.
 *
 */
public class YarnContainerProviderProcess implements Service {

    static final Logger log = Logger.getLogger(YarnContainerProviderProcess.class);

    // Shell command template for the application master: script path, stdout dir, stderr dir.
    // NOTE(review): not final — presumably mutable for tests; confirm before relying on it.
    static String YARN_MASTER_COMMAND = "/bin/sh %s 1>%s/stdout 2>%s/stderr";

    Configuration conf;    // YARN/HDFS configuration, assembled in configure()
    YarnRPC rpc;           // RPC factory created from conf
    ClientRMProtocol rmClient; // resource manager proxy, created in connect()
    ApplicationId appId;   // id of the submitted application, set in start()
    File propertiesFile;   // temp file holding the serialized provider properties

    YarnContainerProviderProperties properties;

    @Override
    public void configure(Properties properties) throws Exception {
        configure(YarnUtils.createContainerProviderProperties(properties));
    }

    // Builds the YARN configuration from the typed properties and creates the RPC factory.
    private void configure(YarnContainerProviderProperties properties) {
        this.conf = new YarnConfiguration();
        this.conf.set(YarnConfiguration.RM_ADDRESS, properties.getResourceManager());
        this.conf.set(YarnConfiguration.RM_SCHEDULER_ADDRESS, properties.getScheduler());
        this.conf.set(FileSystem.FS_DEFAULT_NAME_KEY, properties.getHdfs());

        this.rpc = YarnRPC.create(conf);

        this.properties = properties;
    }

    /**
     * Connects to the resource manager, acquires an application id, stages the
     * master archive and properties to HDFS under that id, and submits the
     * application. Must be called after configure().
     */
    @Override
    public void start() throws Exception {
        Preconditions.checkNotNull(properties);
        Preconditions.checkState(properties.isValid());

        connect();

        String command = String.format(YARN_MASTER_COMMAND, YarnUtils.YARN_MASTER_PATH, ApplicationConstants.LOG_DIR_EXPANSION_VAR,
                ApplicationConstants.LOG_DIR_EXPANSION_VAR);

        log.info(String.format("Starting application '%s' provider '%s' (masterCommand='%s')", properties.getYarnData(), properties.getName(), command));

        log.debug(String.format("Running master command \"%s\"", command));

        // app id
        GetNewApplicationRequest appRequest = Records.newRecord(GetNewApplicationRequest.class);
        GetNewApplicationResponse appResponse = rmClient.getNewApplication(appRequest);

        this.appId = appResponse.getApplicationId();

        log.info(String.format("Acquired app id '%s' for '%s'", appId.toString(), properties.getName()));

        // command
        ContainerLaunchContext launchContext = Records.newRecord(ContainerLaunchContext.class);
        launchContext.setCommands(Collections.singletonList(command));

        // resource limit
        Resource resource = Records.newRecord(Resource.class);
        resource.setMemory(256); // TODO make dynamic
        launchContext.setResource(resource);

        // environment (deliberately empty — the master inherits the default env)
        Map<String, String> env = new HashMap<String, String>();
        launchContext.setEnvironment(env);

        // configuration, written to a temp file so it can be staged to HDFS below
        propertiesFile = YarnUtils.writePropertiesToTemp(properties);

        // HDFS: stage master archive, master properties and container archive
        // under a namespace derived from the app id
        final String namespace = appId.toString();
        final Path masterArchive = YarnUtils.copyToHdfs(YarnUtils.YARN_MASTER_ARCHIVE_PATH, YarnUtils.YARN_MASTER_STAGING, namespace, conf);
        final Path masterProperties = YarnUtils.copyToHdfs(propertiesFile.getCanonicalPath(), YarnUtils.YARN_MASTER_PROPERTIES, namespace, conf);
        final Path containerArchive = YarnUtils.copyToHdfs(YarnUtils.YARN_CONTAINER_ARCHIVE_PATH, YarnUtils.YARN_CONTAINER_STAGING, namespace, conf);

        // local resources
        Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();
        localResources.put(YarnUtils.YARN_MASTER_DESTINATION, YarnUtils.createHdfsResource(masterArchive, LocalResourceType.ARCHIVE, conf));
        localResources.put(YarnUtils.YARN_MASTER_PROPERTIES, YarnUtils.createHdfsResource(masterProperties, LocalResourceType.FILE, conf));
        // NOTE(review): the container archive is registered as a FILE while the
        // master archive uses ARCHIVE — looks intentional (the master unpacks it
        // itself), but confirm.
        localResources.put(YarnUtils.YARN_CONTAINER_STAGING, YarnUtils.createHdfsResource(containerArchive, LocalResourceType.FILE, conf));

        launchContext.setLocalResources(localResources);

        // user
        launchContext.setUser(properties.getUser());

        // app submission
        ApplicationSubmissionContext subContext = Records.newRecord(ApplicationSubmissionContext.class);
        subContext.setApplicationId(appId);
        subContext.setApplicationName(properties.getName());
        subContext.setAMContainerSpec(launchContext);

        SubmitApplicationRequest subRequest = Records.newRecord(SubmitApplicationRequest.class);
        subRequest.setApplicationSubmissionContext(subContext);

        log.info(String.format("Starting app id '%s'", appId.toString()));

        rmClient.submitApplication(subRequest);

    }

    /**
     * Force-kills the submitted application and cleans up the HDFS namespace
     * and the local temp properties file.
     * NOTE(review): assumes start() completed — appId and propertiesFile are
     * null otherwise and this would NPE; confirm callers respect that order.
     */
    @Override
    public void stop() throws YarnRemoteException {
        log.info(String.format("Stopping app id '%s'", appId.toString()));
        KillApplicationRequest killRequest = Records.newRecord(KillApplicationRequest.class);
        killRequest.setApplicationId(appId);

        rmClient.forceKillApplication(killRequest);

        // best-effort HDFS cleanup
        try { YarnUtils.destroyHdfsNamespace(appId.toString(), conf); } catch(Exception ignore) {}

        propertiesFile.delete();
    }

    // Creates the ClientRMProtocol proxy to the resource manager address from conf.
    void connect() {
        YarnConfiguration yarnConf = new YarnConfiguration(conf);
        InetSocketAddress rmAddress = NetUtils.createSocketAddr(yarnConf.get(YarnConfiguration.RM_ADDRESS, YarnConfiguration.DEFAULT_RM_ADDRESS));
        log.info("Connecting to ResourceManager at: " + rmAddress);
        this.rmClient = ((ClientRMProtocol) rpc.getProxy(ClientRMProtocol.class, rmAddress, conf));
    }
}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnContainerProviderProperties.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnContainerProviderProperties.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnContainerProviderProperties.java
new file mode 100644
index 0000000..85c8ab5
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnContainerProviderProperties.java
@@ -0,0 +1,64 @@
+package org.apache.helix.autoscale.impl.yarn;
+
+import org.apache.helix.autoscale.provider.ProviderProperties;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * Base configuration for {@link YarnContainerProviderProcess}
+ *
+ */
+public class YarnContainerProviderProperties extends ProviderProperties {
+ /**
+ *
+ */
+ private static final long serialVersionUID = -8853614843205587170L;
+
+ public final static String YARNDATA = "yarndata";
+ public final static String RESOURCEMANAGER = "resourcemananger";
+ public final static String SCHEDULER = "scheduler";
+ public final static String USER = "user";
+ public final static String HDFS = "hdfs";
+
+ public boolean isValid() {
+ return super.isValid() &&
+ containsKey(YARNDATA) &&
+ containsKey(RESOURCEMANAGER) &&
+ containsKey(SCHEDULER) &&
+ containsKey(USER) &&
+ containsKey(HDFS);
+ }
+
+ public String getYarnData() {
+ return getProperty(YARNDATA);
+ }
+
+ public String getResourceManager() {
+ return getProperty(RESOURCEMANAGER);
+ }
+
+ public String getScheduler() {
+ return getProperty(SCHEDULER);
+ }
+
+ public String getUser() {
+ return getProperty(USER);
+ }
+
+ public String getHdfs() {
+ return getProperty(HDFS);
+ }
+
+ @Override
+ public String getProperty(String key) {
+ Preconditions.checkState(containsKey(key));
+ return super.getProperty(key);
+ }
+
+ @Override
+ public Object get(Object key) {
+ Preconditions.checkState(containsKey(key));
+ return super.get(key);
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnContainerService.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnContainerService.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnContainerService.java
new file mode 100644
index 0000000..e730c25
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnContainerService.java
@@ -0,0 +1,156 @@
+package org.apache.helix.autoscale.impl.yarn;
+
+import java.io.File;
+import java.util.Properties;
+import java.util.concurrent.Executors;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.helix.autoscale.Service;
+import org.apache.helix.autoscale.container.ContainerProcess;
+import org.apache.helix.autoscale.container.ContainerProcessProperties;
+import org.apache.helix.autoscale.container.ContainerUtils;
+import org.apache.helix.autoscale.impl.yarn.YarnContainerData.ContainerState;
+import org.apache.log4j.Logger;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * Configurable and runnable service for YARN-based containers. Continuously
+ * checks container meta data and process state and triggers state changes and
+ * container setup and shutdown.
+ *
+ */
+class YarnContainerService implements Service {
+ static final Logger log = Logger.getLogger(YarnContainerService.class);
+
+ static final long CONTAINERSERVICE_INTERVAL = 1000;
+
+ YarnContainerProcessProperties properties;
+
+ YarnDataProvider metaService;
+ ScheduledExecutorService executor;
+
+ ContainerProcess process;
+
+ @Override
+ public void configure(Properties properties) throws Exception {
+ Preconditions.checkNotNull(properties);
+ YarnContainerProcessProperties containerProperties = new YarnContainerProcessProperties();
+ containerProperties.putAll(properties);
+ Preconditions.checkArgument(containerProperties.isValid());
+
+ this.properties = containerProperties;
+ }
+
+ public void setYarnDataProvider(YarnDataProvider metaService) {
+ this.metaService = metaService;
+ }
+
+ @Override
+ public void start() {
+ Preconditions.checkNotNull(metaService);
+ Preconditions.checkNotNull(properties);
+ Preconditions.checkState(properties.isValid());
+
+ log.debug("starting yarn container service");
+
+ executor = Executors.newSingleThreadScheduledExecutor();
+ executor.scheduleAtFixedRate(new ContainerStatusService(), 0, CONTAINERSERVICE_INTERVAL, TimeUnit.MILLISECONDS);
+ }
+
+ @Override
+ public void stop() {
+ log.debug("stopping yarn container service");
+
+ if (executor != null) {
+ executor.shutdown();
+ while (!executor.isTerminated()) {
+ try {
+ Thread.sleep(100);
+ } catch (InterruptedException e) {
+ // ignore
+ }
+ }
+ executor = null;
+ }
+
+ destroyLocalContainerNamespace();
+ }
+
+ class ContainerStatusService implements Runnable {
+ @Override
+ public void run() {
+ log.info("updating container status");
+
+ try {
+ if (!metaService.exists(properties.getName())) {
+ log.warn(String.format("YarnData for '%s' does not exist. Terminating yarn service.", properties.getName()));
+ process.stop();
+ stop();
+ }
+
+ YarnContainerData meta = metaService.read(properties.getName());
+
+ if (meta.state == ContainerState.CONNECTING) {
+ log.trace("container connecting");
+ try {
+ ContainerProcessProperties containerProperties = meta.getProperties();
+
+ containerProperties.setProperty(ContainerProcessProperties.CLUSTER, properties.getCluster());
+ containerProperties.setProperty(ContainerProcessProperties.ADDRESS, properties.getAddress());
+ containerProperties.setProperty(ContainerProcessProperties.NAME, properties.getName());
+
+ process = ContainerUtils.createProcess(containerProperties);
+ process.start();
+ } catch (Exception e) {
+ log.error("Failed to start participant, going to failed", e);
+ }
+
+ if (process.isActive()) {
+ log.trace("process active, activating container");
+ metaService.update(meta.setState(ContainerState.ACTIVE));
+
+ } else if (process.isFailed()) {
+ log.trace("process failed, failing container");
+ metaService.update(meta.setState(ContainerState.FAILED));
+
+ } else {
+ log.trace("process state unknown, failing container");
+ metaService.update(meta.setState(ContainerState.FAILED));
+ }
+ }
+
+ if (meta.state == ContainerState.ACTIVE) {
+ log.trace("container active");
+ if (process.isFailed()) {
+ log.trace("process failed, failing container");
+ metaService.update(meta.setState(ContainerState.FAILED));
+
+ } else if (!process.isActive()) {
+ log.trace("process not active, halting container");
+ process.stop();
+ metaService.update(meta.setState(ContainerState.HALTED));
+ }
+ }
+
+ if (meta.state == ContainerState.TEARDOWN) {
+ log.trace("container teardown");
+ process.stop();
+ metaService.update(meta.setState(ContainerState.HALTED));
+ }
+
+ } catch (Exception e) {
+ log.error(String.format("Error while updating container '%s' status", properties.getName()), e);
+ }
+ }
+ }
+
+ public static void destroyLocalContainerNamespace() {
+ log.info("cleaning up container directory");
+ FileUtils.deleteQuietly(new File(YarnUtils.YARN_CONTAINER_DESTINATION));
+ FileUtils.deleteQuietly(new File(YarnUtils.YARN_CONTAINER_PROPERTIES));
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnDataProvider.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnDataProvider.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnDataProvider.java
new file mode 100644
index 0000000..188045d
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnDataProvider.java
@@ -0,0 +1,73 @@
+package org.apache.helix.autoscale.impl.yarn;
+
+import java.util.Collection;
+
/**
 * Abstraction for a (remote) repository of yarn container meta data. Meta data
 * is read and updated by {@link YarnContainerProvider},
 * {@link YarnMasterProcess} and {@link YarnContainerProcess}.<br/>
 * <b>NOTE:</b> Each operation is assumed to be atomic.
 *
 */
interface YarnDataProvider {

    /**
     * Checks for existence of meta data about a container instance.
     *
     * @param id
     *            unique container id
     * @return true, if meta data exists
     */
    public boolean exists(String id);

    /**
     * Create meta data entry. Check for non-existence of meta data for given
     * container id and create node.
     *
     * @param data
     *            container meta data with unique id
     * @throws Exception
     *             if meta data entry already exists
     */
    public void create(YarnContainerData data) throws Exception;

    /**
     * Read meta data for given container id.
     *
     * @param id
     *            unique container id
     * @return yarn container data
     * @throws Exception
     *             if meta data entry for given id does not exist
     */
    public YarnContainerData read(String id) throws Exception;

    /**
     * Read all meta data stored for this domain space of yarn providers and
     * containers.
     *
     * @return collection of meta data entries, empty if none exist
     * @throws Exception
     *             on repository access errors
     */
    public Collection<YarnContainerData> readAll() throws Exception;

    /**
     * Write (overwrite) an existing meta data entry.
     *
     * @param data
     *            yarn container meta data
     * @throws Exception
     *             if meta data entry for given id does not exist
     */
    public void update(YarnContainerData data) throws Exception;

    /**
     * Delete meta data entry. Frees up the unique id to be reused. May throw an
     * exception on non-existence or be idempotent, depending on the implementation.
     *
     * @param id
     *            unique container id
     * @throws Exception
     *             on repository access errors
     */
    public void delete(String id) throws Exception;
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnMasterProcess.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnMasterProcess.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnMasterProcess.java
new file mode 100644
index 0000000..25b73f5
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnMasterProcess.java
@@ -0,0 +1,144 @@
+package org.apache.helix.autoscale.impl.yarn;
+
+import java.net.InetSocketAddress;
+import java.util.Map;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.net.NetUtils;
+import org.apache.hadoop.yarn.api.AMRMProtocol;
+import org.apache.hadoop.yarn.api.ApplicationConstants;
+import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationMasterRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterRequest;
+import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.ipc.YarnRPC;
+import org.apache.hadoop.yarn.util.ConverterUtils;
+import org.apache.hadoop.yarn.util.Records;
+import org.apache.helix.autoscale.provider.ProviderProcess;
+import org.apache.log4j.Logger;
+
+/**
+ * Host process for {@link YarnContainerProviderProcess}. Hasts application
+ * master in YARN and provider participant to Helix meta cluster. (Program entry
+ * point)
+ *
+ */
+class YarnMasterProcess {
+
+ static final Logger log = Logger.getLogger(YarnMasterProcess.class);
+
+ public static void main(String[] args) throws Exception {
+ log.trace("BEGIN YarnMaster.main()");
+
+ final ApplicationAttemptId appAttemptId = getApplicationAttemptId();
+ log.info(String.format("Got application attempt id '%s'", appAttemptId.toString()));
+
+ log.debug("Reading master properties");
+ YarnMasterProperties properties = YarnUtils.createMasterProperties(YarnUtils.getPropertiesFromPath(YarnUtils.YARN_MASTER_PROPERTIES));
+
+ if (!properties.isValid())
+ throw new IllegalArgumentException(String.format("master properties not valid: %s", properties.toString()));
+
+ log.debug("Connecting to resource manager");
+ Configuration conf = new YarnConfiguration();
+ conf.set(YarnConfiguration.RM_ADDRESS, properties.getResourceManager());
+ conf.set(YarnConfiguration.RM_SCHEDULER_ADDRESS, properties.getScheduler());
+ conf.set(FileSystem.FS_DEFAULT_NAME_KEY, properties.getHdfs());
+
+ final AMRMProtocol resourceManager = getResourceManager(conf);
+
+ // register the AM with the RM
+ log.debug("Registering application master");
+ RegisterApplicationMasterRequest appMasterRequest = Records.newRecord(RegisterApplicationMasterRequest.class);
+ appMasterRequest.setApplicationAttemptId(appAttemptId);
+ appMasterRequest.setHost("");
+ appMasterRequest.setRpcPort(0);
+ appMasterRequest.setTrackingUrl("");
+
+ resourceManager.registerApplicationMaster(appMasterRequest);
+
+ log.debug("Starting yarndata service");
+ final ZookeeperYarnDataProvider yarnDataService = new ZookeeperYarnDataProvider(properties.getYarnData());
+ yarnDataService.start();
+
+ log.debug("Starting yarn master service");
+ final YarnMasterService service = new YarnMasterService();
+ service.configure(properties);
+ service.setAttemptId(appAttemptId);
+ service.setYarnDataProvider(yarnDataService);
+ service.setProtocol(resourceManager);
+ service.setYarnConfiguration(conf);
+ service.start();
+
+ log.debug("Starting provider");
+ final YarnContainerProvider provider = new YarnContainerProvider();
+ provider.configure(properties);
+ provider.start();
+
+ log.debug("Starting provider process");
+ final ProviderProcess process = new ProviderProcess();
+ process.configure(properties);
+ process.setConteinerProvider(provider);
+ process.start();
+
+ log.debug("Installing shutdown hooks");
+ Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() {
+ @Override
+ public void run() {
+ log.debug("Stopping provider process");
+ process.stop();
+
+ log.debug("Stopping provider");
+ try { provider.stop(); } catch (Exception ignore) {}
+
+ log.debug("Stopping yarn master service");
+ service.stop();
+
+ log.debug("Stopping yarndata service");
+ yarnDataService.stop();
+
+ // finish application
+ log.debug("Sending finish request");
+ FinishApplicationMasterRequest finishReq = Records.newRecord(FinishApplicationMasterRequest.class);
+
+ finishReq.setAppAttemptId(getApplicationAttemptId());
+ finishReq.setFinishApplicationStatus(FinalApplicationStatus.SUCCEEDED);
+
+ try { resourceManager.finishApplicationMaster(finishReq); } catch(Exception ignore) {}
+ }
+ }));
+
+ log.trace("END YarnMaster.main()");
+ }
+
+ static AMRMProtocol getResourceManager(Configuration conf) {
+ // Connect to the Scheduler of the ResourceManager.
+ YarnConfiguration yarnConf = new YarnConfiguration(conf);
+ YarnRPC rpc = YarnRPC.create(yarnConf);
+ InetSocketAddress rmAddress = NetUtils.createSocketAddr(yarnConf.get(YarnConfiguration.RM_SCHEDULER_ADDRESS,
+ YarnConfiguration.DEFAULT_RM_SCHEDULER_ADDRESS));
+ log.info("Connecting to ResourceManager at " + rmAddress);
+ AMRMProtocol resourceManager = (AMRMProtocol) rpc.getProxy(AMRMProtocol.class, rmAddress, conf);
+ return resourceManager;
+ }
+
+ static ApplicationAttemptId getApplicationAttemptId() {
+ ContainerId containerId = ConverterUtils.toContainerId(getEnv(ApplicationConstants.AM_CONTAINER_ID_ENV));
+ ApplicationAttemptId appAttemptID = containerId.getApplicationAttemptId();
+ return appAttemptID;
+ }
+
+ static String getEnv(String key) {
+ Map<String, String> envs = System.getenv();
+ String clusterName = envs.get(key);
+ if (clusterName == null) {
+ // container id should always be set in the env by the framework
+ throw new IllegalArgumentException(String.format("%s not set in the environment", key));
+ }
+ return clusterName;
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnMasterProperties.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnMasterProperties.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnMasterProperties.java
new file mode 100644
index 0000000..3f49852
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnMasterProperties.java
@@ -0,0 +1,13 @@
+package org.apache.helix.autoscale.impl.yarn;
+
+/**
+ * Base configuration for {@link YarnMasterProcess}.
+ *
+ */
+public class YarnMasterProperties extends YarnContainerProviderProperties {
+ /**
+ *
+ */
+ private static final long serialVersionUID = -2209509980239674160L;
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnMasterService.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnMasterService.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnMasterService.java
new file mode 100644
index 0000000..03d4f72
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnMasterService.java
@@ -0,0 +1,414 @@
package org.apache.helix.autoscale.impl.yarn;

import java.io.File;
import java.net.InetSocketAddress;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;

import org.apache.commons.io.FileUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.net.NetUtils;
import org.apache.hadoop.yarn.api.AMRMProtocol;
import org.apache.hadoop.yarn.api.ApplicationConstants;
import org.apache.hadoop.yarn.api.ContainerManager;
import org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest;
import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
import org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest;
import org.apache.hadoop.yarn.api.records.AMResponse;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
import org.apache.hadoop.yarn.api.records.ContainerStatus;
import org.apache.hadoop.yarn.api.records.LocalResource;
import org.apache.hadoop.yarn.api.records.LocalResourceType;
import org.apache.hadoop.yarn.api.records.NodeId;
import org.apache.hadoop.yarn.api.records.Priority;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.api.records.ResourceRequest;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.exceptions.YarnRemoteException;
import org.apache.hadoop.yarn.ipc.YarnRPC;
import org.apache.hadoop.yarn.util.Records;
import org.apache.helix.autoscale.Service;
import org.apache.helix.autoscale.impl.yarn.YarnContainerData.ContainerState;
import org.apache.log4j.Logger;

import com.google.common.base.Preconditions;
import com.google.common.collect.Maps;

/**
 * Implements YARN application master. Continuously monitors container health in
 * YARN and yarn meta data updates. Spawns and destroys containers.
 *
 * Lifecycle: inject dependencies via the setters, then {@link #start()} to
 * begin the periodic update cycle; {@link #stop()} shuts the cycle down and
 * cleans up the local staging directory.
 *
 */
class YarnMasterService implements Service {

    static final Logger log = Logger.getLogger(YarnMasterService.class);

    static final String REQUIRED_TYPE = "container";

    static final long ZOOKEEPER_TIMEOUT = 5000;
    // period of the YarnService update cycle, in milliseconds
    static final long MASTERSERVICE_INTERVAL = 1000;

    static final String CONTAINERS = "CONTAINERS";

    // shell command template: script path, stdout dir, stderr dir
    static final String YARN_CONTAINER_COMMAND = "/bin/sh %s 1>%s/stdout 2>%s/stderr";

    YarnMasterProperties properties;
    AMRMProtocol protocol;
    ApplicationAttemptId attemptId;
    Configuration yarnConfig;
    YarnDataProvider yarnDataService;

    // YARN containers granted by the RM but not yet bound to a meta container
    final Map<ContainerId, Container> unassignedContainers = new HashMap<ContainerId, Container>();
    // YARN containers currently running a launched container process
    final Map<ContainerId, Container> activeContainers = new HashMap<ContainerId, Container>();
    // terminal statuses of YARN containers reported by the RM
    final Map<ContainerId, ContainerStatus> completedContainers = new HashMap<ContainerId, ContainerStatus>();
    // maps a YARN container id to the id of the meta container it hosts
    final Map<ContainerId, String> yarn2meta = new HashMap<ContainerId, String>();

    ScheduledExecutorService executor;

    @Override
    public void configure(Properties properties) throws Exception {
        YarnMasterProperties yarnProperties = YarnUtils.createMasterProperties(properties);
        Preconditions.checkArgument(yarnProperties.isValid());
        this.properties = yarnProperties;
    }

    /** Sets the AM-RM protocol proxy used to allocate and release containers. */
    public void setProtocol(AMRMProtocol protocol) {
        this.protocol = protocol;
    }

    /** Sets the application attempt id this master runs under. */
    public void setAttemptId(ApplicationAttemptId attemptId) {
        this.attemptId = attemptId;
    }

    /** Sets the YARN configuration used for HDFS staging and RPC proxies. */
    public void setYarnConfiguration(Configuration yarnConfig) {
        this.yarnConfig = yarnConfig;
    }

    /** Sets the meta data provider holding the desired container state. */
    public void setYarnDataProvider(YarnDataProvider yarnDataService) {
        this.yarnDataService = yarnDataService;
    }

    @Override
    public void start() {
        Preconditions.checkNotNull(properties);
        Preconditions.checkNotNull(protocol);
        Preconditions.checkNotNull(attemptId);
        Preconditions.checkNotNull(yarnConfig);
        Preconditions.checkNotNull(yarnDataService);

        log.debug("starting yarn master service");

        executor = Executors.newSingleThreadScheduledExecutor();
        executor.scheduleAtFixedRate(new YarnService(), 0, MASTERSERVICE_INTERVAL, TimeUnit.MILLISECONDS);
    }

    @Override
    public void stop() {
        log.debug("stopping yarn master service");

        if (executor != null) {
            executor.shutdown();
            // busy-wait for the in-flight update cycle to finish before cleanup
            while (!executor.isTerminated()) {
                try {
                    Thread.sleep(100);
                } catch (InterruptedException e) {
                    // ignore
                }
            }
            executor = null;
        }

        destroyLocalMasterNamespace();
    }

    /**
     * Reads all meta container entries and returns only those owned by this
     * provider (matched by the configured provider name).
     *
     * @return meta containers owned by this master, possibly empty
     * @throws Exception if the meta data store cannot be read
     */
    Collection<YarnContainerData> readOwnedYarnData() throws Exception {
        log.debug("reading container data");

        Collection<YarnContainerData> containers = new ArrayList<YarnContainerData>();
        for (YarnContainerData meta : yarnDataService.readAll()) {
            if (meta.owner.equals(properties.getName())) {
                containers.add(meta);
                log.debug(String.format("found container node '%s' (state=%s, yarnId=%s, owner=%s)", meta.id, meta.state, meta.yarnId, meta.owner));
            }
        }
        return containers;
    }

    /**
     * Periodic reconciliation task. Each cycle compares the desired state in
     * the meta data store with the actual YARN container state, requests or
     * releases containers accordingly, launches processes on newly assigned
     * containers and finalizes completed ones.
     */
    class YarnService implements Runnable {
        // monotonically increasing id required by the AM-RM allocate protocol
        int responseId = 0;

        @Override
        public void run() {
            try {
                log.debug("running yarn service update cycle");

                Collection<YarnContainerData> yarndata = readOwnedYarnData();

                // active meta containers
                int numMetaActive = countActiveMeta(yarndata);

                // newly acquired meta containers
                int numMetaAcquire = countAcquireMeta(yarndata);

                // destroyed meta containers
                List<ContainerId> destroyedReleasedIds = createDestroyedReleaseList(yarndata);
                int numMetaCompleted = destroyedReleasedIds.size();

                int numMeta = numMetaAcquire + numMetaActive + numMetaCompleted;

                // yarn containers
                int numYarnUnassigned = unassignedContainers.size();
                int numYarnActive = activeContainers.size();
                int numYarnCompleted = completedContainers.size();
                int numYarn = numYarnUnassigned + numYarnActive + numYarnCompleted;

                // negative value means we hold more unassigned containers than needed
                int numYarnRequired = numMetaAcquire - numYarnUnassigned;

                // additionally required containers
                int numRequestAdditional = Math.max(0, numYarnRequired);

                // overstock containers
                List<ContainerId> unneededReleasedIds = createOverstockReleaseList(numYarnRequired);

                int numReleased = destroyedReleasedIds.size() + unneededReleasedIds.size();

                log.debug(String.format("meta containers (total=%d, acquire=%d, active=%d, completed=%d)", numMeta, numMetaAcquire, numMetaActive, numMetaCompleted));
                log.debug(String.format("yarn containers (total=%d, unassigned=%d, active=%d, completed=%d)", numYarn, numYarnUnassigned, numYarnActive, numYarnCompleted));
                log.debug(String.format("requesting %d new containers, releasing %d", numRequestAdditional, numReleased));

                Priority priority = Records.newRecord(Priority.class);
                priority.setPriority(0);

                Resource resource = Records.newRecord(Resource.class);
                resource.setMemory(256); // TODO make dynamic

                // "*" = no locality constraint, any node is acceptable
                ResourceRequest resourceRequest = Records.newRecord(ResourceRequest.class);
                resourceRequest.setHostName("*");
                resourceRequest.setNumContainers(numRequestAdditional);
                resourceRequest.setPriority(priority);
                resourceRequest.setCapability(resource);

                AllocateRequest request = Records.newRecord(AllocateRequest.class);
                request.setResponseId(responseId);
                request.setApplicationAttemptId(attemptId);
                request.addAsk(resourceRequest);
                request.addAllReleases(destroyedReleasedIds);
                request.addAllReleases(unneededReleasedIds);

                responseId++;

                AllocateResponse allocateResponse = null;
                try {
                    allocateResponse = protocol.allocate(request);
                } catch (YarnRemoteException e) {
                    // ignore
                    log.error("Error allocating containers", e);
                    return;
                }

                AMResponse response = allocateResponse.getAMResponse();

                // remove unassigned container about to be freed
                for (ContainerId id : unneededReleasedIds) {
                    log.info(String.format("Unassigned container '%s' about to be freed, removing", id));
                    unassignedContainers.remove(id);
                }

                // newly added containers
                for (Container container : response.getAllocatedContainers()) {
                    unassignedContainers.put(container.getId(), container);
                }

                log.info(String.format("%d new containers available, %d required", unassignedContainers.size(), numMetaAcquire));

                // pair each meta container awaiting a yarn container (yarnId < 0)
                // with an unassigned yarn container and launch the process on it
                Iterator<Container> itYarn = unassignedContainers.values().iterator();
                Iterator<YarnContainerData> itMeta = yarndata.iterator();
                while (itYarn.hasNext() && itMeta.hasNext()) {
                    YarnContainerData meta = itMeta.next();

                    if (meta.yarnId >= 0)
                        continue;

                    Container containerYarn = itYarn.next();

                    log.debug(String.format("assigning yarn container '%s' to container node '%s'", containerYarn.getId(), meta.id));

                    String command = String.format(YARN_CONTAINER_COMMAND, YarnUtils.YARN_CONTAINER_PATH, ApplicationConstants.LOG_DIR_EXPANSION_VAR,
                            ApplicationConstants.LOG_DIR_EXPANSION_VAR);

                    log.debug(String.format("Running container command \"%s\"", command));

                    // configuration
                    YarnContainerProcessProperties containerProp = meta.getProperties();
                    containerProp.setProperty(YarnContainerProcessProperties.ADDRESS, properties.getAddress());
                    containerProp.setProperty(YarnContainerProcessProperties.CLUSTER, properties.getCluster());
                    containerProp.setProperty(YarnContainerProcessProperties.YARNDATA, properties.getYarnData());
                    containerProp.setProperty(YarnContainerProcessProperties.NAME, meta.id);

                    File propertiesFile = YarnUtils.writePropertiesToTemp(containerProp);

                    // HDFS: stage the container archive and properties under a
                    // namespace unique to this application and meta container
                    final String namespace = attemptId.getApplicationId().toString() + "/" + meta.id;
                    final Path containerArchive = YarnUtils.copyToHdfs(YarnUtils.YARN_CONTAINER_STAGING, YarnUtils.YARN_CONTAINER_STAGING, namespace, yarnConfig);
                    final Path containerProperties = YarnUtils.copyToHdfs(propertiesFile.getCanonicalPath(), YarnUtils.YARN_CONTAINER_PROPERTIES, namespace, yarnConfig);

                    // local resources
                    Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();
                    localResources.put(YarnUtils.YARN_CONTAINER_DESTINATION,
                            YarnUtils.createHdfsResource(containerArchive, LocalResourceType.ARCHIVE, yarnConfig));
                    localResources.put(YarnUtils.YARN_CONTAINER_PROPERTIES,
                            YarnUtils.createHdfsResource(containerProperties, LocalResourceType.FILE, yarnConfig));

                    ContainerLaunchContext context = Records.newRecord(ContainerLaunchContext.class);
                    context.setContainerId(containerYarn.getId());
                    context.setResource(containerYarn.getResource());
                    context.setEnvironment(Maps.<String, String> newHashMap());
                    context.setCommands(Collections.singletonList(command));
                    context.setLocalResources(localResources);
                    context.setUser(properties.getUser());

                    log.debug(String.format("container '%s' executing command '%s'", meta.id, command));

                    StartContainerRequest startReq = Records.newRecord(StartContainerRequest.class);
                    startReq.setContainerLaunchContext(context);

                    try {
                        getContainerManager(containerYarn).startContainer(startReq);

                    } catch (YarnRemoteException e) {
                        // abort the whole cycle; remaining assignments retry next tick
                        log.error(String.format("Error starting container '%s'", meta.id), e);
                        return;
                    }

                    log.debug(String.format("container '%s' started, updating container node", meta.id));

                    meta.setProperties(containerProp);
                    meta.setState(ContainerState.CONNECTING);
                    meta.setYarnId(containerYarn.getId().getId());
                    yarnDataService.update(meta);

                    yarn2meta.put(containerYarn.getId(), meta.id);

                    log.debug(String.format("removing '%s' from unassigned yarn containers and adding to active list", containerYarn.getId()));

                    itYarn.remove();
                    activeContainers.put(containerYarn.getId(), containerYarn);

                    // cleanup
                    propertiesFile.deleteOnExit();

                }

                // process containers the RM reports as terminated
                for (ContainerStatus status : response.getCompletedContainersStatuses()) {
                    ContainerId id = status.getContainerId();

                    log.info(String.format("Container '%s' completed", id));

                    if (unassignedContainers.containsKey(id)) {
                        log.info(String.format("Unassigned container '%s' terminated, removing", id));
                        unassignedContainers.remove(id);
                    }

                    if (activeContainers.containsKey(id)) {
                        log.info(String.format("Active container '%s' terminated, removing", id));
                        activeContainers.remove(id);

                        String metaId = yarn2meta.get(id);
                        YarnContainerData meta = yarnDataService.read(metaId);

                        log.debug(String.format("container '%s' finalized, updating container node", meta.id));

                        yarnDataService.update(meta.setState(ContainerState.FINALIZE));
                    }

                    completedContainers.put(id, status);
                }

                log.debug("yarn service update cycle complete");

            } catch (Exception e) {
                // never let an exception escape: it would cancel the scheduled task
                log.error("Error while executing yarn update cycle", e);
            }
        }

        /**
         * Builds the list of surplus unassigned containers to release back to
         * the RM and removes them from the unassigned map. Only acts when
         * numYarnRequired is negative (overstock).
         */
        private List<ContainerId> createOverstockReleaseList(int numYarnRequired) {
            List<ContainerId> unneededReleasedIds = new ArrayList<ContainerId>();
            Iterator<Container> itUnassigned = unassignedContainers.values().iterator();
            if (numYarnRequired < 0) {
                for (int i = 0; i < -numYarnRequired && itUnassigned.hasNext(); i++) {
                    Container container = itUnassigned.next();
                    unneededReleasedIds.add(container.getId());
                    log.debug(String.format("Container '%s' no longer required, removing", container.getId()));
                    itUnassigned.remove();
                }
            }
            return unneededReleasedIds;
        }

        /**
         * Builds the release list for meta containers in HALTED state by
         * reconstructing their YARN container ids from the stored yarnId.
         */
        private List<ContainerId> createDestroyedReleaseList(Collection<YarnContainerData> yarndata) {
            List<ContainerId> releasedIds = new ArrayList<ContainerId>();
            for (YarnContainerData meta : yarndata) {
                if (meta.state == ContainerState.HALTED) {
                    ContainerId containerId = Records.newRecord(ContainerId.class);
                    containerId.setApplicationAttemptId(attemptId);
                    containerId.setId(meta.yarnId);
                    releasedIds.add(containerId);
                    log.debug(String.format("releasing container '%s'", containerId));
                }
            }
            return releasedIds;
        }

        /** Counts meta containers waiting to be backed by a YARN container. */
        private int countAcquireMeta(Collection<YarnContainerData> yarndata) {
            int numMetaAcquire = 0;
            for (YarnContainerData meta : yarndata) {
                if (meta.state == ContainerState.ACQUIRE) {
                    numMetaAcquire++;
                }
            }
            return numMetaAcquire;
        }

        /** Counts meta containers in any non-terminal, non-acquiring state. */
        private int countActiveMeta(Collection<YarnContainerData> yarndata) {
            int numMetaActive = 0;
            for (YarnContainerData meta : yarndata) {
                if (meta.state != ContainerState.ACQUIRE && meta.state != ContainerState.HALTED && meta.state != ContainerState.FINALIZE) {
                    numMetaActive++;
                }
            }
            return numMetaActive;
        }
    }

    /**
     * Creates an RPC proxy to the ContainerManager on the node hosting the
     * given container, used to launch the container process.
     */
    private ContainerManager getContainerManager(Container container) {
        YarnConfiguration yarnConf = new YarnConfiguration(yarnConfig);
        YarnRPC rpc = YarnRPC.create(yarnConf);
        NodeId nodeId = container.getNodeId();
        String containerIpPort = String.format("%s:%d", nodeId.getHost(), nodeId.getPort());
        log.info("Connecting to ContainerManager at: " + containerIpPort);
        InetSocketAddress addr = NetUtils.createSocketAddr(containerIpPort);
        ContainerManager cm = (ContainerManager) rpc.getProxy(ContainerManager.class, addr, yarnConfig);
        return cm;
    }

    /**
     * Deletes the local working files of the master process (extracted
     * archive, properties file and container staging archive). Best-effort.
     */
    public static void destroyLocalMasterNamespace() {
        log.info("cleaning up master directory");
        FileUtils.deleteQuietly(new File(YarnUtils.YARN_MASTER_DESTINATION));
        FileUtils.deleteQuietly(new File(YarnUtils.YARN_MASTER_PROPERTIES));
        FileUtils.deleteQuietly(new File(YarnUtils.YARN_CONTAINER_STAGING));
    }

}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnStatusProvider.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnStatusProvider.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnStatusProvider.java
new file mode 100644
index 0000000..6ec4710
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnStatusProvider.java
@@ -0,0 +1,67 @@
+package org.apache.helix.autoscale.impl.yarn;
+
+import java.util.Properties;
+
+import org.apache.helix.autoscale.StatusProviderService;
+import org.apache.helix.autoscale.impl.yarn.YarnContainerData.ContainerState;
+import org.apache.log4j.Logger;
+
+/**
+ * StatusProvider for YARN-based containers spawned via
+ * {@link YarnContainerProvider}. Reads {@link YarnDataProvider} meta data.
+ * Runnable and configurable service.
+ *
+ */
+public class YarnStatusProvider implements StatusProviderService {
+
+ static final Logger log = Logger.getLogger(YarnStatusProvider.class);
+
+ String yarndata;
+
+ ZookeeperYarnDataProvider yarnDataService;
+
+ public YarnStatusProvider() {
+ // left blank
+ }
+
+ public YarnStatusProvider(String yarndata) {
+ this.yarndata = yarndata;
+ this.yarnDataService = new ZookeeperYarnDataProvider(yarndata);
+ }
+
+ @Override
+ public void configure(Properties properties) throws Exception {
+ this.yarndata = properties.getProperty("yarndata");
+ this.yarnDataService = new ZookeeperYarnDataProvider(yarndata);
+ }
+
+ @Override
+ public void start() throws Exception {
+ yarnDataService = new ZookeeperYarnDataProvider(yarndata);
+ yarnDataService.start();
+ }
+
+ @Override
+ public void stop() throws Exception {
+ if (yarnDataService != null) {
+ yarnDataService.stop();
+ yarnDataService = null;
+ }
+ }
+
+ @Override
+ public boolean exists(String id) {
+ return yarnDataService.exists(id);
+ }
+
+ @Override
+ public boolean isHealthy(String id) {
+ try {
+ return yarnDataService.read(id).state == ContainerState.ACTIVE;
+ } catch (Exception e) {
+ log.warn(String.format("Could not get activity data of %s", id));
+ return false;
+ }
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnUtils.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnUtils.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnUtils.java
new file mode 100644
index 0000000..1051696
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/YarnUtils.java
@@ -0,0 +1,174 @@
+package org.apache.helix.autoscale.impl.yarn;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.Writer;
+import java.nio.charset.Charset;
+import java.util.Properties;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileContext;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.UnsupportedFileSystemException;
+import org.apache.hadoop.yarn.api.records.LocalResource;
+import org.apache.hadoop.yarn.api.records.LocalResourceType;
+import org.apache.hadoop.yarn.api.records.LocalResourceVisibility;
+import org.apache.hadoop.yarn.api.records.URL;
+import org.apache.hadoop.yarn.util.ConverterUtils;
+import org.apache.hadoop.yarn.util.Records;
+import org.apache.helix.autoscale.impl.yarn.YarnContainerData.ContainerState;
+import org.apache.log4j.Logger;
+
+import com.google.common.base.Preconditions;
+import com.google.common.io.Files;
+import com.google.gson.Gson;
+import com.google.gson.GsonBuilder;
+import com.google.gson.TypeAdapter;
+import com.google.gson.stream.JsonReader;
+import com.google.gson.stream.JsonToken;
+import com.google.gson.stream.JsonWriter;
+
+/**
+ * Utility for writing property files, transferring data via HDFS and
+ * serializing {@link YarnContainerData} for zookeeper.
+ *
+ */
+class YarnUtils {
+
+ static final Logger log = Logger.getLogger(YarnUtils.class);
+
+ static final String YARN_MASTER_ARCHIVE_PATH = "target/metamanager-assembly.tar.gz";
+ static final String YARN_MASTER_PATH = "master/metamanager/bin/yarn-master-process.sh";
+ static final String YARN_MASTER_STAGING = "master.tar.gz";
+ static final String YARN_MASTER_DESTINATION = "master";
+ static final String YARN_MASTER_PROPERTIES = "master.properties";
+ static final String YARN_CONTAINER_ARCHIVE_PATH = "target/metamanager-assembly.tar.gz";
+ static final String YARN_CONTAINER_STAGING = "container.tar.gz";
+ static final String YARN_CONTAINER_PATH = "container/metamanager/bin/yarn-container-process.sh";
+ static final String YARN_CONTAINER_DESTINATION = "container";
+ static final String YARN_CONTAINER_PROPERTIES = "container.properties";
+
+ static Gson gson;
+ static {
+ GsonBuilder builder = new GsonBuilder();
+ builder.registerTypeAdapter(ContainerState.class, new ContainerStateAdapter());
+ builder.setPrettyPrinting();
+ gson = builder.create();
+ }
+
+ public static String toJson(YarnContainerData meta) {
+ return gson.toJson(meta);
+ }
+
+ public static YarnContainerData fromJson(String json) {
+ return gson.fromJson(json, YarnContainerData.class);
+ }
+
+ public static Properties getPropertiesFromPath(String path) throws IOException {
+ Properties properties = new Properties();
+ properties.load(new InputStreamReader(new FileInputStream(path)));
+ return properties;
+ }
+
+ public static File writePropertiesToTemp(Properties properties) throws IOException {
+ File tmpFile = File.createTempFile("provider", ".properties");
+ Writer writer = Files.newWriter(tmpFile, Charset.defaultCharset());
+ properties.store(writer, null);
+ writer.flush();
+ writer.close();
+ return tmpFile;
+ }
+
+ public static Path copyToHdfs(String source, String dest, String namespace, Configuration conf) throws IOException {
+ Path sourcePath = makeQualified(source);
+ Path destPath = makeQualified(conf.get(FileSystem.FS_DEFAULT_NAME_KEY) + "/" + namespace + "/" + dest);
+ log.debug(String.format("Copying '%s' to '%s'", sourcePath, destPath));
+
+ FileSystem fs = FileSystem.get(conf);
+ fs.copyFromLocalFile(false, true, sourcePath, destPath);
+ fs.close();
+ return destPath;
+ }
+
+ public static void destroyHdfsNamespace(String namespace, Configuration conf) throws IOException {
+ Path path = makeQualified(conf.get(FileSystem.FS_DEFAULT_NAME_KEY) + "/" + namespace);
+ log.debug(String.format("Deleting '%s'", path));
+
+ FileSystem fs = FileSystem.get(conf);
+ fs.delete(path, true);
+ fs.close();
+ }
+
+ public static LocalResource createHdfsResource(Path path, LocalResourceType type, Configuration conf) throws IOException {
+ FileSystem fs = FileSystem.get(conf);
+
+ URL url = ConverterUtils.getYarnUrlFromPath(path);
+
+ FileStatus status = fs.getFileStatus(path);
+
+ LocalResource resource = Records.newRecord(LocalResource.class);
+ resource.setResource(url);
+ resource.setSize(status.getLen());
+ resource.setTimestamp(status.getModificationTime());
+ resource.setType(type);
+ resource.setVisibility(LocalResourceVisibility.APPLICATION);
+
+ fs.close();
+
+ return resource;
+ }
+
+ static Path makeQualified(String path) throws UnsupportedFileSystemException {
+ return FileContext.getFileContext().makeQualified(new Path(path));
+ }
+
+ static class ContainerStateAdapter extends TypeAdapter<ContainerState> {
+ @Override
+ public ContainerState read(JsonReader reader) throws IOException {
+ if (reader.peek() == JsonToken.NULL) {
+ reader.nextNull();
+ return null;
+ }
+ return ContainerState.valueOf(reader.nextString());
+ }
+
+ @Override
+ public void write(JsonWriter writer, ContainerState value) throws IOException {
+ if (value == null) {
+ writer.nullValue();
+ return;
+ }
+ writer.value(value.name());
+ }
+ }
+
+ static YarnContainerProcessProperties createContainerProcessProperties(Properties properties) {
+ Preconditions.checkNotNull(properties);
+ YarnContainerProcessProperties yarnProp = new YarnContainerProcessProperties();
+ yarnProp.putAll(properties);
+ return yarnProp;
+ }
+
+ static YarnContainerProviderProperties createContainerProviderProperties(Properties properties) {
+ Preconditions.checkNotNull(properties);
+ YarnContainerProviderProperties yarnProp = new YarnContainerProviderProperties();
+ yarnProp.putAll(properties);
+ return yarnProp;
+ }
+
+ static YarnMasterProperties createMasterProperties(Properties properties) {
+ Preconditions.checkNotNull(properties);
+ YarnMasterProperties yarnProp = new YarnMasterProperties();
+ yarnProp.putAll(properties);
+ return yarnProp;
+ }
+
+ private YarnUtils() {
+ // left blank
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/ZookeeperYarnDataProvider.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/ZookeeperYarnDataProvider.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/ZookeeperYarnDataProvider.java
new file mode 100644
index 0000000..32f8c79
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/impl/yarn/ZookeeperYarnDataProvider.java
@@ -0,0 +1,100 @@
+package org.apache.helix.autoscale.impl.yarn;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Properties;
+
+import org.I0Itec.zkclient.ZkClient;
+import org.apache.helix.autoscale.Service;
+import org.apache.log4j.Logger;
+
+/**
+ * Configurable and runnable service for {@link YarnDataProvider} based on
+ * zookeeper.
+ *
+ */
+public class ZookeeperYarnDataProvider implements YarnDataProvider, Service {
+
+ static final Logger log = Logger.getLogger(ZookeeperYarnDataProvider.class);
+
+ static final String CONTAINER_NAMESPACE = "containers";
+
+ static final String BASE_PATH = "/" + CONTAINER_NAMESPACE;
+
+ static final int META_TIMEOUT = 5000;
+ static final long POLL_INTERVAL = 100;
+
+ String yarndata;
+
+ ZkClient client;
+
+ public ZookeeperYarnDataProvider() {
+ // left blank
+ }
+
+ public ZookeeperYarnDataProvider(String yarndataAddress) {
+ this.yarndata = yarndataAddress;
+ }
+
+ @Override
+ public void configure(Properties properties) throws Exception {
+ this.yarndata = properties.getProperty("yarndata");
+ }
+
+ @Override
+ public void start() {
+ log.debug(String.format("starting yarndata service for '%s'", yarndata));
+
+ client = new ZkClient(yarndata, META_TIMEOUT, META_TIMEOUT);
+
+ client.createPersistent(BASE_PATH, true);
+ }
+
+ @Override
+ public void stop() {
+ log.debug(String.format("stopping yarndata service for '%s'", yarndata));
+ if (client != null) {
+ client.close();
+ client = null;
+ }
+ }
+
+ @Override
+ public boolean exists(String id) {
+ return client.exists(makePath(id));
+ }
+
+ @Override
+ public void create(YarnContainerData meta) throws Exception {
+ client.createEphemeral(makePath(meta.id), YarnUtils.toJson(meta));
+ }
+
+ @Override
+ public YarnContainerData read(String id) throws Exception {
+ return YarnUtils.fromJson(client.<String> readData(makePath(id)));
+ }
+
+ @Override
+ public Collection<YarnContainerData> readAll() throws Exception {
+ Collection<YarnContainerData> yarndata = new ArrayList<YarnContainerData>();
+ for (String id : client.getChildren(BASE_PATH)) {
+ yarndata.add(YarnUtils.fromJson(client.<String> readData(makePath(id))));
+ }
+ return yarndata;
+ }
+
+ @Override
+ public void update(YarnContainerData meta) throws Exception {
+ client.writeData(makePath(meta.id), YarnUtils.toJson(meta));
+ }
+
+ @Override
+ public void delete(String id) throws Exception {
+ client.delete(makePath(id));
+ }
+
+ String makePath(String containerId) {
+ return BASE_PATH + "/" + containerId;
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/provider/ProviderProcess.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/provider/ProviderProcess.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/provider/ProviderProcess.java
new file mode 100644
index 0000000..2fe3166
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/provider/ProviderProcess.java
@@ -0,0 +1,82 @@
+package org.apache.helix.autoscale.provider;
+
+import java.util.Properties;
+
+import org.apache.helix.HelixAdmin;
+import org.apache.helix.HelixManager;
+import org.apache.helix.HelixManagerFactory;
+import org.apache.helix.InstanceType;
+import org.apache.helix.autoscale.ClusterAdmin;
+import org.apache.helix.autoscale.ContainerProvider;
+import org.apache.helix.autoscale.HelixClusterAdmin;
+import org.apache.helix.autoscale.Service;
+import org.apache.helix.manager.zk.ZKHelixAdmin;
+import org.apache.helix.model.InstanceConfig;
+import org.apache.log4j.Logger;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * Helix participant for ContainerProvider. Configurable via ProviderProperties
+ * and runnable service.
+ *
+ */
+public class ProviderProcess implements Service {
+    static final Logger log = Logger.getLogger(ProviderProcess.class);
+
+    ClusterAdmin admin;
+
+    ProviderProperties properties;
+    ContainerProvider provider;
+    HelixAdmin helixAdmin;
+    HelixManager participantManager;
+
+    /**
+     * Reads and validates the provider configuration. Must be invoked before
+     * {@link #start()}.
+     *
+     * @throws IllegalArgumentException if mandatory keys are missing
+     */
+    @Override
+    public void configure(Properties properties) throws Exception {
+        Preconditions.checkNotNull(properties);
+        ProviderProperties providerProperties = new ProviderProperties();
+        providerProperties.putAll(properties);
+        Preconditions.checkArgument(providerProperties.isValid());
+
+        this.properties = providerProperties;
+    }
+
+    /**
+     * Injects the container provider this participant delegates to. Must be
+     * set before {@link #start()}.
+     */
+    public void setContainerProvider(ContainerProvider provider) {
+        this.provider = provider;
+    }
+
+    /**
+     * @deprecated misspelled name kept for backward compatibility; use
+     *             {@link #setContainerProvider(ContainerProvider)} instead.
+     */
+    @Deprecated
+    public void setConteinerProvider(ContainerProvider provider) {
+        setContainerProvider(provider);
+    }
+
+    @Override
+    public void start() throws Exception {
+        Preconditions.checkNotNull(provider, "container provider not set");
+
+        log.info(String.format("Registering provider '%s' at '%s/%s'", properties.getName(), properties.getMetaAddress(), properties.getMetaCluster()));
+        // register this provider as a participant instance of the meta cluster
+        HelixAdmin metaHelixAdmin = new ZKHelixAdmin(properties.getMetaAddress());
+        metaHelixAdmin.addInstance(properties.getMetaCluster(), new InstanceConfig(properties.getName()));
+        metaHelixAdmin.close();
+
+        log.info(String.format("Starting provider '%s'", properties.getName()));
+        // admin handle to the managed cluster, used by the state model on transitions
+        helixAdmin = new ZKHelixAdmin(properties.getAddress());
+        admin = new HelixClusterAdmin(properties.getCluster(), helixAdmin);
+
+        participantManager = HelixManagerFactory.getZKHelixManager(properties.getMetaCluster(), properties.getName(), InstanceType.PARTICIPANT,
+                properties.getMetaAddress());
+        participantManager.getStateMachineEngine().registerStateModelFactory("OnlineOffline", new ProviderStateModelFactory(provider, admin));
+        participantManager.connect();
+
+        log.info(String.format("Successfully started provider '%s'", properties.getName()));
+    }
+
+    @Override
+    public void stop() {
+        log.info(String.format("Stopping provider '%s'", properties.getName()));
+        if (participantManager != null) {
+            participantManager.disconnect();
+            participantManager = null;
+        }
+        if (helixAdmin != null) {
+            helixAdmin.close();
+            helixAdmin = null;
+        }
+    }
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/provider/ProviderProperties.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/provider/ProviderProperties.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/provider/ProviderProperties.java
new file mode 100644
index 0000000..eef9fad
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/provider/ProviderProperties.java
@@ -0,0 +1,97 @@
+package org.apache.helix.autoscale.provider;
+
+import java.util.Collections;
+import java.util.Map;
+import java.util.Properties;
+import java.util.Set;
+
+import org.apache.helix.autoscale.bootstrapper.BootUtils;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * Base configuration for {@link ProviderProcess}.
+ *
+ */
+public class ProviderProperties extends Properties {
+
+    private static final long serialVersionUID = -2209509977839674160L;
+
+    public final static String ADDRESS = "address";
+    public final static String CLUSTER = "cluster";
+    public final static String METAADDRESS = "metaaddress";
+    public final static String METACLUSTER = "metacluster";
+    public final static String NAME = "name";
+
+    public final static String CONTAINER_NAMESPACE = "containers";
+
+    /** Checks that all mandatory keys are present. */
+    public boolean isValid() {
+        return (containsKey(ADDRESS) &&
+                containsKey(CLUSTER) &&
+                containsKey(METAADDRESS) &&
+                containsKey(METACLUSTER) &&
+                containsKey(NAME));
+    }
+
+    public String getAddress() {
+        return getProperty(ADDRESS);
+    }
+
+    public String getCluster() {
+        return getProperty(CLUSTER);
+    }
+
+    public String getMetaAddress() {
+        return getProperty(METAADDRESS);
+    }
+
+    public String getMetaCluster() {
+        return getProperty(METACLUSTER);
+    }
+
+    public String getName() {
+        return getProperty(NAME);
+    }
+
+    /** @return ids of all configured container types, empty if none */
+    public Set<String> getContainers() {
+        if (!BootUtils.hasNamespace(this, CONTAINER_NAMESPACE))
+            return Collections.emptySet();
+        return BootUtils.getNamespaces(BootUtils.getNamespace(this, CONTAINER_NAMESPACE));
+    }
+
+    public boolean hasContainer(String id) {
+        if (!BootUtils.hasNamespace(this, CONTAINER_NAMESPACE)) return false;
+        if (!BootUtils.hasNamespace(BootUtils.getNamespace(this, CONTAINER_NAMESPACE), id)) return false;
+        return true;
+    }
+
+    public Properties getContainer(String id) {
+        Preconditions.checkArgument(BootUtils.hasNamespace(this, CONTAINER_NAMESPACE), "no container namespace");
+        Preconditions.checkArgument(BootUtils.hasNamespace(BootUtils.getNamespace(this, CONTAINER_NAMESPACE), id), "container %s not configured", id);
+        return BootUtils.getNamespace(BootUtils.getNamespace(this, CONTAINER_NAMESPACE), id);
+    }
+
+    /** Adds a container type configuration under the container namespace. */
+    public void addContainer(String id, Properties properties) {
+        Preconditions.checkArgument(!getContainers().contains(id), "Already contains container type %s", id);
+
+        // add container config, one namespaced key per entry
+        for (Map.Entry<Object, Object> entry : properties.entrySet()) {
+            this.put(CONTAINER_NAMESPACE + "." + id + "." + entry.getKey(), entry.getValue());
+        }
+    }
+
+    /**
+     * Fail-fast lookup: unlike {@link Properties#get(Object)}, an absent key
+     * raises an IllegalStateException rather than returning null.
+     */
+    @Override
+    public Object get(Object key) {
+        Preconditions.checkState(containsKey(key));
+        return super.get(key);
+    }
+
+    /**
+     * Fail-fast lookup: an absent key raises an IllegalStateException rather
+     * than returning null. Use {@link #getProperty(String, String)} for
+     * optional keys.
+     */
+    @Override
+    public String getProperty(String key) {
+        Preconditions.checkState(containsKey(key));
+        return super.getProperty(key);
+    }
+
+    /**
+     * Restores the inherited default-value contract. The JDK implementation
+     * delegates to {@link #getProperty(String)}, which this class overrides to
+     * fail fast -- without this override an absent key would throw instead of
+     * yielding the supplied default.
+     */
+    @Override
+    public String getProperty(String key, String defaultValue) {
+        if (!containsKey(key)) {
+            return defaultValue;
+        }
+        return super.getProperty(key);
+    }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/provider/ProviderRebalancer.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/provider/ProviderRebalancer.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/provider/ProviderRebalancer.java
new file mode 100644
index 0000000..2b6e428
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/provider/ProviderRebalancer.java
@@ -0,0 +1,352 @@
+package org.apache.helix.autoscale.provider;
+
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.SortedSet;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.helix.HelixManager;
+import org.apache.helix.autoscale.StatusProvider;
+import org.apache.helix.autoscale.TargetProvider;
+import org.apache.helix.controller.rebalancer.Rebalancer;
+import org.apache.helix.controller.stages.ClusterDataCache;
+import org.apache.helix.controller.stages.CurrentStateOutput;
+import org.apache.helix.model.IdealState;
+import org.apache.helix.model.LiveInstance;
+import org.apache.helix.model.Partition;
+import org.apache.helix.model.Resource;
+import org.apache.helix.model.ResourceAssignment;
+import org.apache.log4j.Logger;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.Sets;
+
+/**
+ * Rebalancer for meta cluster. Polls {@link TargetProvider} and
+ * {@link StatusProvider} and reads and sets IdealState of meta cluster participants (
+ * {@link ProviderProcess}). The number of active container is set to the target
+ * count. Failed containers are shut down and restarted on any available
+ * provider. Also, container counts are balanced across multiple providers.<br/>
+ * <b>NOTE:</b> status and target provider are injected via
+ * {@link ProviderRebalancerSingleton}<br/>
+ * <br/>
+ * <b>IdealState mapping:</b><br/>
+ * resource = container type<br/>
+ * partition = logical container instance<br/>
+ * instance = container provider<br/>
+ * status = physical container instance presence<br/>
+ */
+public class ProviderRebalancer implements Rebalancer {
+
+    static final Logger log = Logger.getLogger(ProviderRebalancer.class);
+
+    static final long UPDATE_INTERVAL_MIN = 1500;
+
+    static final Object lock = new Object();
+    static long nextUpdate = 0;
+
+    TargetProvider targetProvider;
+    StatusProvider statusProvider;
+    HelixManager manager;
+
+    @Override
+    public void init(HelixManager manager) {
+        // target and status providers are injected statically, see class javadoc
+        this.targetProvider = ProviderRebalancerSingleton.getTargetProvider();
+        this.statusProvider = ProviderRebalancerSingleton.getStatusProvider();
+        this.manager = manager;
+    }
+
+    @Override
+    public ResourceAssignment computeResourceMapping(Resource resource, IdealState idealState, CurrentStateOutput currentStateOutput,
+            ClusterDataCache clusterData) {
+
+        final String resourceName = resource.getResourceName();
+        final String containerType = resourceName;
+
+        // logical containers (= partitions) in deterministic, index-aware order
+        final SortedSet<String> allContainers = Sets.newTreeSet(new IndexedNameComparator());
+        allContainers.addAll(idealState.getPartitionSet());
+
+        // live providers (= meta cluster participants)
+        final SortedSet<String> allProviders = Sets.newTreeSet(new IndexedNameComparator());
+        for (LiveInstance instance : clusterData.getLiveInstances().values()) {
+            allProviders.add(instance.getId());
+        }
+
+        final ResourceState currentState = new ResourceState(resourceName, currentStateOutput);
+
+        // target container count
+        log.debug(String.format("Retrieving target container count for type '%s'", containerType));
+        int targetCount = -1;
+        try {
+            targetCount = targetProvider.getTargetContainerCount(containerType);
+        } catch (Exception e) {
+            // no target available: leave the assignment empty for this round
+            log.error(String.format("Could not retrieve target count for '%s'", containerType), e);
+            return new ResourceAssignment(resourceName);
+        }
+
+        // provider sanity check
+        if (allProviders.isEmpty()) {
+            log.warn("Could not find any providers");
+            return new ResourceAssignment(resourceName);
+        }
+
+        // all containers
+        SortedSet<String> assignedContainers = getAssignedContainers(currentState, allContainers);
+        SortedSet<String> failedContainers = getFailedContainers(currentState, allContainers);
+
+        log.info(String.format("Rebalancing '%s' (target=%d, active=%d, failures=%d)", resourceName, targetCount, assignedContainers.size(),
+                failedContainers.size()));
+
+        if (log.isDebugEnabled()) {
+            log.debug(String.format("%s: assigned containers %s", resourceName, assignedContainers));
+            log.debug(String.format("%s: failed containers %s", resourceName, failedContainers));
+        }
+
+        // balance containers across providers: no provider exceeds the ceiling
+        int maxCountPerProvider = (int) Math.ceil(targetCount / (float) allProviders.size());
+
+        ResourceAssignment assignment = new ResourceAssignment(resourceName);
+        CountMap counts = new CountMap(allProviders);
+        int assignmentCount = 0;
+
+        // currently assigned: keep healthy, within-quota containers ONLINE;
+        // take failed or over-quota ones OFFLINE on their current provider
+        for (String containerName : assignedContainers) {
+            String providerName = getProvider(currentState, containerName);
+            Partition partition = new Partition(containerName);
+
+            if (failedContainers.contains(containerName)) {
+                log.warn(String.format("Container '%s:%s' failed, going offline", providerName, containerName));
+                assignment.addReplicaMap(partition, Collections.singletonMap(providerName, "OFFLINE"));
+
+            } else if (counts.get(providerName) >= maxCountPerProvider) {
+                log.warn(String.format("Container '%s:%s' misassigned, going offline", providerName, containerName));
+                assignment.addReplicaMap(partition, Collections.singletonMap(providerName, "OFFLINE"));
+
+            } else {
+                assignment.addReplicaMap(partition, Collections.singletonMap(providerName, "ONLINE"));
+            }
+
+            counts.increment(providerName);
+            assignmentCount++;
+        }
+
+        // currently unassigned: fill up to the target count, always picking the
+        // least-loaded provider
+        SortedSet<String> unassignedContainers = Sets.newTreeSet(new IndexedNameComparator());
+        unassignedContainers.addAll(allContainers);
+        unassignedContainers.removeAll(assignedContainers);
+
+        for (String containerName : unassignedContainers) {
+            if (assignmentCount >= targetCount)
+                break;
+
+            String providerName = counts.getMinKey();
+            Partition partition = new Partition(containerName);
+
+            if (failedContainers.contains(containerName)) {
+                log.warn(String.format("Container '%s:%s' failed and unassigned, going offline", providerName, containerName));
+                assignment.addReplicaMap(partition, Collections.singletonMap(providerName, "OFFLINE"));
+
+            } else {
+                assignment.addReplicaMap(partition, Collections.singletonMap(providerName, "ONLINE"));
+            }
+
+            counts.increment(providerName);
+            assignmentCount++;
+        }
+
+        if (log.isDebugEnabled()) {
+            log.debug(String.format("assignment counts: %s", counts));
+            log.debug(String.format("assignment: %s", assignment));
+        }
+
+        return assignment;
+    }
+
+    /** @return true if the container is ONLINE (current or pending) on some provider */
+    boolean hasProvider(ResourceState state, String containerName) {
+        Map<String, String> currentStateMap = state.getCurrentStateMap(containerName);
+        Map<String, String> pendingStateMap = state.getPendingStateMap(containerName);
+        return hasInstance(currentStateMap, "ONLINE") || hasInstance(pendingStateMap, "ONLINE");
+    }
+
+    /** @return the provider on which the container is (or is becoming) ONLINE */
+    String getProvider(ResourceState state, String containerName) {
+        Map<String, String> currentStateMap = state.getCurrentStateMap(containerName);
+        if (hasInstance(currentStateMap, "ONLINE"))
+            return getInstance(currentStateMap, "ONLINE");
+
+        Map<String, String> pendingStateMap = state.getPendingStateMap(containerName);
+        return getInstance(pendingStateMap, "ONLINE");
+    }
+
+    /** @return containers in ERROR, or designated ONLINE but reported unhealthy */
+    SortedSet<String> getFailedContainers(ResourceState state, Collection<String> containers) {
+        SortedSet<String> failedContainers = Sets.newTreeSet(new IndexedNameComparator());
+        for (String containerName : containers) {
+            Map<String, String> currentStateMap = state.getCurrentStateMap(containerName);
+            Map<String, String> pendingStateMap = state.getPendingStateMap(containerName);
+
+            if (hasInstance(currentStateMap, "ERROR")) {
+                failedContainers.add(containerName);
+                continue;
+            }
+
+            // skip containers that are not online or still in transition
+            if (!hasInstance(currentStateMap, "ONLINE") || hasInstance(pendingStateMap, "OFFLINE"))
+                continue;
+
+            // container listed online and not in transition, but not active
+            if (!statusProvider.isHealthy(containerName)) {
+                log.warn(String.format("Container '%s' designated ONLINE, but is not active", containerName));
+                failedContainers.add(containerName);
+            }
+        }
+        return failedContainers;
+    }
+
+    /** @return containers that currently have a provider (see {@link #hasProvider}) */
+    SortedSet<String> getAssignedContainers(ResourceState state, Collection<String> containers) {
+        SortedSet<String> assignedContainers = Sets.newTreeSet(new IndexedNameComparator());
+        for (String containerName : containers) {
+
+            if (!hasProvider(state, containerName))
+                continue;
+
+            assignedContainers.add(containerName);
+        }
+        return assignedContainers;
+    }
+
+    /** @return true if any instance in the map is in the given state */
+    boolean hasInstance(Map<String, String> stateMap, String state) {
+        for (Map.Entry<String, String> entry : stateMap.entrySet()) {
+            if (entry.getValue().equals(state)) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    /**
+     * @return the first instance in the map with the given state
+     * @throws IllegalArgumentException if no such instance exists
+     */
+    String getInstance(Map<String, String> stateMap, String state) {
+        for (Map.Entry<String, String> entry : stateMap.entrySet()) {
+            if (entry.getValue().equals(state)) {
+                return entry.getKey();
+            }
+        }
+        throw new IllegalArgumentException(String.format("Could not find instance with state '%s'", state));
+    }
+
+    /**
+     * Orders names with a trailing numeric index numerically ("c2" before
+     * "c10"), falling back to plain lexicographic order. Static: needs no
+     * enclosing-instance state.
+     */
+    static class IndexedNameComparator implements Comparator<String> {
+        Pattern pattern = Pattern.compile("^(.*)([0-9]+)$");
+
+        @Override
+        public int compare(String o1, String o2) {
+            Matcher m1 = pattern.matcher(o1);
+            Matcher m2 = pattern.matcher(o2);
+
+            boolean find1 = m1.find();
+            boolean find2 = m2.find();
+
+            if (!find1 && !find2)
+                return o1.compareTo(o2);
+
+            // names without a numeric suffix sort before those with one
+            if (!find1 && find2)
+                return -1;
+
+            if (find1 && !find2)
+                return 1;
+
+            String name1 = m1.group(1);
+            String name2 = m2.group(1);
+
+            int name_comp = name1.compareTo(name2);
+            if (name_comp != 0)
+                return name_comp;
+
+            int index1 = Integer.parseInt(m1.group(2));
+            int index2 = Integer.parseInt(m2.group(2));
+
+            // explicit three-way compare: subtracting ints can overflow
+            return index1 < index2 ? -1 : (index1 == index2 ? 0 : 1);
+        }
+    }
+
+    /**
+     * Map from provider to assigned-container count, fail-fast on unknown
+     * keys. Static: needs no enclosing-instance state.
+     */
+    static class CountMap extends HashMap<String, Integer> {
+
+        private static final long serialVersionUID = 3954138748385337978L;
+
+        public CountMap(Collection<String> keys) {
+            super();
+            for (String key : keys) {
+                put(key, 0);
+            }
+        }
+
+        @Override
+        public Integer get(Object key) {
+            Preconditions.checkArgument(containsKey(key), "Key %s not found", key);
+            return super.get(key);
+        }
+
+        public int increment(String key) {
+            // get() already fails fast on unknown keys
+            int newValue = get(key) + 1;
+            put(key, newValue);
+            return newValue;
+        }
+
+        /** @return key with the smallest count (ties broken by iteration order) */
+        public String getMinKey() {
+            Preconditions.checkState(size() > 0, "Must contain at least one item");
+
+            String minKey = null;
+            int minValue = Integer.MAX_VALUE;
+
+            for (String key : keySet()) {
+                int value = get(key);
+                if (value < minValue) {
+                    minValue = value;
+                    minKey = key;
+                }
+            }
+
+            return minKey;
+        }
+
+        /** @return key with the largest count (ties broken by iteration order) */
+        public String getMaxKey() {
+            Preconditions.checkState(size() > 0, "Must contain at least one item");
+
+            String maxKey = null;
+            int maxValue = Integer.MIN_VALUE;
+
+            for (String key : keySet()) {
+                int value = get(key);
+                if (value > maxValue) {
+                    maxValue = value;
+                    maxKey = key;
+                }
+            }
+
+            return maxKey;
+        }
+    }
+
+    /**
+     * View of the current-state output scoped to a single resource. Static:
+     * needs no enclosing-instance state.
+     */
+    static class ResourceState {
+        final String resourceName;
+        final CurrentStateOutput state;
+
+        public ResourceState(String resourceName, CurrentStateOutput state) {
+            this.resourceName = resourceName;
+            this.state = state;
+        }
+
+        Map<String, String> getCurrentStateMap(String partitionName) {
+            return state.getCurrentStateMap(resourceName, new Partition(partitionName));
+        }
+
+        Map<String, String> getPendingStateMap(String partitionName) {
+            return state.getPendingStateMap(resourceName, new Partition(partitionName));
+        }
+    }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/provider/ProviderRebalancerSingleton.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/provider/ProviderRebalancerSingleton.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/provider/ProviderRebalancerSingleton.java
new file mode 100644
index 0000000..16b8829
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/provider/ProviderRebalancerSingleton.java
@@ -0,0 +1,38 @@
+package org.apache.helix.autoscale.provider;
+
+import org.apache.helix.autoscale.StatusProvider;
+import org.apache.helix.autoscale.TargetProvider;
+import org.apache.log4j.Logger;
+
+/**
+ * Utility for dependency injection into ProviderRebalancer.
+ *
+ */
+public class ProviderRebalancerSingleton {
+
+    static final Logger log = Logger.getLogger(ProviderRebalancerSingleton.class);
+
+    static TargetProvider targetProvider;
+    static StatusProvider statusProvider;
+
+    /** Static-only holder, never instantiated. */
+    private ProviderRebalancerSingleton() {
+    }
+
+    /** @return the injected target provider, or null if none was set */
+    public static TargetProvider getTargetProvider() {
+        return targetProvider;
+    }
+
+    /** @return the injected status provider, or null if none was set */
+    public static StatusProvider getStatusProvider() {
+        return statusProvider;
+    }
+
+    /** Injects the target provider consumed by the rebalancer's init(). */
+    public static void setTargetProvider(TargetProvider provider) {
+        targetProvider = provider;
+    }
+
+    /** Injects the status provider consumed by the rebalancer's init(). */
+    public static void setStatusProvider(StatusProvider provider) {
+        statusProvider = provider;
+    }
+
+}
[02/15] Adding Helix-task-framework and Yarn integration modules
Posted by ki...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/resources/Boot2By2Shell.properties
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/resources/Boot2By2Shell.properties b/recipes/meta-cluster-manager/src/main/resources/Boot2By2Shell.properties
new file mode 100644
index 0000000..830a586
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/resources/Boot2By2Shell.properties
@@ -0,0 +1,87 @@
+#
+# Zookeeper (optional)
+#
+zookeeper.datadir=/tmp/meta/zk/data
+zookeeper.logdir=/tmp/meta/zk/log
+zookeeper.port=2199
+
+#
+# Cluster
+#
+cluster.name=cluster
+cluster.address=localhost:2199
+
+#
+# Resource
+#
+resource.0.name=wsprod
+resource.0.cluster=cluster
+resource.0.address=localhost:2199
+resource.0.container=webserver
+resource.0.model=MasterSlave
+resource.0.partitions=15
+resource.0.replica=1
+
+resource.1.name=dbprod
+resource.1.cluster=cluster
+resource.1.address=localhost:2199
+resource.1.container=database
+resource.1.model=MasterSlave
+resource.1.partitions=8
+resource.1.replica=3
+
+#
+# Controller
+#
+controller.name=controller
+controller.cluster=cluster
+controller.address=localhost:2199
+controller.autorefresh=5000
+
+#
+# Metacluster
+#
+metacluster.name=meta
+metacluster.address=localhost:2199
+metacluster.managedcluster=cluster
+metacluster.managedaddress=localhost:2199
+
+#
+# Metaresource
+#
+metaresource.0.name=webserver
+metaresource.0.metacluster=meta
+metaresource.0.metaaddress=localhost:2199
+metaresource.0.class=org.apache.helix.metamanager.impl.container.DummyMasterSlaveProcess
+
+metaresource.1.name=database
+metaresource.1.metacluster=meta
+metaresource.1.metaaddress=localhost:2199
+metaresource.1.class=org.apache.helix.metamanager.impl.container.DummyMasterSlaveProcess
+
+#
+# Metaprovider
+#
+metaprovider.0.name=provider0
+metaprovider.0.metacluster=meta
+metaprovider.0.metaaddress=localhost:2199
+metaprovider.0.class=org.apache.helix.metamanager.impl.shell.ShellContainerProviderProcess
+
+metaprovider.1.name=provider1
+metaprovider.1.metacluster=meta
+metaprovider.1.metaaddress=localhost:2199
+metaprovider.1.class=org.apache.helix.metamanager.impl.shell.ShellContainerProviderProcess
+
+#
+# Metacontroller
+#
+metacontroller.name=metacontroller
+metacontroller.metacluster=meta
+metacontroller.metaaddress=localhost:2199
+metacontroller.autorefresh=5000
+
+metacontroller.status.class=org.apache.helix.metamanager.impl.shell.ShellStatusProvider
+
+metacontroller.target.class=org.apache.helix.metamanager.impl.StaticTargetProvider
+metacontroller.target.webserver=5
+metacontroller.target.database=3
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/resources/Boot2By2Yarn.properties
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/resources/Boot2By2Yarn.properties b/recipes/meta-cluster-manager/src/main/resources/Boot2By2Yarn.properties
new file mode 100644
index 0000000..6d220eb
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/resources/Boot2By2Yarn.properties
@@ -0,0 +1,98 @@
+#
+# Zookeeper (optional)
+#
+zookeeper.datadir=/tmp/meta/zk/data
+zookeeper.logdir=/tmp/meta/zk/log
+zookeeper.port=2199
+
+#
+# Cluster
+#
+cluster.name=cluster
+cluster.address=rm:2199
+
+#
+# Resource
+#
+resource.0.name=wsprod
+resource.0.cluster=cluster
+resource.0.address=rm:2199
+resource.0.container=webserver
+resource.0.model=MasterSlave
+resource.0.partitions=15
+resource.0.replica=1
+
+resource.1.name=dbprod
+resource.1.cluster=cluster
+resource.1.address=rm:2199
+resource.1.container=database
+resource.1.model=MasterSlave
+resource.1.partitions=8
+resource.1.replica=3
+
+#
+# Controller
+#
+controller.name=controller
+controller.cluster=cluster
+controller.address=rm:2199
+controller.autorefresh=5000
+
+#
+# Metacluster
+#
+metacluster.name=meta
+metacluster.address=rm:2199
+metacluster.managedcluster=cluster
+metacluster.managedaddress=rm:2199
+
+#
+# Metaresource
+#
+metaresource.0.name=webserver
+metaresource.0.metacluster=meta
+metaresource.0.metaaddress=rm:2199
+metaresource.0.class=org.apache.helix.metamanager.impl.container.DummyMasterSlaveProcess
+
+metaresource.1.name=database
+metaresource.1.metacluster=meta
+metaresource.1.metaaddress=rm:2199
+metaresource.1.class=org.apache.helix.metamanager.impl.container.DummyMasterSlaveProcess
+
+#
+# Metaprovider
+#
+metaprovider.0.name=provider0
+metaprovider.0.metacluster=meta
+metaprovider.0.metaaddress=rm:2199
+metaprovider.0.class=org.apache.helix.metamanager.impl.yarn.YarnContainerProviderProcess
+metaprovider.0.yarndata=rm:2199
+# NOTE(review): "resourcemananger" looks like a typo for "resourcemanager", but the
+# key must match whatever the provider configuration code reads -- verify before renaming
+metaprovider.0.resourcemananger=rm:8032
+metaprovider.0.scheduler=rm:8030
+metaprovider.0.user=yarn
+metaprovider.0.hdfs=hdfs://rm:9000/
+
+metaprovider.1.name=provider1
+metaprovider.1.metacluster=meta
+metaprovider.1.metaaddress=rm:2199
+metaprovider.1.class=org.apache.helix.metamanager.impl.yarn.YarnContainerProviderProcess
+metaprovider.1.yarndata=rm:2199
+# NOTE(review): "resourcemananger" spelling kept to match the key the provider code reads
+metaprovider.1.resourcemananger=rm:8032
+metaprovider.1.scheduler=rm:8030
+metaprovider.1.user=yarn
+metaprovider.1.hdfs=hdfs://rm:9000/
+
+#
+# Metacontroller
+#
+metacontroller.name=metacontroller
+metacontroller.metacluster=meta
+metacontroller.metaaddress=rm:2199
+metacontroller.autorefresh=5000
+
+metacontroller.status.class=org.apache.helix.metamanager.impl.yarn.YarnStatusProvider
+metacontroller.status.yarndata=rm:2199
+
+metacontroller.target.class=org.apache.helix.metamanager.impl.StaticTargetProvider
+metacontroller.target.webserver=5
+metacontroller.target.database=3
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/resources/BootLocal.properties
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/resources/BootLocal.properties b/recipes/meta-cluster-manager/src/main/resources/BootLocal.properties
new file mode 100644
index 0000000..a86c9f0
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/resources/BootLocal.properties
@@ -0,0 +1,68 @@
+#
+# Zookeeper (optional)
+#
+zookeeper.datadir=/tmp/meta/zk/data
+zookeeper.logdir=/tmp/meta/zk/log
+zookeeper.port=2199
+
+#
+# Cluster
+#
+cluster.name=cluster
+cluster.address=localhost:2199
+
+#
+# Resource
+#
+resource.name=resource
+resource.cluster=cluster
+resource.address=localhost:2199
+resource.container=container
+resource.model=MasterSlave
+resource.partitions=10
+resource.replica=3
+
+#
+# Controller
+#
+controller.name=controller
+controller.cluster=cluster
+controller.address=localhost:2199
+controller.autorefresh=5000
+
+#
+# Metacluster
+#
+metacluster.name=meta
+metacluster.address=localhost:2199
+metacluster.managedcluster=cluster
+metacluster.managedaddress=localhost:2199
+
+#
+# Metaresource
+#
+metaresource.name=container
+metaresource.metacluster=meta
+metaresource.metaaddress=localhost:2199
+metaresource.class=org.apache.helix.metamanager.impl.container.DummyMasterSlaveProcess
+
+#
+# Metaprovider
+#
+metaprovider.name=provider
+metaprovider.metacluster=meta
+metaprovider.metaaddress=localhost:2199
+metaprovider.class=org.apache.helix.metamanager.impl.local.LocalContainerProviderProcess
+
+#
+# Metacontroller
+#
+metacontroller.name=metacontroller
+metacontroller.metacluster=meta
+metacontroller.metaaddress=localhost:2199
+metacontroller.autorefresh=5000
+
+metacontroller.status.class=org.apache.helix.metamanager.impl.local.LocalStatusProvider
+
+metacontroller.target.class=org.apache.helix.metamanager.impl.StaticTargetProvider
+metacontroller.target.container=7
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/resources/boot/cluster.properties
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/resources/boot/cluster.properties b/recipes/meta-cluster-manager/src/main/resources/boot/cluster.properties
new file mode 100644
index 0000000..17d9406
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/resources/boot/cluster.properties
@@ -0,0 +1,2 @@
+cluster.cluster=managed
+cluster.address=localhost:2199
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/resources/boot/controller.properties
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/resources/boot/controller.properties b/recipes/meta-cluster-manager/src/main/resources/boot/controller.properties
new file mode 100644
index 0000000..1d96260
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/resources/boot/controller.properties
@@ -0,0 +1,4 @@
+controller.name=controller
+controller.cluster=managed
+controller.address=localhost:2199
+controller.autorefresh=-1
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/resources/boot/metacluster.properties
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/resources/boot/metacluster.properties b/recipes/meta-cluster-manager/src/main/resources/boot/metacluster.properties
new file mode 100644
index 0000000..f1e6062
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/resources/boot/metacluster.properties
@@ -0,0 +1,4 @@
+metacluster.cluster=managed
+metacluster.address=localhost:2199
+metacluster.metacluster=meta
+metacluster.metaaddress=localhost:2199
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/resources/boot/metacontroller.properties
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/resources/boot/metacontroller.properties b/recipes/meta-cluster-manager/src/main/resources/boot/metacontroller.properties
new file mode 100644
index 0000000..133ac69
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/resources/boot/metacontroller.properties
@@ -0,0 +1,4 @@
+controller.name=metacontroller
+controller.cluster=meta
+controller.address=localhost:2199
+controller.autorefresh=5000
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/resources/boot/resdb.properties
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/resources/boot/resdb.properties b/recipes/meta-cluster-manager/src/main/resources/boot/resdb.properties
new file mode 100644
index 0000000..0830e50
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/resources/boot/resdb.properties
@@ -0,0 +1,4 @@
+name=zookeeper
+datadir=/tmp/zk/data
+logdir=/tmp/zk/log
+port=2199
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/resources/boot/resws.properties
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/resources/boot/resws.properties b/recipes/meta-cluster-manager/src/main/resources/boot/resws.properties
new file mode 100644
index 0000000..0830e50
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/resources/boot/resws.properties
@@ -0,0 +1,4 @@
+name=zookeeper
+datadir=/tmp/zk/data
+logdir=/tmp/zk/log
+port=2199
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/resources/boot/zookeeper.properties
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/resources/boot/zookeeper.properties b/recipes/meta-cluster-manager/src/main/resources/boot/zookeeper.properties
new file mode 100644
index 0000000..04587c8
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/resources/boot/zookeeper.properties
@@ -0,0 +1,4 @@
+zookeeper.name=zookeeper
+zookeeper.datadir=/tmp/zk/data
+zookeeper.logdir=/tmp/zk/log
+zookeeper.port=2199
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/resources/container.properties
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/resources/container.properties b/recipes/meta-cluster-manager/src/main/resources/container.properties
new file mode 100644
index 0000000..8817165
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/resources/container.properties
@@ -0,0 +1 @@
+class=org.apache.helix.metamanager.impl.container.DummyMasterSlaveProcess
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/resources/log4j.properties
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/resources/log4j.properties b/recipes/meta-cluster-manager/src/main/resources/log4j.properties
new file mode 100644
index 0000000..af33e21
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/resources/log4j.properties
@@ -0,0 +1,30 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# Set root logger level to ERROR and its only appender to C.
+log4j.rootLogger=ERROR, C
+
+# C is set to be a ConsoleAppender.
+log4j.appender.C=org.apache.log4j.ConsoleAppender
+log4j.appender.C.layout=org.apache.log4j.PatternLayout
+log4j.appender.C.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n
+
+log4j.logger.org.I0Itec=ERROR
+log4j.logger.org.apache=ERROR
+
+log4j.logger.org.apache.helix.metamanager=INFO
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/resources/redisLocal.properties
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/resources/redisLocal.properties b/recipes/meta-cluster-manager/src/main/resources/redisLocal.properties
new file mode 100644
index 0000000..47c0800
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/resources/redisLocal.properties
@@ -0,0 +1,50 @@
+#
+# Zookeeper (optional)
+#
+zookeeper.datadir=/tmp/meta/zk/data
+zookeeper.logdir=/tmp/meta/zk/log
+zookeeper.port=2199
+
+#
+# Meta Cluster
+#
+meta.cluster=meta
+meta.managed=managed
+meta.address=rm:2199
+
+meta.target.type=redis
+meta.target.address=rm:2199
+meta.target.root=redis
+meta.target.records=100000
+meta.target.clients=20
+meta.target.requests=100000
+meta.target.target.get=100000
+meta.target.target.set=100000
+
+meta.status.type=local
+meta.status.metadata=rm:2199
+
+meta.provider.type=local
+meta.provider.name=provider_0
+meta.provider.address=rm:2199
+meta.provider.cluster=managed
+
+meta.provider.containers=cache
+
+meta.provider.container.cache.class=org.apache.helix.metamanager.impl.container.RedisServerProcess
+meta.provider.container.cache.address=rm:2199
+meta.provider.container.cache.root=redis
+meta.provider.container.cache.baseport=17000
+
+#
+# Managed Cluster
+#
+managed.cluster=managed
+managed.address=rm:2199
+
+managed.resources=devcache
+
+managed.resource.devcache.container=cache
+managed.resource.devcache.model=OnlineOffline
+managed.resource.devcache.partitions=10
+managed.resource.devcache.replica=1
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/resources/redisYarn.properties
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/resources/redisYarn.properties b/recipes/meta-cluster-manager/src/main/resources/redisYarn.properties
new file mode 100644
index 0000000..1862781
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/resources/redisYarn.properties
@@ -0,0 +1,52 @@
+#
+# Zookeeper (optional)
+#
+zookeeper.datadir=/export/home/eng/apucher/zk/data
+zookeeper.logdir=/export/home/eng/apucher/zk/log
+zookeeper.port=2199
+
+#
+# Meta Cluster
+#
+meta.address=rm:2199
+meta.interval=5000
+
+meta.target.type=redis
+meta.target.address=rm:2199
+meta.target.root=redis
+meta.target.interval=20000
+meta.target.timeout=10000
+meta.target.get=250000
+meta.target.min=3
+meta.target.max=23
+
+meta.status.type=yarn
+meta.status.metadata=rm:2199
+
+meta.provider.type=yarn
+meta.provider.name=provider_0
+meta.provider.address=rm:2199
+meta.provider.cluster=managed
+meta.provider.metadata=rm:2199
+meta.provider.resourcemananger=rm:8032
+meta.provider.scheduler=rm:8030
+meta.provider.user=yarn
+meta.provider.hdfs=hdfs://rm:9000/
+meta.provider.containers=cache
+
+meta.provider.container.cache.class=org.apache.helix.metamanager.impl.container.RedisServerProcess
+meta.provider.container.cache.address=rm:2199
+meta.provider.container.cache.root=redis
+meta.provider.container.cache.baseport=17000
+
+#
+# Managed Cluster
+#
+managed.address=rm:2199
+
+managed.resources=devcache
+
+managed.resource.devcache.container=cache
+managed.resource.devcache.model=OnlineOffline
+managed.resource.devcache.partitions=10
+managed.resource.devcache.replica=1
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/test/conf/testng-integration.xml
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/test/conf/testng-integration.xml b/recipes/meta-cluster-manager/src/test/conf/testng-integration.xml
new file mode 100644
index 0000000..ed7d1c9
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/test/conf/testng-integration.xml
@@ -0,0 +1,27 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied. See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+<!DOCTYPE suite SYSTEM "http://testng.org/testng-1.0.dtd">
+<suite name="Suite" parallel="none">
+ <test name="Test" preserve-order="false">
+ <packages>
+ <package name="org.apache.helix.metamanager.integration.*"/>
+ </packages>
+ </test>
+</suite>
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/test/conf/testng-unit.xml
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/test/conf/testng-unit.xml b/recipes/meta-cluster-manager/src/test/conf/testng-unit.xml
new file mode 100644
index 0000000..e178e4a
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/test/conf/testng-unit.xml
@@ -0,0 +1,27 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied. See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+<!DOCTYPE suite SYSTEM "http://testng.org/testng-1.0.dtd">
+<suite name="Suite" parallel="none">
+ <test name="Test" preserve-order="false">
+ <packages>
+ <package name="org.apache.helix.metamanager.unit.*"/>
+ </packages>
+ </test>
+</suite>
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/test/conf/testng.xml
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/test/conf/testng.xml b/recipes/meta-cluster-manager/src/test/conf/testng.xml
new file mode 100644
index 0000000..8c3517f
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/test/conf/testng.xml
@@ -0,0 +1,27 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied. See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+<!DOCTYPE suite SYSTEM "http://testng.org/testng-1.0.dtd">
+<suite name="Suite" parallel="none">
+ <test name="Test" preserve-order="false">
+ <packages>
+ <package name="org.apache.helix.*"/>
+ </packages>
+ </test>
+</suite>
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/test/config/testng-integration.xml
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/test/config/testng-integration.xml b/recipes/meta-cluster-manager/src/test/config/testng-integration.xml
new file mode 100644
index 0000000..ed7d1c9
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/test/config/testng-integration.xml
@@ -0,0 +1,27 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied. See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+<!DOCTYPE suite SYSTEM "http://testng.org/testng-1.0.dtd">
+<suite name="Suite" parallel="none">
+ <test name="Test" preserve-order="false">
+ <packages>
+ <package name="org.apache.helix.metamanager.integration.*"/>
+ </packages>
+ </test>
+</suite>
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/test/config/testng-unit.xml
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/test/config/testng-unit.xml b/recipes/meta-cluster-manager/src/test/config/testng-unit.xml
new file mode 100644
index 0000000..e178e4a
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/test/config/testng-unit.xml
@@ -0,0 +1,27 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied. See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+<!DOCTYPE suite SYSTEM "http://testng.org/testng-1.0.dtd">
+<suite name="Suite" parallel="none">
+ <test name="Test" preserve-order="false">
+ <packages>
+ <package name="org.apache.helix.metamanager.unit.*"/>
+ </packages>
+ </test>
+</suite>
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/test/config/testng.xml
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/test/config/testng.xml b/recipes/meta-cluster-manager/src/test/config/testng.xml
new file mode 100644
index 0000000..aa8f190
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/test/config/testng.xml
@@ -0,0 +1,27 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied. See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+<!DOCTYPE suite SYSTEM "http://testng.org/testng-1.0.dtd">
+<suite name="Suite" parallel="none">
+ <test name="Test" preserve-order="true">
+ <packages>
+ <package name="org.apache.helix.metamanager.*"/>
+ </packages>
+ </test>
+</suite>
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/BootstrapperIT.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/BootstrapperIT.java b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/BootstrapperIT.java
new file mode 100644
index 0000000..7809bab
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/BootstrapperIT.java
@@ -0,0 +1,134 @@
+package org.apache.helix.metamanager;
+
+import java.io.IOException;
+import java.util.Collection;
+import java.util.Properties;
+
+import org.apache.helix.HelixAdmin;
+import org.apache.helix.manager.zk.ZKHelixAdmin;
+import org.apache.helix.metamanager.Service;
+import org.apache.helix.metamanager.bootstrapper.Boot;
+import org.apache.helix.metamanager.bootstrapper.ClusterService;
+import org.apache.helix.metamanager.bootstrapper.ControllerService;
+import org.apache.helix.metamanager.bootstrapper.MetaClusterService;
+import org.apache.helix.metamanager.bootstrapper.MetaControllerService;
+import org.apache.helix.metamanager.bootstrapper.MetaProviderService;
+import org.apache.helix.metamanager.bootstrapper.MetaResourceService;
+import org.apache.helix.metamanager.bootstrapper.ResourceService;
+import org.apache.helix.metamanager.bootstrapper.ZookeeperService;
+import org.apache.log4j.Logger;
+import org.testng.Assert;
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.Test;
+
+/**
+ * Bootstrapping tool test. Reads cluster configuration from *.properties files,
+ * spawns services and verifies number of active partitions and containers
+ *
+ * @see Boot
+ */
+@Test(groups = { "integration", "boot" })
+public class BootstrapperIT {
+
+ static final Logger log = Logger.getLogger(BootstrapperIT.class);
+
+ Boot boot;
+ HelixAdmin admin;
+
+ @AfterMethod(alwaysRun = true)
+ public void teardown() throws Exception {
+ log.debug("tearing down bootstrap test");
+ if (admin != null) {
+ admin.close();
+ admin = null;
+ }
+ if (boot != null) {
+ boot.stop();
+ boot = null;
+ }
+ }
+
+ @Test(timeOut = TestUtils.TEST_TIMEOUT, groups = { "local" })
+ public void bootstrapLocalTest() throws Exception {
+ boot = new Boot();
+ boot.configure(getProperties("BootLocal.properties"));
+ boot.start();
+
+ Assert.assertTrue(containsInstanceOf(boot.getServcies(), ZookeeperService.class));
+ Assert.assertTrue(containsInstanceOf(boot.getServcies(), ClusterService.class));
+ Assert.assertTrue(containsInstanceOf(boot.getServcies(), ResourceService.class));
+ Assert.assertTrue(containsInstanceOf(boot.getServcies(), ControllerService.class));
+ Assert.assertTrue(containsInstanceOf(boot.getServcies(), MetaClusterService.class));
+ Assert.assertTrue(containsInstanceOf(boot.getServcies(), MetaResourceService.class));
+ Assert.assertTrue(containsInstanceOf(boot.getServcies(), MetaProviderService.class));
+ Assert.assertTrue(containsInstanceOf(boot.getServcies(), MetaControllerService.class));
+
+ final long limit = System.currentTimeMillis() + TestUtils.REBALANCE_TIMEOUT;
+
+ admin = new ZKHelixAdmin("localhost:2199");
+ waitUntil(admin, "meta", "container", 1, 7, (limit - System.currentTimeMillis()));
+ waitUntil(admin, "cluster", "resource", 7, 10, (limit - System.currentTimeMillis()));
+
+ }
+
+ @Test(timeOut = TestUtils.TEST_TIMEOUT, groups = { "local" })
+ public void bootstrap2By2LocalTest() throws Exception {
+ boot = new Boot();
+ boot.configure(getProperties("Boot2By2Local.properties"));
+ boot.start();
+
+ verify2By2Setup();
+ }
+
+ @Test(timeOut = TestUtils.TEST_TIMEOUT, groups = { "shell" })
+ public void bootstrap2By2ShellTest() throws Exception {
+ boot = new Boot();
+ boot.configure(getProperties("Boot2By2Shell.properties"));
+ boot.start();
+
+ verify2By2Setup();
+ }
+
+ @Test(timeOut = TestUtils.TEST_TIMEOUT, groups = { "yarn" })
+ public void bootstrap2By2YarnTest() throws Exception {
+ boot = new Boot();
+ boot.configure(getProperties("Boot2By2Yarn.properties"));
+ boot.start();
+
+ verify2By2Setup();
+ }
+
+ void verify2By2Setup() throws Exception {
+ final long limit = System.currentTimeMillis() + TestUtils.REBALANCE_TIMEOUT;
+ final String address = "localhost:2199";
+
+ log.debug(String.format("connecting to zookeeper at '%s'", address));
+
+ admin = new ZKHelixAdmin(address);
+ waitUntil(admin, "meta", "database", 2, 3, (limit - System.currentTimeMillis()));
+ waitUntil(admin, "meta", "webserver", 2, 5, (limit - System.currentTimeMillis()));
+ waitUntil(admin, "cluster", "dbprod", 3, 8, (limit - System.currentTimeMillis()));
+ waitUntil(admin, "cluster", "wsprod", 5, 15, (limit - System.currentTimeMillis()));
+ }
+
+ static void waitUntil(HelixAdmin admin, String cluster, String resource, int instanceCount, int partitionCount, long timeout) throws Exception {
+ final long limit = System.currentTimeMillis() + timeout;
+ TestUtils.waitUntilInstanceCount(admin, cluster, resource, instanceCount, (limit - System.currentTimeMillis()));
+ TestUtils.waitUntilPartitionCount(admin, cluster, resource, partitionCount, (limit - System.currentTimeMillis()));
+ }
+
+ static Properties getProperties(String resourcePath) throws IOException {
+ Properties properties = new Properties();
+ properties.load(ClassLoader.getSystemResourceAsStream(resourcePath));
+ return properties;
+ }
+
+ static boolean containsInstanceOf(Collection<Service> services, Class<?> clazz) {
+ for (Service service : services) {
+ if (clazz.isAssignableFrom(service.getClass()))
+ return true;
+ }
+ return false;
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/FailoverIT.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/FailoverIT.java b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/FailoverIT.java
new file mode 100644
index 0000000..90e8be0
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/FailoverIT.java
@@ -0,0 +1,195 @@
+package org.apache.helix.metamanager;
+
+import java.util.Collections;
+import java.util.Iterator;
+
+import org.apache.helix.metamanager.Service;
+import org.apache.helix.metamanager.impl.StaticTargetProvider;
+import org.apache.helix.metamanager.impl.local.LocalContainerProviderProcess;
+import org.apache.helix.metamanager.impl.local.LocalContainerSingleton;
+import org.apache.helix.metamanager.impl.local.LocalStatusProvider;
+import org.apache.helix.metamanager.impl.shell.ShellContainerProviderProcess;
+import org.apache.helix.metamanager.impl.shell.ShellContainerSingleton;
+import org.apache.helix.metamanager.impl.shell.ShellStatusProvider;
+import org.apache.helix.metamanager.impl.yarn.YarnContainerProviderProcess;
+import org.apache.helix.metamanager.impl.yarn.YarnStatusProvider;
+import org.apache.helix.metamanager.impl.yarn.ZookeeperYarnDataProvider;
+import org.apache.helix.metamanager.provider.ProviderRebalancer;
+import org.apache.log4j.Logger;
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.Test;
+
+/**
+ * Fault-recovery test for individual containers and whole providers. Missing
+ * containers should be replaced by the meta cluster Rebalancer using remaining
+ * active providers.
+ *
+ * @see ProviderRebalancer
+ */
+@Test(groups = { "integration", "failure" })
+public class FailoverIT {
+
+ static final Logger log = Logger.getLogger(FailoverIT.class);
+
+ static final int CONTAINER_COUNT = 7;
+
+ StaticTargetProvider targetProvider;
+ YarnStatusProvider yarnStatusProvider;
+
+ @BeforeClass(alwaysRun = true)
+ public void setupClass() {
+ log.info("installing shutdown hook");
+ Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() {
+ @Override
+ public void run() {
+ try {
+ teardownTest();
+ } catch (Exception ignore) {
+ }
+ ;
+ }
+ }));
+ }
+
+ @BeforeMethod(alwaysRun = true)
+ public void setupTest() throws Exception {
+ teardownTest();
+ targetProvider = new StaticTargetProvider(Collections.singletonMap(TestUtils.metaResourceName, CONTAINER_COUNT));
+ }
+
+ @AfterMethod(alwaysRun = true)
+ public void teardownTest() throws Exception {
+ TestUtils.stopTestCluster();
+
+ if (yarnStatusProvider != null) {
+ yarnStatusProvider.stop();
+ yarnStatusProvider = null;
+ }
+
+ TestUtils.stopZookeeper();
+ }
+
+ @Test(timeOut = TestUtils.TEST_TIMEOUT, groups = { "local" })
+ public void testLocalContainerFailover() throws Exception {
+ log.info("testing local container failover");
+ TestUtils.configure();
+ TestUtils.startZookeeper();
+ TestUtils.startTestCluster(targetProvider, new LocalStatusProvider(), makeLocalProviders(3));
+ killLocalContainers();
+ }
+
+ @Test(timeOut = TestUtils.TEST_TIMEOUT, groups = { "local" })
+ public void testLocalProviderFailover() throws Exception {
+ log.info("testing local provider failover");
+ TestUtils.configure();
+ TestUtils.startZookeeper();
+ TestUtils.startTestCluster(targetProvider, new LocalStatusProvider(), makeLocalProviders(3));
+ killProvider();
+ }
+
+ @Test(timeOut = TestUtils.TEST_TIMEOUT, groups = { "shell" })
+ public void testShellContainerFailover() throws Exception {
+ log.info("testing shell container failover");
+ TestUtils.configure();
+ TestUtils.startZookeeper();
+ TestUtils.startTestCluster(targetProvider, new ShellStatusProvider(), makeShellProviders(3));
+ killShellContainers();
+ }
+
+ @Test(timeOut = TestUtils.TEST_TIMEOUT, groups = { "shell" })
+ public void testShellProviderFailover() throws Exception {
+ log.info("testing shell provider failover");
+ TestUtils.configure();
+ TestUtils.startZookeeper();
+ TestUtils.startTestCluster(targetProvider, new ShellStatusProvider(), makeShellProviders(3));
+ killProvider();
+ }
+
+ @Test(timeOut = TestUtils.TEST_TIMEOUT, groups = { "yarn" })
+ public void testYarnContainerFailover() throws Exception {
+ log.info("testing yarn container failover");
+ TestUtils.configure("distributed.properties");
+ TestUtils.startZookeeper();
+ yarnStatusProvider = new YarnStatusProvider(TestUtils.zkAddress);
+ yarnStatusProvider.start();
+ TestUtils.startTestCluster(targetProvider, yarnStatusProvider, makeYarnProviders(3));
+ killYarnContainers();
+ }
+
+ @Test(timeOut = TestUtils.TEST_TIMEOUT, groups = { "yarn" })
+ public void testYarnProviderFailover() throws Exception {
+ log.info("testing yarn provider failover");
+ TestUtils.configure("distributed.properties");
+ TestUtils.startZookeeper();
+ yarnStatusProvider = new YarnStatusProvider(TestUtils.zkAddress);
+ yarnStatusProvider.start();
+ TestUtils.startTestCluster(targetProvider, yarnStatusProvider, makeYarnProviders(3));
+ killProvider();
+ }
+
+ void killLocalContainers() throws Exception {
+ LocalContainerSingleton.killProcess("container_2");
+ LocalContainerSingleton.killProcess("container_4");
+ LocalContainerSingleton.killProcess("container_6");
+ Thread.sleep(3000);
+ TestUtils.rebalanceTestCluster();
+ TestUtils.waitUntilRebalancedCount(CONTAINER_COUNT);
+ }
+
+ void killShellContainers() throws Exception {
+ ShellContainerSingleton.killProcess("container_2");
+ ShellContainerSingleton.killProcess("container_4");
+ ShellContainerSingleton.killProcess("container_6");
+ Thread.sleep(3000);
+ TestUtils.rebalanceTestCluster();
+ TestUtils.waitUntilRebalancedCount(CONTAINER_COUNT);
+ }
+
+ void killYarnContainers() throws Exception {
+ ZookeeperYarnDataProvider yarnDataService = new ZookeeperYarnDataProvider(TestUtils.zkAddress);
+ yarnDataService.start();
+ yarnDataService.delete("container_2");
+ yarnDataService.delete("container_4");
+ yarnDataService.delete("container_6");
+ yarnDataService.stop();
+ Thread.sleep(3000);
+ TestUtils.rebalanceTestCluster();
+ TestUtils.waitUntilRebalancedCount(CONTAINER_COUNT);
+ }
+
+ static void killProvider() throws Exception {
+ Iterator<Service> itService = TestUtils.providerServices.iterator();
+ itService.next().stop();
+ itService.remove();
+
+ TestUtils.rebalanceTestCluster();
+ TestUtils.waitUntilRebalancedCount(CONTAINER_COUNT);
+ }
+
+ LocalContainerProviderProcess[] makeLocalProviders(int count) throws Exception {
+ LocalContainerProviderProcess[] localProviders = new LocalContainerProviderProcess[count];
+ for (int i = 0; i < count; i++) {
+ localProviders[i] = TestUtils.makeLocalProvider("provider_" + i);
+ }
+ return localProviders;
+ }
+
+ ShellContainerProviderProcess[] makeShellProviders(int count) throws Exception {
+ ShellContainerProviderProcess[] shellProviders = new ShellContainerProviderProcess[count];
+ for (int i = 0; i < count; i++) {
+ shellProviders[i] = TestUtils.makeShellProvider("provider_" + i);
+ }
+ return shellProviders;
+ }
+
+ YarnContainerProviderProcess[] makeYarnProviders(int count) throws Exception {
+ YarnContainerProviderProcess[] yarnProviders = new YarnContainerProviderProcess[count];
+ for (int i = 0; i < count; i++) {
+ yarnProviders[i] = TestUtils.makeYarnProvider("provider_" + i);
+ }
+ return yarnProviders;
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/LocalContainerProviderIT.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/LocalContainerProviderIT.java b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/LocalContainerProviderIT.java
new file mode 100644
index 0000000..e1faddd
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/LocalContainerProviderIT.java
@@ -0,0 +1,80 @@
+package org.apache.helix.metamanager;
+
+import java.util.Collections;
+
+import org.apache.helix.metamanager.impl.StaticTargetProvider;
+import org.apache.helix.metamanager.impl.local.LocalContainerProviderProcess;
+import org.apache.helix.metamanager.impl.local.LocalStatusProvider;
+import org.apache.log4j.Logger;
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.Test;
+
+/**
+ * Local container provider and local status provider test. Scale-up and -down
+ * only, no failures.
+ *
+ * @see LocalContainerProvider
+ * @see LocalStatusProvider
+ */
+@Test(groups = { "integration", "local" })
+public class LocalContainerProviderIT {
+
+ static final Logger log = Logger.getLogger(LocalContainerProviderIT.class);
+
+ static final int CONTAINER_COUNT = 4;
+
+ StaticTargetProvider clusterStatusProvider;
+ LocalContainerProviderProcess containerProvider;
+ LocalStatusProvider containerStatusProvider;
+
+ @BeforeMethod(alwaysRun = true)
+ public void setupTest() throws Exception {
+ teardownTest();
+ TestUtils.configure();
+ TestUtils.startZookeeper();
+ containerProvider = TestUtils.makeLocalProvider("provider_0");
+ clusterStatusProvider = new StaticTargetProvider(Collections.singletonMap(TestUtils.metaResourceName, CONTAINER_COUNT));
+ containerStatusProvider = new LocalStatusProvider();
+ TestUtils.startTestCluster(clusterStatusProvider, containerStatusProvider, containerProvider);
+ }
+
+ @AfterMethod(alwaysRun = true)
+ public void teardownTest() throws Exception {
+ TestUtils.stopTestCluster();
+ TestUtils.stopZookeeper();
+ }
+
+ @Test(timeOut = TestUtils.TEST_TIMEOUT)
+ public void testStatic() throws Exception {
+ log.info("testing static");
+ setContainerCount(CONTAINER_COUNT);
+ }
+
+ @Test(timeOut = TestUtils.TEST_TIMEOUT)
+ public void testScaleUp() throws Exception {
+ log.info("testing scale up");
+ setContainerCount(CONTAINER_COUNT + 2);
+ }
+
+ @Test(timeOut = TestUtils.TEST_TIMEOUT)
+ public void testScaleDown() throws Exception {
+ log.info("testing scale down");
+ setContainerCount(CONTAINER_COUNT - 2);
+ }
+
+ @Test(timeOut = TestUtils.TEST_TIMEOUT)
+ public void testScaleCycle() throws Exception {
+ log.info("testing scale cycle");
+ setContainerCount(CONTAINER_COUNT + 2);
+ setContainerCount(CONTAINER_COUNT);
+ setContainerCount(CONTAINER_COUNT - 2);
+ setContainerCount(CONTAINER_COUNT);
+ }
+
+ void setContainerCount(int newContainerCount) throws Exception {
+ log.debug(String.format("Setting container count to %d", newContainerCount));
+ clusterStatusProvider.setTargetContainerCount(TestUtils.metaResourceName, newContainerCount);
+ TestUtils.rebalanceTestCluster();
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/ShellContainerProviderIT.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/ShellContainerProviderIT.java b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/ShellContainerProviderIT.java
new file mode 100644
index 0000000..45b3023
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/ShellContainerProviderIT.java
@@ -0,0 +1,95 @@
+package org.apache.helix.metamanager;
+
+import java.util.Collections;
+
+import org.apache.helix.metamanager.impl.StaticTargetProvider;
+import org.apache.helix.metamanager.impl.shell.ShellContainerProviderProcess;
+import org.apache.helix.metamanager.impl.shell.ShellStatusProvider;
+import org.apache.log4j.Logger;
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.Test;
+
+/**
+ * Shell container provider and shell status provider test. Scale-up and -down
+ * only, no failures.
+ *
+ * @see ShellContainerProvider
+ * @see ShellStatusProvider
+ */
+@Test(groups = { "integration", "shell" })
+public class ShellContainerProviderIT {
+
+ static final Logger log = Logger.getLogger(ShellContainerProviderIT.class);
+
+ static final long TEST_TIMEOUT = 20000;
+ static final long REBALANCE_TIMEOUT = 10000;
+
+ static final int CONTAINER_COUNT = 4;
+
+ StaticTargetProvider clusterStatusProvider;
+ ShellContainerProviderProcess containerProvider;
+ ShellStatusProvider containerStatusProvider;
+
+ @BeforeClass(alwaysRun = true)
+ public void setupClass() {
+ log.info("installing shutdown hook");
+ Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() {
+ @Override
+ public void run() {
+ try { teardownTest(); } catch(Exception ignore) {};
+ }
+ }));
+ }
+
+ @BeforeMethod(alwaysRun = true)
+ public void setupTest() throws Exception {
+ teardownTest();
+ TestUtils.configure();
+ TestUtils.startZookeeper();
+ containerProvider = TestUtils.makeShellProvider("provider_0");
+ clusterStatusProvider = new StaticTargetProvider(Collections.singletonMap(TestUtils.metaResourceName, CONTAINER_COUNT));
+ containerStatusProvider = new ShellStatusProvider();
+ TestUtils.startTestCluster(clusterStatusProvider, containerStatusProvider, containerProvider);
+ }
+
+ @AfterMethod(alwaysRun = true)
+ public void teardownTest() throws Exception {
+ TestUtils.stopTestCluster();
+ TestUtils.stopZookeeper();
+ }
+
+ @Test(timeOut = TestUtils.TEST_TIMEOUT)
+ public void testStatic() throws Exception {
+ log.info("testing static");
+ setContainerCount(CONTAINER_COUNT);
+ }
+
+ @Test(timeOut = TestUtils.TEST_TIMEOUT)
+ public void testScaleUp() throws Exception {
+ log.info("testing scale up");
+ setContainerCount(CONTAINER_COUNT + 2);
+ }
+
+ @Test(timeOut = TestUtils.TEST_TIMEOUT)
+ public void testScaleDown() throws Exception {
+ log.info("testing scale down");
+ setContainerCount(CONTAINER_COUNT - 2);
+ }
+
+ @Test(timeOut = TestUtils.TEST_TIMEOUT)
+ public void testScaleCycle() throws Exception {
+ log.info("testing scale cycle");
+ setContainerCount(CONTAINER_COUNT + 2);
+ setContainerCount(CONTAINER_COUNT);
+ setContainerCount(CONTAINER_COUNT - 2);
+ setContainerCount(CONTAINER_COUNT);
+ }
+
+ void setContainerCount(int newContainerCount) throws Exception {
+ log.debug(String.format("Setting container count to %d", newContainerCount));
+ clusterStatusProvider.setTargetContainerCount(TestUtils.metaResourceName, newContainerCount);
+ TestUtils.rebalanceTestCluster();
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/TestContainerProvider.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/TestContainerProvider.java b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/TestContainerProvider.java
new file mode 100644
index 0000000..d42a7b3
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/TestContainerProvider.java
@@ -0,0 +1,17 @@
+package org.apache.helix.metamanager;
+
+import org.apache.helix.metamanager.provider.local.LocalContainerProvider;
+import org.apache.helix.metamanager.provider.local.LocalContainerSingleton;
+
+/**
+ * Test double: a local container provider pre-wired to the TestUtils ZooKeeper
+ * address and managed cluster, which also resets the shared singleton state on
+ * destroyAll() so tests do not leak containers into each other.
+ * NOTE(review): extends LocalContainerProvider from the provider.local package
+ * while the other test files import from impl.local — this file may be stale;
+ * verify against the current package layout.
+ */
+public class TestContainerProvider extends LocalContainerProvider {
+
+ public TestContainerProvider(String providerName) {
+ super(TestUtils.zkAddress, TestUtils.managedClusterName, providerName);
+ }
+
+ // Overrides the parent cleanup; @Override annotation is missing — consider adding.
+ public void destroyAll() {
+ super.destroyAll();
+ LocalContainerSingleton.reset();
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/TestStatusProvider.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/TestStatusProvider.java b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/TestStatusProvider.java
new file mode 100644
index 0000000..7e7f401
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/TestStatusProvider.java
@@ -0,0 +1,20 @@
+package org.apache.helix.metamanager;
+
+/**
+ * Test double: a mutable, fixed-count status provider. The target container
+ * count is a single value shared across all container types — the type
+ * argument is ignored.
+ * NOTE(review): implements ClusterStatusProvider while other tests here use
+ * StaticTargetProvider/TargetProvider — this interface name may be stale.
+ */
+public class TestStatusProvider implements ClusterStatusProvider {
+
+ // Single shared target count, regardless of container type.
+ int targetContainerCount;
+
+ public TestStatusProvider(int targetContainerCount) {
+ this.targetContainerCount = targetContainerCount;
+ }
+
+ @Override
+ public int getTargetContainerCount(String type) {
+ return targetContainerCount;
+ }
+
+ /** Updates the count returned for every subsequent getTargetContainerCount call. */
+ public void setTargetContainerCount(int targetContainerCount) {
+ this.targetContainerCount = targetContainerCount;
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/TestUtils.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/TestUtils.java b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/TestUtils.java
new file mode 100644
index 0000000..04587f0
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/TestUtils.java
@@ -0,0 +1,438 @@
+package org.apache.helix.metamanager;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+import java.util.Set;
+import java.util.concurrent.TimeoutException;
+
+import org.I0Itec.zkclient.IDefaultNameSpace;
+import org.I0Itec.zkclient.ZkClient;
+import org.I0Itec.zkclient.ZkServer;
+import org.apache.commons.io.FileUtils;
+import org.apache.helix.HelixAdmin;
+import org.apache.helix.HelixManager;
+import org.apache.helix.controller.HelixControllerMain;
+import org.apache.helix.manager.zk.ZKHelixAdmin;
+import org.apache.helix.metamanager.impl.local.LocalContainerProviderProcess;
+import org.apache.helix.metamanager.impl.shell.ShellContainerProviderProcess;
+import org.apache.helix.metamanager.impl.yarn.YarnContainerProviderProcess;
+import org.apache.helix.metamanager.impl.yarn.YarnContainerProviderProperties;
+import org.apache.helix.metamanager.provider.ProviderProperties;
+import org.apache.helix.metamanager.provider.ProviderRebalancer;
+import org.apache.helix.metamanager.provider.ProviderRebalancerSingleton;
+import org.apache.helix.model.ExternalView;
+import org.apache.helix.model.IdealState;
+import org.apache.helix.model.IdealState.RebalanceMode;
+import org.apache.helix.model.StateModelDefinition;
+import org.apache.helix.tools.StateModelConfigGenerator;
+import org.apache.log4j.Logger;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.Maps;
+
+/**
+ * Utility for creating a test cluster without the bootstrapping tool. Methods
+ * for verifying the number of active instances and partitions in a cluster.
+ *
+ */
+public class TestUtils {
+
+ static final Logger log = Logger.getLogger(TestUtils.class);
+
+ // Connection settings; populated from a properties resource via configure().
+ public static int zkPort;
+ public static String zkAddress;
+ public static String resmanAddress;
+ public static String schedulerAddress;
+ public static String hdfsAddress;
+ public static String yarnUser;
+
+ // Fixed cluster and resource names shared by all integration tests.
+ public static final String metaClusterName = "meta-cluster";
+ public static final String managedClusterName = "managed-cluster";
+ public static final String metaResourceName = "container";
+ public static final String managedResourceName = "database";
+
+ public static final int numManagedPartitions = 10;
+ public static final int numManagedReplica = 2;
+
+ // Timeouts and poll interval (ms) used by the wait helpers and @Test annotations.
+ public static final long TEST_TIMEOUT = 120000;
+ public static final long REBALANCE_TIMEOUT = 60000;
+ public static final long POLL_INTERVAL = 1000;
+
+ public static final ProviderProperties providerProperties = new ProviderProperties();
+
+ // Shared mutable harness state; torn down by stopTestCluster()/stopZookeeper().
+ public static ZkServer server = null;
+ public static HelixAdmin admin = null;
+ public static HelixManager metaControllerManager = null;
+ public static HelixManager managedControllerManager = null;
+
+ public static Collection<Service> providerServices = new ArrayList<Service>();
+ public static Collection<Service> auxServices = new ArrayList<Service>();
+
+ public static TargetProvider targetProvider = null;
+ public static StatusProvider statusProvider = null;
+
+ // Eagerly load the default configuration so constants like TEST_TIMEOUT are
+ // usable from annotations; fail fast if the resource is missing or malformed.
+ static {
+ try {
+ configure();
+ } catch(Exception e) {
+ log.error("Could not setup TestUtils", e);
+ throw new RuntimeException(e);
+ }
+ }
+
+ // Utility class; not instantiable.
+ private TestUtils() {
+ // left blank
+ }
+
+ /** Configures the harness from the default standalone.properties classpath resource. */
+ public static void configure() throws IOException {
+ configure("standalone.properties");
+ }
+
+ /**
+ * Configures the harness from the given classpath properties resource.
+ *
+ * @param resourcePath classpath-relative properties file name
+ * @throws IOException if the resource cannot be read
+ */
+ public static void configure(String resourcePath) throws IOException {
+ log.info(String.format("Configuring Test cluster from %s", resourcePath));
+ Properties properties = new Properties();
+ properties.load(ClassLoader.getSystemResourceAsStream(resourcePath));
+ configure(properties);
+ }
+
+ /**
+ * Reads the connection settings from the given properties; every key is
+ * required.
+ * NOTE(review): checkNotNull(zkPort) checks an autoboxed primitive and can
+ * never fail — a missing "zookeeper.port" already throws inside
+ * Integer.valueOf above. The remaining checks are meaningful.
+ */
+ public static void configure(Properties properties) {
+ log.info(String.format("Configuring from properties '%s'", properties));
+
+ zkPort = Integer.valueOf(properties.getProperty("zookeeper.port"));
+ zkAddress = properties.getProperty("zookeeper.address");
+ resmanAddress = properties.getProperty("yarn.resourcemanager");
+ schedulerAddress = properties.getProperty("yarn.scheduler");
+ hdfsAddress = properties.getProperty("yarn.hdfs");
+ yarnUser = properties.getProperty("yarn.user");
+
+ Preconditions.checkNotNull(zkPort);
+ Preconditions.checkNotNull(zkAddress);
+ Preconditions.checkNotNull(resmanAddress);
+ Preconditions.checkNotNull(schedulerAddress);
+ Preconditions.checkNotNull(hdfsAddress);
+ Preconditions.checkNotNull(yarnUser);
+
+ configureInternal();
+ }
+
+ /**
+ * Rebuilds the shared provider properties from the current connection
+ * settings and registers the dummy master/slave container class.
+ */
+ static void configureInternal() {
+ providerProperties.clear();
+ providerProperties.setProperty(ProviderProperties.ADDRESS, zkAddress);
+ providerProperties.setProperty(ProviderProperties.CLUSTER, managedClusterName);
+ providerProperties.setProperty(ProviderProperties.METAADDRESS, zkAddress);
+ providerProperties.setProperty(ProviderProperties.METACLUSTER, metaClusterName);
+ // Placeholder; the real provider name is set per-provider in makeProviderProperties().
+ providerProperties.setProperty(ProviderProperties.NAME, "<unknown>");
+
+ Properties containerProperties = new Properties();
+ containerProperties.setProperty("class", "org.apache.helix.metamanager.impl.container.DummyMasterSlaveProcess");
+
+ providerProperties.addContainer("container", containerProperties);
+
+ log.info(String.format("Using provider properties '%s'", providerProperties));
+ }
+
+ /** Starts the embedded ZooKeeper server; fails if one is already running. */
+ public static void startZookeeper() throws Exception {
+ log.info("Starting ZooKeeper");
+
+ if (server != null)
+ throw new IllegalStateException("Zookeeper already running");
+
+ server = createLocalZookeeper();
+ server.start();
+ }
+
+ /** Shuts down the embedded ZooKeeper server if running; safe to call repeatedly. */
+ public static void stopZookeeper() throws Exception {
+ log.info("Stopping ZooKeeper");
+
+ if (server != null) {
+ server.shutdown();
+ server = null;
+ }
+ }
+
+ /**
+ * Builds and starts the full test setup: creates the meta and managed Helix
+ * clusters, wires the rebalancer singletons, starts the target/status
+ * providers and the given container providers, starts both controllers, and
+ * blocks until the expected container count is reached.
+ *
+ * @param targetProvider supplies the desired container count
+ * @param statusProvider reports container liveness
+ * @param containerProviderProcesses providers to start (one Helix participant each)
+ * @throws IllegalStateException if ZooKeeper is not running or a cluster is already up
+ */
+ public static void startTestCluster(TargetProviderService targetProvider, StatusProviderService statusProvider, Service... containerProviderProcesses)
+ throws Exception {
+ log.debug(String.format("Starting test cluster"));
+
+ if (server == null)
+ throw new IllegalStateException("Zookeeper not running yet");
+
+ if (!auxServices.isEmpty() || !providerServices.isEmpty() || admin != null || metaControllerManager != null || managedControllerManager != null)
+ throw new IllegalStateException("TestCluster already running");
+
+ log.debug("Create admin");
+ admin = new ZKHelixAdmin(zkAddress);
+
+ log.debug("Create clusters");
+ admin.addCluster(metaClusterName, true);
+ admin.addCluster(managedClusterName, true);
+
+ log.debug("Setup config tool");
+ ProviderRebalancerSingleton.setTargetProvider(targetProvider);
+ ProviderRebalancerSingleton.setStatusProvider(statusProvider);
+
+ log.debug("Starting target and status provider");
+ TestUtils.targetProvider = startAuxService(targetProvider);
+ TestUtils.statusProvider = startAuxService(statusProvider);
+
+ // Managed Cluster
+ log.debug("Setup managed cluster");
+ admin.addStateModelDef(managedClusterName, "MasterSlave", new StateModelDefinition(StateModelConfigGenerator.generateConfigForMasterSlave()));
+ admin.addResource(managedClusterName, managedResourceName, numManagedPartitions, "MasterSlave", RebalanceMode.FULL_AUTO.toString());
+ IdealState managedIdealState = admin.getResourceIdealState(managedClusterName, managedResourceName);
+ managedIdealState.setInstanceGroupTag(metaResourceName);
+ managedIdealState.setReplicas(String.valueOf(numManagedReplica));
+ admin.setResourceIdealState(managedClusterName, managedResourceName, managedIdealState);
+
+ // Meta Cluster
+ log.debug("Setup meta cluster");
+ admin.addStateModelDef(metaClusterName, "OnlineOffline", new StateModelDefinition(StateModelConfigGenerator.generateConfigForOnlineOffline()));
+ admin.addResource(metaClusterName, metaResourceName, targetProvider.getTargetContainerCount(metaResourceName), "OnlineOffline",
+ RebalanceMode.USER_DEFINED.toString());
+
+ IdealState idealState = admin.getResourceIdealState(metaClusterName, metaResourceName);
+ idealState.setRebalancerClassName(ProviderRebalancer.class.getName());
+ idealState.setReplicas("1");
+
+ // BEGIN workaround
+ // FIXME workaround for HELIX-226
+ // Pre-populate 256 partition entries so the USER_DEFINED rebalancer has
+ // records to work with; 256 is an upper bound on container count here.
+ Map<String, List<String>> listFields = Maps.newHashMap();
+ Map<String, Map<String, String>> mapFields = Maps.newHashMap();
+ for (int i = 0; i < 256; i++) {
+ String partitionName = metaResourceName + "_" + i;
+ listFields.put(partitionName, new ArrayList<String>());
+ mapFields.put(partitionName, new HashMap<String, String>());
+ }
+ idealState.getRecord().setListFields(listFields);
+ idealState.getRecord().setMapFields(mapFields);
+ // END workaround
+
+ admin.setResourceIdealState(metaClusterName, metaResourceName, idealState);
+
+ log.debug("Starting container providers");
+ for (Service service : containerProviderProcesses) {
+ startProviderService(service);
+ }
+
+ log.debug("Starting managed cluster controller");
+ managedControllerManager = HelixControllerMain.startHelixController(zkAddress, managedClusterName, "managedController", HelixControllerMain.STANDALONE);
+
+ log.debug("Starting meta cluster controller");
+ metaControllerManager = HelixControllerMain.startHelixController(zkAddress, metaClusterName, "metaController", HelixControllerMain.STANDALONE);
+
+ log.debug("Waiting for stable state");
+ waitUntilRebalancedCount(targetProvider.getTargetContainerCount(metaResourceName));
+ }
+
+ /**
+ * Stops controllers, provider services and aux services and clears the
+ * handles; safe to call when nothing is running.
+ * NOTE(review): the static targetProvider/statusProvider references are not
+ * cleared here even though the services backing them are stopped — TODO
+ * confirm whether that is intentional.
+ */
+ public static void stopTestCluster() throws Exception {
+ log.debug(String.format("Stopping test cluster"));
+ if (managedControllerManager != null) {
+ log.info("Disconnecting managed cluster controller");
+ managedControllerManager.disconnect();
+ }
+ if (metaControllerManager != null) {
+ log.info("Disconnecting meta cluster controller");
+ metaControllerManager.disconnect();
+ }
+ log.info("Stopping provider services");
+ if (providerServices != null) {
+ for (Service service : providerServices) {
+ service.stop();
+ }
+ providerServices.clear();
+ }
+ // "auxillary" below is a misspelling of "auxiliary"; left untouched since
+ // this is a runtime log string.
+ log.debug("Stopping auxillary services");
+ if (auxServices != null) {
+ for (Service service : auxServices) {
+ service.stop();
+ }
+ auxServices.clear();
+ }
+
+ admin = null;
+ metaControllerManager = null;
+ managedControllerManager = null;
+ }
+
+ /** Registers and starts an auxiliary (non-provider) service; returns it for chaining. */
+ public static <T extends Service> T startAuxService(T service) throws Exception {
+ auxServices.add(service);
+ service.start();
+ return service;
+ }
+
+ /** Registers and starts a container provider service; returns it for chaining. */
+ public static <T extends Service> T startProviderService(T service) throws Exception {
+ providerServices.add(service);
+ service.start();
+ return service;
+ }
+
+ /**
+ * Triggers a rebalance by rewriting the meta ideal state unchanged ("poke"),
+ * then blocks until the current target container count is reached.
+ */
+ public static void rebalanceTestCluster() throws Exception {
+ log.debug(String.format("Triggering rebalance"));
+ IdealState poke = admin.getResourceIdealState(metaClusterName, metaResourceName);
+ admin.setResourceIdealState(metaClusterName, metaResourceName, poke);
+
+ int current = targetProvider.getTargetContainerCount(TestUtils.metaResourceName);
+ waitUntilRebalancedCount(current);
+ }
+
+ /**
+ * Blocks until both clusters report the expected counts, sharing a single
+ * REBALANCE_TIMEOUT budget across the individual waits. Opens a fresh
+ * HelixAdmin connection (deliberately shadowing the static field) so it can
+ * be used while the harness admin is in any state.
+ * NOTE(review): the catch-and-rethrow of Exception below is a no-op; a plain
+ * try/finally would be equivalent.
+ */
+ public static void waitUntilRebalancedCount(int containerCount) throws Exception {
+ log.debug(String.format("Waiting for rebalance with %d containers at '%s'", containerCount, zkAddress));
+
+ HelixAdmin admin = new ZKHelixAdmin(zkAddress);
+
+ try {
+ long limit = System.currentTimeMillis() + REBALANCE_TIMEOUT;
+ waitUntilPartitionCount(admin, metaClusterName, metaResourceName, containerCount, (limit - System.currentTimeMillis()));
+ waitUntilInstanceCount(admin, metaClusterName, metaResourceName, providerServices.size(), (limit - System.currentTimeMillis()));
+ waitUntilPartitionCount(admin, managedClusterName, managedResourceName, numManagedPartitions, (limit - System.currentTimeMillis()));
+
+ // FIXME workaround for Helix FULL_AUTO rebalancer not providing guarantees for cluster expansion
+ //waitUntilInstanceCount(admin, managedClusterName, managedResourceName, containerCount, (limit - System.currentTimeMillis()));
+ } catch (Exception e) {
+ throw e;
+ } finally {
+ admin.close();
+ }
+ }
+
+ /**
+ * Polls the external view until the number of assigned instances equals
+ * targetCount, or throws TimeoutException after the given timeout (ms).
+ */
+ public static void waitUntilInstanceCount(HelixAdmin admin, String cluster, String resource, int targetCount, long timeout) throws Exception {
+ log.debug(String.format("Waiting for instance count (cluster='%s', resource='%s', instanceCount=%d, timeout=%d)", cluster, resource, targetCount,
+ timeout));
+
+ long limit = System.currentTimeMillis() + timeout;
+ while (limit > System.currentTimeMillis()) {
+ int assignedCount = getAssingedInstances(admin, cluster, resource).size();
+ log.debug(String.format("checking instance count for '%s:%s': target=%d, current=%d", cluster, resource, targetCount, assignedCount));
+
+ if (targetCount == assignedCount) {
+ return;
+ }
+ Thread.sleep(POLL_INTERVAL);
+ }
+ throw new TimeoutException();
+ }
+
+ /**
+ * Polls the external view until the number of assigned partitions equals
+ * targetCount, or throws TimeoutException after the given timeout (ms).
+ */
+ public static void waitUntilPartitionCount(HelixAdmin admin, String cluster, String resource, int targetCount, long timeout) throws Exception {
+ log.debug(String.format("Waiting for partition count (cluster='%s', resource='%s', partitionCount=%d, timeout=%d)", cluster, resource, targetCount,
+ timeout));
+
+ long limit = System.currentTimeMillis() + timeout;
+ while (limit > System.currentTimeMillis()) {
+ int assignedCount = getAssingedPartitions(admin, cluster, resource).size();
+ log.debug(String.format("checking partition count for '%s:%s': target=%d, current=%d", cluster, resource, targetCount, assignedCount));
+
+ if (targetCount == assignedCount) {
+ return;
+ }
+ Thread.sleep(POLL_INTERVAL);
+ }
+ throw new TimeoutException();
+ }
+
+ /**
+ * Returns the set of instances holding at least one partition in an active
+ * state (MASTER, SLAVE or ONLINE) according to the external view; empty if
+ * the external view does not exist yet.
+ * NOTE(review): "Assinged" is a misspelling of "Assigned"; the name is kept
+ * because renaming a public method could break callers outside this file.
+ */
+ public static Set<String> getAssingedInstances(HelixAdmin admin, String clusterName, String resourceName) {
+ Set<String> assignedInstances = new HashSet<String>();
+
+ ExternalView externalView = admin.getResourceExternalView(clusterName, resourceName);
+
+ if (externalView == null)
+ return assignedInstances;
+
+ for (String partitionName : externalView.getPartitionSet()) {
+ Map<String, String> stateMap = externalView.getStateMap(partitionName);
+ if (stateMap == null)
+ continue;
+
+ for (String instanceName : stateMap.keySet()) {
+ String state = stateMap.get(instanceName);
+ if ("MASTER".equals(state) || "SLAVE".equals(state) || "ONLINE".equals(state)) {
+ assignedInstances.add(instanceName);
+ }
+ }
+ }
+
+ return assignedInstances;
+ }
+
+ /**
+ * Returns the set of partitions that have at least one replica in a leading
+ * state (MASTER or ONLINE — note: unlike getAssingedInstances, SLAVE does
+ * not count here) according to the external view; empty if the external view
+ * does not exist yet.
+ */
+ public static Set<String> getAssingedPartitions(HelixAdmin admin, String clusterName, String resourceName) {
+ Set<String> assignedPartitions = new HashSet<String>();
+
+ ExternalView externalView = admin.getResourceExternalView(clusterName, resourceName);
+
+ if (externalView == null)
+ return assignedPartitions;
+
+ for (String partitionName : externalView.getPartitionSet()) {
+ Map<String, String> stateMap = externalView.getStateMap(partitionName);
+ if (stateMap == null)
+ continue;
+
+ for (String instanceName : stateMap.keySet()) {
+ String state = stateMap.get(instanceName);
+ if ("MASTER".equals(state) || "ONLINE".equals(state)) {
+ assignedPartitions.add(partitionName);
+ }
+ }
+ }
+
+ return assignedPartitions;
+ }
+
+ /**
+ * Creates an embedded ZooKeeper server on zkPort, wiping any previous data
+ * and log directories under /tmp/metamanager/ first.
+ */
+ public static ZkServer createLocalZookeeper() throws Exception {
+ String baseDir = "/tmp/metamanager/";
+ final String dataDir = baseDir + "zk/dataDir";
+ final String logDir = baseDir + "zk/logDir";
+ FileUtils.deleteDirectory(new File(dataDir));
+ FileUtils.deleteDirectory(new File(logDir));
+
+ IDefaultNameSpace defaultNameSpace = new IDefaultNameSpace() {
+ @Override
+ public void createDefaultNameSpace(ZkClient zkClient) {
+
+ }
+ };
+ return new ZkServer(dataDir, logDir, defaultNameSpace, zkPort);
+ }
+
+ /** Creates (unstarted) a local container provider configured with the shared properties. */
+ public static LocalContainerProviderProcess makeLocalProvider(String name) throws Exception {
+ LocalContainerProviderProcess process = new LocalContainerProviderProcess();
+ process.configure(makeProviderProperties(name));
+ return process;
+ }
+
+ /** Creates (unstarted) a shell container provider configured with the shared properties. */
+ public static ShellContainerProviderProcess makeShellProvider(String name) throws Exception {
+ ShellContainerProviderProcess process = new ShellContainerProviderProcess();
+ process.configure(makeProviderProperties(name));
+ return process;
+ }
+
+ /**
+ * Creates (unstarted) a YARN container provider, extending the shared
+ * properties with the YARN endpoints loaded by configure().
+ * NOTE(review): YARNDATA is set to zkAddress — the YARN metadata appears to
+ * live in the same ZooKeeper; confirm this is intended.
+ */
+ public static YarnContainerProviderProcess makeYarnProvider(String name) throws Exception {
+ YarnContainerProviderProperties properties = new YarnContainerProviderProperties();
+
+ properties.putAll(makeProviderProperties(name));
+ properties.put(YarnContainerProviderProperties.YARNDATA, zkAddress);
+ properties.put(YarnContainerProviderProperties.RESOURCEMANAGER, resmanAddress);
+ properties.put(YarnContainerProviderProperties.SCHEDULER, schedulerAddress);
+ properties.put(YarnContainerProviderProperties.USER, yarnUser);
+ properties.put(YarnContainerProviderProperties.HDFS, hdfsAddress);
+
+ YarnContainerProviderProcess process = new YarnContainerProviderProcess();
+ process.configure(properties);
+
+ return process;
+ }
+
+ /** Copies the shared provider properties and stamps the given provider name on them. */
+ static ProviderProperties makeProviderProperties(String name) {
+ ProviderProperties properties = new ProviderProperties();
+ properties.putAll(providerProperties);
+ properties.setProperty(ProviderProperties.NAME, name);
+ return properties;
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/TestUtilsTest.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/TestUtilsTest.java b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/TestUtilsTest.java
new file mode 100644
index 0000000..3f0bd3e
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/TestUtilsTest.java
@@ -0,0 +1,30 @@
+package org.apache.helix.metamanager;
+
+import java.util.Collections;
+import java.util.List;
+
+import org.testng.annotations.Test;
+
+/**
+ * Smoke test for starting and stopping the TestUtils cluster harness.
+ * NOTE(review): these calls do not match the startTestCluster(
+ * TargetProviderService, StatusProviderService, Service...) signature defined
+ * in TestUtils, and ClusterStatusProvider/ClusterContainerProvider appear to
+ * be older interface names — this class looks stale and may not compile;
+ * verify against the current API.
+ */
+public class TestUtilsTest {
+
+ /** Single start/stop cycle. */
+ @Test
+ public void testStartStop() throws Exception {
+ TestUtils.startTestCluster(new TestStatusProvider(1),
+ Collections.<ClusterContainerProvider>singletonList(new TestContainerProvider("test")));
+ TestUtils.stopTestCluster();
+ }
+
+ /** Two consecutive start/stop cycles reusing the same provider instances. */
+ @Test
+ public void testStartStopRepeated() throws Exception {
+ ClusterStatusProvider statusProvider = new TestStatusProvider(1);
+ List<ClusterContainerProvider> containerProviders = Collections.<ClusterContainerProvider>singletonList(new TestContainerProvider("test"));
+
+ TestUtils.startTestCluster(statusProvider, containerProviders);
+ TestUtils.stopTestCluster();
+
+ TestUtils.startTestCluster(statusProvider, containerProviders);
+ TestUtils.stopTestCluster();
+
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/TestUtilsUT.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/TestUtilsUT.java b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/TestUtilsUT.java
new file mode 100644
index 0000000..50d7121
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/TestUtilsUT.java
@@ -0,0 +1,63 @@
+package org.apache.helix.metamanager;
+
+import java.util.Collections;
+
+import org.apache.helix.metamanager.Service;
+import org.apache.helix.metamanager.StatusProviderService;
+import org.apache.helix.metamanager.TargetProviderService;
+import org.apache.helix.metamanager.impl.StaticTargetProvider;
+import org.apache.helix.metamanager.impl.local.LocalStatusProvider;
+import org.apache.log4j.Logger;
+import org.testng.annotations.Test;
+
+/**
+ * Self-test of test cluster. Spawning zookeeper and cluster with single provider and single instance.
+ *
+ * @see TestUtils
+ */
+@Test(groups={"unit"})
+public class TestUtilsUT {
+
+ static final Logger log = Logger.getLogger(TestUtilsUT.class);
+
+ /** ZooKeeper alone can be started and stopped. */
+ @Test(timeOut = TestUtils.TEST_TIMEOUT)
+ public void testZookeeper() throws Exception {
+ log.info("testing zookeeper");
+ TestUtils.configure();
+ TestUtils.startZookeeper();
+ TestUtils.stopZookeeper();
+ }
+
+ /** A one-provider cluster can be started and stopped once. */
+ @Test(timeOut = TestUtils.TEST_TIMEOUT)
+ public void testCluster() throws Exception {
+ log.info("testing cluster");
+ TestUtils.configure();
+ TestUtils.startZookeeper();
+
+ TestUtils.startTestCluster(new StaticTargetProvider(Collections.singletonMap(TestUtils.metaResourceName, 1)),
+ new LocalStatusProvider(), TestUtils.makeLocalProvider("test"));
+ TestUtils.stopTestCluster();
+
+ TestUtils.stopZookeeper();
+ }
+
+ /**
+ * Two consecutive cluster start/stop cycles over one ZooKeeper instance.
+ * NOTE(review): reuses the same Service/provider instances across cycles —
+ * assumes Service implementations support restart after stop(); confirm.
+ */
+ @Test(timeOut = TestUtils.TEST_TIMEOUT)
+ public void testClusterRepeated() throws Exception {
+ log.info("testing cluster restart");
+ TestUtils.configure();
+ TestUtils.startZookeeper();
+
+ TargetProviderService statusProvider = new StaticTargetProvider(Collections.singletonMap(TestUtils.metaResourceName, 1));
+ Service containerProvider = TestUtils.makeLocalProvider("test");
+ StatusProviderService containerStatusProvider = new LocalStatusProvider();
+
+ TestUtils.startTestCluster(statusProvider, containerStatusProvider, containerProvider);
+ TestUtils.stopTestCluster();
+
+ TestUtils.startTestCluster(statusProvider, containerStatusProvider, containerProvider);
+ TestUtils.stopTestCluster();
+
+ TestUtils.stopZookeeper();
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/YarnContainerProviderIT.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/YarnContainerProviderIT.java b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/YarnContainerProviderIT.java
new file mode 100644
index 0000000..5d319ff
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/YarnContainerProviderIT.java
@@ -0,0 +1,101 @@
+package org.apache.helix.metamanager;
+
+import java.util.Collections;
+
+import org.apache.helix.metamanager.impl.StaticTargetProvider;
+import org.apache.helix.metamanager.impl.yarn.YarnContainerProviderProcess;
+import org.apache.helix.metamanager.impl.yarn.YarnContainerProviderProperties;
+import org.apache.helix.metamanager.impl.yarn.YarnStatusProvider;
+import org.apache.log4j.Logger;
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.Test;
+
+/**
+ * Yarn container provider and yarn status provider test. Scale-up and -down
+ * only, no failures.
+ *
+ * @see YarnContainerProvider
+ * @see YarnStatusProvider
+ */
+ @Test(groups={"integration", "yarn"})
+ public class YarnContainerProviderIT {
+
+ static final Logger log = Logger.getLogger(YarnContainerProviderIT.class);
+
+ // baseline number of containers requested from the yarn provider per test
+ static final int CONTAINER_COUNT = 4;
+
+ StaticTargetProvider clusterStatusProvider;
+ YarnContainerProviderProcess containerProvider;
+ YarnStatusProvider containerStatusProvider;
+
+ YarnContainerProviderProperties properties;
+
+ @BeforeClass(alwaysRun = true)
+ public void setupClass() throws Exception {
+ log.info("installing shutdown hook");
+ // best-effort cleanup of yarn containers and zookeeper on abnormal JVM exit
+ Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() {
+ @Override
+ public void run() {
+ try { teardownTest(); } catch(Exception ignore) {};
+ }
+ }));
+ }
+
+ @BeforeMethod(alwaysRun = true)
+ public void setupTest() throws Exception {
+ log.debug("setting up yarn test case");
+
+ // tear down first in case a previous test left the cluster running
+ teardownTest();
+ TestUtils.configure("distributed.properties");
+ TestUtils.startZookeeper();
+
+ containerProvider = TestUtils.makeYarnProvider("provider_0");
+ containerStatusProvider = new YarnStatusProvider(TestUtils.zkAddress);
+ clusterStatusProvider = new StaticTargetProvider(Collections.singletonMap(TestUtils.metaResourceName, CONTAINER_COUNT));
+ TestUtils.startTestCluster(clusterStatusProvider, containerStatusProvider, containerProvider);
+
+ log.debug("running yarn test case");
+ }
+
+ @AfterMethod(alwaysRun = true)
+ public void teardownTest() throws Exception {
+ log.debug("cleaning up yarn test case");
+ TestUtils.stopTestCluster();
+ TestUtils.stopZookeeper();
+ }
+
+ // container count unchanged; rebalance should be a no-op
+ @Test(timeOut = TestUtils.TEST_TIMEOUT)
+ public void testStatic() throws Exception {
+ log.info("testing static");
+ setContainerCount(CONTAINER_COUNT);
+ }
+
+ @Test(timeOut = TestUtils.TEST_TIMEOUT)
+ public void testScaleUp() throws Exception {
+ log.info("testing scale up");
+ setContainerCount(CONTAINER_COUNT + 2);
+ }
+
+ @Test(timeOut = TestUtils.TEST_TIMEOUT)
+ public void testScaleDown() throws Exception {
+ log.info("testing scale down");
+ setContainerCount(CONTAINER_COUNT - 2);
+ }
+
+ // up, back to baseline, down, back to baseline
+ @Test(timeOut = TestUtils.TEST_TIMEOUT)
+ public void testScaleCycle() throws Exception {
+ log.info("testing scale cycle");
+ setContainerCount(CONTAINER_COUNT + 2);
+ setContainerCount(CONTAINER_COUNT);
+ setContainerCount(CONTAINER_COUNT - 2);
+ setContainerCount(CONTAINER_COUNT);
+ }
+
+ // Updates the static target and triggers a rebalance; blocks until the
+ // cluster converges (or the enclosing test timeout fires).
+ void setContainerCount(int newContainerCount) throws Exception {
+ log.debug(String.format("Setting container count to %d", newContainerCount));
+ clusterStatusProvider.setTargetContainerCount(TestUtils.metaResourceName, newContainerCount);
+ TestUtils.rebalanceTestCluster();
+ }
+ }
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/integration/BootstrapperIT.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/integration/BootstrapperIT.java b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/integration/BootstrapperIT.java
new file mode 100644
index 0000000..a7bae00
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/test/java/org/apache/helix/metamanager/integration/BootstrapperIT.java
@@ -0,0 +1,127 @@
+package org.apache.helix.metamanager.integration;
+
+import java.io.IOException;
+import java.util.Collection;
+import java.util.Properties;
+
+import org.apache.helix.HelixAdmin;
+import org.apache.helix.manager.zk.ZKHelixAdmin;
+import org.apache.helix.metamanager.Service;
+import org.apache.helix.metamanager.TestUtils;
+import org.apache.helix.metamanager.bootstrapper.Boot;
+import org.apache.helix.metamanager.bootstrapper.ClusterService;
+import org.apache.helix.metamanager.bootstrapper.ControllerService;
+import org.apache.helix.metamanager.bootstrapper.MetaClusterService;
+import org.apache.helix.metamanager.bootstrapper.MetaControllerService;
+import org.apache.helix.metamanager.bootstrapper.MetaProviderService;
+import org.apache.helix.metamanager.bootstrapper.MetaResourceService;
+import org.apache.helix.metamanager.bootstrapper.ResourceService;
+import org.apache.helix.metamanager.bootstrapper.ZookeeperService;
+import org.apache.log4j.Logger;
+import org.testng.Assert;
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.Test;
+
+ // Integration tests for the Boot bootstrapper: each test loads a properties
+ // file describing a full deployment (zookeeper, clusters, resources,
+ // controllers, providers), starts it, and waits for the expected instance
+ // and partition counts to appear via HelixAdmin.
+ public class BootstrapperIT {
+
+ static final Logger log = Logger.getLogger(BootstrapperIT.class);
+
+ Boot boot;
+ HelixAdmin admin;
+
+ @AfterMethod
+ public void teardown() throws Exception {
+ log.debug("tearing down bootstrap test");
+ if(admin != null) {
+ admin.close();
+ admin = null;
+ }
+ if (boot != null) {
+ boot.stop();
+ boot = null;
+ }
+ }
+
+ @Test(timeOut = TestUtils.TEST_TIMEOUT)
+ public void bootstrapLocalTest() throws Exception {
+ boot = new Boot();
+ boot.configure(getProperties("BootLocal.properties"));
+ boot.start();
+
+ // NOTE(review): "getServcies" is a typo in the Boot API itself ("getServices");
+ // it should be fixed at the declaration site, not here.
+ Assert.assertTrue(containsInstanceOf(boot.getServcies(), ZookeeperService.class));
+ Assert.assertTrue(containsInstanceOf(boot.getServcies(), ClusterService.class));
+ Assert.assertTrue(containsInstanceOf(boot.getServcies(), ResourceService.class));
+ Assert.assertTrue(containsInstanceOf(boot.getServcies(), ControllerService.class));
+ Assert.assertTrue(containsInstanceOf(boot.getServcies(), MetaClusterService.class));
+ Assert.assertTrue(containsInstanceOf(boot.getServcies(), MetaResourceService.class));
+ Assert.assertTrue(containsInstanceOf(boot.getServcies(), MetaProviderService.class));
+ Assert.assertTrue(containsInstanceOf(boot.getServcies(), MetaControllerService.class));
+
+ // single deadline shared by both waits so the total never exceeds REBALANCE_TIMEOUT
+ final long limit = System.currentTimeMillis() + TestUtils.REBALANCE_TIMEOUT;
+
+ admin = new ZKHelixAdmin("localhost:2199");
+ waitUntil(admin, "meta", "container", 1, 7, (limit - System.currentTimeMillis()));
+ waitUntil(admin, "cluster", "resource", 7, 10, (limit - System.currentTimeMillis()));
+
+ }
+
+ @Test(timeOut = TestUtils.TEST_TIMEOUT)
+ public void bootstrap2By2LocalTest() throws Exception {
+ boot = new Boot();
+ boot.configure(getProperties("Boot2By2Local.properties"));
+ boot.start();
+
+ verify2By2Setup();
+ }
+
+ @Test(timeOut = TestUtils.TEST_TIMEOUT)
+ public void bootstrap2By2ShellTest() throws Exception {
+ boot = new Boot();
+ boot.configure(getProperties("Boot2By2Shell.properties"));
+ boot.start();
+
+ verify2By2Setup();
+ }
+
+ @Test(timeOut = TestUtils.TEST_TIMEOUT)
+ public void bootstrap2By2YarnTest() throws Exception {
+ boot = new Boot();
+ boot.configure(getProperties("Boot2By2Yarn.properties"));
+ boot.start();
+
+ verify2By2Setup();
+ }
+
+ // Asserts the 2-provider by 2-container-type topology converged: expected
+ // instance and partition counts on both the meta cluster and the managed cluster.
+ void verify2By2Setup() throws Exception {
+ final long limit = System.currentTimeMillis() + TestUtils.REBALANCE_TIMEOUT;
+ final String address = "localhost:2199";
+
+ log.debug(String.format("connecting to zookeeper at '%s'", address));
+
+ admin = new ZKHelixAdmin(address);
+ waitUntil(admin, "meta", "database", 2, 3, (limit - System.currentTimeMillis()));
+ waitUntil(admin, "meta", "webserver", 2, 5, (limit - System.currentTimeMillis()));
+ waitUntil(admin, "cluster", "dbprod", 3, 8, (limit - System.currentTimeMillis()));
+ waitUntil(admin, "cluster", "wsprod", 5, 15, (limit - System.currentTimeMillis()));
+ }
+
+ // Waits (up to timeout, shared across both checks) for the resource to reach
+ // the given live-instance count and partition count.
+ static void waitUntil(HelixAdmin admin, String cluster, String resource, int instanceCount, int partitionCount, long timeout) throws Exception {
+ final long limit = System.currentTimeMillis() + timeout;
+ TestUtils.waitUntilInstanceCount(admin, cluster, resource, instanceCount, (limit - System.currentTimeMillis()));
+ TestUtils.waitUntilPartitionCount(admin, cluster, resource, partitionCount, (limit - System.currentTimeMillis()));
+ }
+
+ // Loads a properties file from the classpath (test resources).
+ static Properties getProperties(String resourcePath) throws IOException {
+ Properties properties = new Properties();
+ properties.load(ClassLoader.getSystemResourceAsStream(resourcePath));
+ return properties;
+ }
+
+ // True if any element of services is an instance of clazz (or a subtype).
+ static boolean containsInstanceOf(Collection<Service> services, Class<?> clazz) {
+ for(Service service : services) {
+ if(clazz.isAssignableFrom(service.getClass())) return true;
+ }
+ return false;
+ }
+
+ }
[06/15] Adding Helix-task-framework and Yarn integration modules
Posted by ki...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/local/LocalContainerProcess.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/local/LocalContainerProcess.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/local/LocalContainerProcess.java
new file mode 100644
index 0000000..110fe68
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/local/LocalContainerProcess.java
@@ -0,0 +1,64 @@
+package org.apache.helix.metamanager.impl.local;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import org.apache.helix.HelixManager;
+import org.apache.helix.HelixManagerFactory;
+import org.apache.helix.InstanceType;
+import org.apache.helix.metamanager.container.ContainerStateModelFactory;
+import org.apache.log4j.Logger;
+
+ // Helix participant wrapper for a VM-local container: connects to the
+ // managed cluster as a MasterSlave participant under the given instance name.
+ public class LocalContainerProcess
+ {
+ static final Logger log = Logger.getLogger(LocalContainerProcess.class);
+
+ private String clusterName;
+ private String zkAddress;
+ private String instanceName;
+ private HelixManager participantManager;
+
+ public LocalContainerProcess(String clusterName, String zkAddress, String instanceName)
+ {
+ this.clusterName = clusterName;
+ this.zkAddress = zkAddress;
+ this.instanceName = instanceName;
+
+ }
+
+ // Registers the container state model and connects the participant.
+ // Throws if the Helix connection cannot be established.
+ public void start() throws Exception
+ {
+ log.info("STARTING "+ instanceName);
+ participantManager = HelixManagerFactory.getZKHelixManager(clusterName,
+ instanceName, InstanceType.PARTICIPANT, zkAddress);
+ participantManager.getStateMachineEngine().registerStateModelFactory(
+ "MasterSlave", new ContainerStateModelFactory());
+ participantManager.connect();
+ log.info("STARTED "+ instanceName);
+
+ }
+
+ // Disconnects the participant; safe to call before start() or repeatedly.
+ public void stop()
+ {
+ if (participantManager != null)
+ {
+ participantManager.disconnect();
+ }
+ }
+ }
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/local/LocalContainerProvider.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/local/LocalContainerProvider.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/local/LocalContainerProvider.java
new file mode 100644
index 0000000..49c9d42
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/local/LocalContainerProvider.java
@@ -0,0 +1,119 @@
+package org.apache.helix.metamanager.impl.local;
+
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Properties;
+
+import org.apache.helix.metamanager.ContainerProvider;
+import org.apache.helix.metamanager.ContainerProviderService;
+import org.apache.helix.metamanager.container.ContainerProcess;
+import org.apache.helix.metamanager.container.ContainerProcessProperties;
+import org.apache.helix.metamanager.container.ContainerUtils;
+import org.apache.helix.metamanager.impl.local.LocalContainerSingleton.LocalProcess;
+import org.apache.helix.metamanager.provider.ProviderProperties;
+import org.apache.log4j.Logger;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * {@link ContainerProvider} spawning VM-local containers. Only works in single-VM
+ * deployments as container metadata is managed via singleton.
+ *
+ * @see LocalContainerSingleton
+ */
+ class LocalContainerProvider implements ContainerProviderService {
+
+ static final Logger log = Logger.getLogger(LocalContainerProvider.class);
+
+ // container type name -> configuration template for processes of that type
+ final Map<String, Properties> types = new HashMap<String, Properties>();
+
+ String address;
+ String cluster;
+ String name;
+
+ @Override
+ public void configure(Properties properties) throws Exception {
+ ProviderProperties providerProperties = new ProviderProperties();
+ providerProperties.putAll(properties);
+ Preconditions.checkArgument(providerProperties.isValid());
+
+ this.address = providerProperties.getProperty("address");
+ this.cluster = providerProperties.getProperty("cluster");
+ this.name = providerProperties.getProperty("name");
+
+ // register every container type declared in the provider configuration
+ for (String containerType : providerProperties.getContainers()) {
+ registerType(containerType, providerProperties.getContainer(containerType));
+ }
+ }
+
+ @Override
+ public void start() throws Exception {
+ // left blank
+ }
+
+ @Override
+ public void stop() throws Exception {
+ destroyAll();
+ }
+
+ // Creates and starts an in-VM container of the given registered type and
+ // records it in the shared singleton registry under this provider's name.
+ @Override
+ public void create(String id, String type) throws Exception {
+ Map<String, LocalProcess> processes = LocalContainerSingleton.getProcesses();
+
+ synchronized (processes) {
+ Preconditions.checkState(!processes.containsKey(id), "Process '%s' already exists", id);
+ Preconditions.checkState(types.containsKey(type), "Type '%s' is not registered", type);
+
+ // copy the type template and fill in the per-instance settings
+ ContainerProcessProperties properties = new ContainerProcessProperties(types.get(type));
+
+ properties.setProperty(ContainerProcessProperties.CLUSTER, cluster);
+ properties.setProperty(ContainerProcessProperties.NAME, id);
+ properties.setProperty(ContainerProcessProperties.ADDRESS, address);
+
+ log.info(String.format("Running container '%s' (properties='%s')", id, properties));
+
+ ContainerProcess process = ContainerUtils.createProcess(properties);
+ process.start();
+
+ processes.put(id, new LocalProcess(id, name, process));
+
+ }
+ }
+
+ // Stops and removes the container; throws IllegalArgumentException if unknown.
+ @Override
+ public void destroy(String id) throws Exception {
+ Map<String, LocalProcess> processes = LocalContainerSingleton.getProcesses();
+
+ synchronized (processes) {
+ if (!processes.containsKey(id))
+ throw new IllegalArgumentException(String.format("Process '%s' does not exists", id));
+
+ log.info(String.format("Destroying container '%s'", id));
+
+ LocalProcess local = processes.remove(id);
+
+ local.process.stop();
+ }
+ }
+
+ // Destroys only the containers owned by this provider instance (owner == name);
+ // iterates over a copy since destroy() mutates the registry.
+ @Override
+ public void destroyAll() {
+ Map<String, LocalProcess> processes = LocalContainerSingleton.getProcesses();
+
+ synchronized (processes) {
+ log.info("Destroying all processes");
+ for (LocalProcess local : new HashSet<LocalProcess>(processes.values())) {
+ if (local.owner.equals(name)) {
+ try { destroy(local.id); } catch (Exception ignore) {}
+ }
+ }
+ }
+ }
+
+ void registerType(String name, Properties properties) {
+ log.debug(String.format("Registering container type '%s' (properties='%s')", name, properties));
+ types.put(name, properties);
+ }
+
+ }
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/local/LocalContainerProviderProcess.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/local/LocalContainerProviderProcess.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/local/LocalContainerProviderProcess.java
new file mode 100644
index 0000000..ed090bc
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/local/LocalContainerProviderProcess.java
@@ -0,0 +1,45 @@
+package org.apache.helix.metamanager.impl.local;
+
+import java.util.Properties;
+
+import org.apache.helix.metamanager.Service;
+import org.apache.helix.metamanager.provider.ProviderProcess;
+import org.apache.helix.metamanager.provider.ProviderProperties;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * Configurable and runnable service for {@link LocalContainerProvider}.
+ *
+ */
+ public class LocalContainerProviderProcess implements Service {
+ LocalContainerProvider provider;
+ ProviderProcess process;
+
+ // Builds the provider and its hosting ProviderProcess from one property set;
+ // both are wired together here and started/stopped as a unit.
+ @Override
+ public void configure(Properties properties) throws Exception {
+ ProviderProperties providerProperties = new ProviderProperties();
+ providerProperties.putAll(properties);
+
+ Preconditions.checkArgument(providerProperties.isValid(), "provider properties not valid (properties='%s')", properties);
+
+ provider = new LocalContainerProvider();
+ provider.configure(properties);
+
+ process = new ProviderProcess();
+ process.configure(providerProperties);
+ // NOTE(review): "setConteinerProvider" is a typo in the ProviderProcess
+ // API ("setContainerProvider"); fix at the declaration site.
+ process.setConteinerProvider(provider);
+ }
+
+ @Override
+ public void start() throws Exception {
+ provider.start();
+ process.start();
+ }
+
+ // Stop in reverse order of start.
+ @Override
+ public void stop() throws Exception {
+ process.stop();
+ provider.stop();
+ }
+ }
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/local/LocalContainerSingleton.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/local/LocalContainerSingleton.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/local/LocalContainerSingleton.java
new file mode 100644
index 0000000..b91a848
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/local/LocalContainerSingleton.java
@@ -0,0 +1,56 @@
+package org.apache.helix.metamanager.impl.local;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.helix.metamanager.container.ContainerProcess;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * Singleton tracking metadata for VM-local containers spawned via
+ * {@link LocalContainerProvider}.
+ *
+ */
+ public class LocalContainerSingleton {
+ // shared registry of all VM-local container processes, keyed by container id;
+ // callers must synchronize on the returned map for compound operations
+ final static Map<String, LocalProcess> processes = new HashMap<String, LocalProcess>();
+
+ private LocalContainerSingleton() {
+ // left blank
+ }
+
+ public static Map<String, LocalProcess> getProcesses() {
+ return processes;
+ }
+
+ // Stops every registered process and empties the registry (test cleanup).
+ public static void reset() {
+ synchronized (processes) {
+ for (LocalProcess local : processes.values()) {
+ local.process.stop();
+ }
+ processes.clear();
+ }
+ }
+
+ // Stops and removes a single process; used by tests to simulate failure.
+ public static void killProcess(String id) throws InterruptedException {
+ synchronized (processes) {
+ Preconditions.checkArgument(processes.containsKey(id), "Process '%s' does not exist", id);
+ ContainerProcess process = processes.get(id).process;
+ process.stop();
+ processes.remove(id);
+ }
+ }
+
+ // Immutable record of one registered container: id, owning provider name,
+ // and the live process handle.
+ static class LocalProcess {
+ final String id;
+ final String owner;
+ final ContainerProcess process;
+
+ public LocalProcess(String id, String owner, ContainerProcess process) {
+ this.id = id;
+ this.owner = owner;
+ this.process = process;
+ }
+ }
+
+ }
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/local/LocalContainerStatusProvider.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/local/LocalContainerStatusProvider.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/local/LocalContainerStatusProvider.java
new file mode 100644
index 0000000..604ad25
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/local/LocalContainerStatusProvider.java
@@ -0,0 +1,37 @@
+package org.apache.helix.metamanager.impl.local;
+
+import java.util.Map;
+
+import org.apache.helix.metamanager.ContainerStatusProvider;
+import org.apache.helix.metamanager.impl.local.LocalContainerSingleton.LocalProcess;
+
+ // Status provider for VM-local containers backed by the shared
+ // LocalContainerSingleton registry. All queries are null-safe: an unknown
+ // id yields false rather than a NullPointerException, matching the
+ // behavior of LocalStatusProvider.isHealthy().
+ public class LocalContainerStatusProvider implements ContainerStatusProvider {
+
+ @Override
+ public boolean exists(String id) {
+ Map<String, LocalProcess> processes = LocalContainerSingleton.getProcesses();
+
+ synchronized (processes) {
+ return processes.containsKey(id);
+ }
+ }
+
+ // True if the container is registered and its process reports active;
+ // false for unknown ids (previously threw NPE).
+ @Override
+ public boolean isActive(String id) {
+ Map<String, LocalProcess> processes = LocalContainerSingleton.getProcesses();
+
+ synchronized (processes) {
+ LocalProcess local = processes.get(id);
+
+ if (local == null)
+ return false;
+
+ return local.process.isActive();
+ }
+ }
+
+ // True if the container is registered and its process reports failed;
+ // false for unknown ids (previously threw NPE).
+ @Override
+ public boolean isFailed(String id) {
+ Map<String, LocalProcess> processes = LocalContainerSingleton.getProcesses();
+
+ synchronized (processes) {
+ LocalProcess local = processes.get(id);
+
+ if (local == null)
+ return false;
+
+ return local.process.isFailed();
+ }
+ }
+
+ }
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/local/LocalStatusProvider.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/local/LocalStatusProvider.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/local/LocalStatusProvider.java
new file mode 100644
index 0000000..c53a3ba
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/local/LocalStatusProvider.java
@@ -0,0 +1,53 @@
+package org.apache.helix.metamanager.impl.local;
+
+import java.util.Map;
+import java.util.Properties;
+
+import org.apache.helix.metamanager.StatusProviderService;
+import org.apache.helix.metamanager.impl.local.LocalContainerSingleton.LocalProcess;
+
+/**
+ * StatusProvider for VM-local containers spawned via
+ * {@link LocalContainerProvider}. Runnable and configurable service.
+ *
+ */
+ public class LocalStatusProvider implements StatusProviderService {
+
+ @Override
+ public boolean exists(String id) {
+ Map<String, LocalProcess> processes = LocalContainerSingleton.getProcesses();
+
+ synchronized (processes) {
+ return processes.containsKey(id);
+ }
+ }
+
+ // True only if the container is registered and its process is active;
+ // unknown ids are reported as unhealthy rather than throwing.
+ @Override
+ public boolean isHealthy(String id) {
+ Map<String, LocalProcess> processes = LocalContainerSingleton.getProcesses();
+
+ synchronized (processes) {
+ LocalProcess local = processes.get(id);
+
+ if (local == null)
+ return false;
+
+ return local.process.isActive();
+ }
+ }
+
+ // Service lifecycle is a no-op: all state lives in the shared singleton.
+ @Override
+ public void configure(Properties properties) throws Exception {
+ // left blank
+ }
+
+ @Override
+ public void start() throws Exception {
+ // left blank
+ }
+
+ @Override
+ public void stop() throws Exception {
+ // left blank
+ }
+ }
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/shell/ShellContainerProcess.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/shell/ShellContainerProcess.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/shell/ShellContainerProcess.java
new file mode 100644
index 0000000..0069110
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/shell/ShellContainerProcess.java
@@ -0,0 +1,93 @@
+package org.apache.helix.metamanager.impl.shell;
+
+import java.io.File;
+import java.util.concurrent.Executors;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.helix.metamanager.container.ContainerProcess;
+import org.apache.helix.metamanager.container.ContainerProcessProperties;
+import org.apache.helix.metamanager.container.ContainerUtils;
+import org.apache.log4j.Logger;
+
+/**
+ * Host process for Shell-based container. ContainerProcess configuration is
+ * read from path in first command-line argument. Status is maintained using
+ * temporary marker file. (Program entry point)
+ *
+ */
+ class ShellContainerProcess {
+ static final Logger log = Logger.getLogger(ShellContainerProcess.class);
+
+ // how often (ms) the monitor checks the container's health
+ public static final long MONITOR_INTERVAL = 5000;
+
+ static String markerDir;
+ static ContainerProcess process;
+ static ScheduledExecutorService executor = Executors.newSingleThreadScheduledExecutor();
+
+ // args[0]: path to the container properties file; args[1]: marker directory
+ // used to signal "active" back to the spawning ShellContainerProvider.
+ public static void main(String[] args) throws Exception {
+ final String propertiesPath = args[0];
+ markerDir = args[1];
+
+ ContainerProcessProperties properties = ContainerUtils.getPropertiesFromPath(propertiesPath);
+
+ process = ContainerUtils.createProcess(properties);
+
+ log.debug("Installing shutdown hooks");
+ Runtime.getRuntime().addShutdownHook(new Thread() {
+ @Override
+ public void run() {
+ log.debug("Running shutdown hook");
+ try {
+ ShellContainerProcess.stop();
+ } catch (Exception ignore) {
+ }
+ }
+ });
+
+ log.debug("Launching shell container process");
+ process.start();
+
+ // marker file tells the provider the container reached the active state
+ ShellUtils.createMarker(new File(markerDir));
+
+ log.debug("Launching process monitor");
+ executor.scheduleAtFixedRate(new ProcessMonitor(), 0, MONITOR_INTERVAL, TimeUnit.MILLISECONDS);
+ }
+
+ // Stops the container, removes the marker, and shuts down the monitor;
+ // idempotent via the null checks (also invoked from the shutdown hook).
+ static void stop() throws InterruptedException {
+ log.debug("Shutting down shell process");
+ if (process != null) {
+ process.stop();
+ ShellUtils.destroyMarker(new File(markerDir));
+ }
+ if (executor != null) {
+ executor.shutdownNow();
+ while (!executor.isTerminated()) {
+ Thread.sleep(100);
+ }
+ executor = null;
+ }
+ }
+
+ // Periodic health check: exits the JVM with status 1 on container failure,
+ // and performs an orderly stop when the container shuts down on its own.
+ static class ProcessMonitor implements Runnable {
+ @Override
+ public void run() {
+ if (process.isFailed()) {
+ log.warn("detected process failure");
+ try {
+ ShellContainerProcess.stop();
+ } catch (Exception ignore) {
+ }
+ System.exit(1);
+ }
+ if (!process.isActive()) {
+ log.warn("detected process shutdown");
+ try {
+ ShellContainerProcess.stop();
+ } catch (Exception ignore) {
+ }
+ }
+ }
+ }
+
+ }
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/shell/ShellContainerProvider.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/shell/ShellContainerProvider.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/shell/ShellContainerProvider.java
new file mode 100644
index 0000000..69e2553
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/shell/ShellContainerProvider.java
@@ -0,0 +1,151 @@
+package org.apache.helix.metamanager.impl.shell;
+
+import java.io.File;
+import java.io.FileWriter;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Properties;
+import java.util.concurrent.TimeoutException;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.helix.metamanager.ContainerProvider;
+import org.apache.helix.metamanager.ContainerProviderService;
+import org.apache.helix.metamanager.container.ContainerProcessProperties;
+import org.apache.helix.metamanager.impl.shell.ShellContainerSingleton.ShellProcess;
+import org.apache.helix.metamanager.provider.ProviderProperties;
+import org.apache.log4j.Logger;
+
+import com.google.common.base.Preconditions;
+import com.google.common.io.Files;
+
+/**
+ * {@link ContainerProvider} spawning shell-based containers. Only works in single-VM
+ * deployments as container metadata is managed via singleton.
+ *
+ * @see ShellContainerSingleton
+ */
+ class ShellContainerProvider implements ContainerProviderService {
+
+ static final Logger log = Logger.getLogger(ShellContainerProvider.class);
+
+ static final String RUN_COMMAND = "/bin/sh";
+
+ // polling interval and deadline (ms) while waiting for a container's marker file
+ static final long POLL_INTERVAL = 1000;
+ static final long CONTAINER_TIMEOUT = 60000;
+
+ // global view of processes required
+ static final Map<String, ShellProcess> processes = new HashMap<String, ShellProcess>();
+
+ // container type name -> configuration template for processes of that type
+ final Map<String, Properties> types = new HashMap<String, Properties>();
+
+ String address;
+ String cluster;
+ String name;
+
+ @Override
+ public void configure(Properties properties) throws Exception {
+ Preconditions.checkNotNull(properties);
+ ProviderProperties providerProperties = new ProviderProperties();
+ providerProperties.putAll(properties);
+ Preconditions.checkArgument(providerProperties.isValid());
+
+ this.address = providerProperties.getProperty("address");
+ this.cluster = providerProperties.getProperty("cluster");
+ this.name = providerProperties.getProperty("name");
+
+ for (String containerType : providerProperties.getContainers()) {
+ registerType(containerType, providerProperties.getContainer(containerType));
+ }
+ }
+
+ @Override
+ public void start() throws Exception {
+ // left blank
+ }
+
+ @Override
+ public void stop() throws Exception {
+ destroyAll();
+ }
+
+ // Spawns a shell-hosted container of the given registered type: writes its
+ // properties to a temp dir, launches the shell wrapper, and blocks until the
+ // container creates its marker file or CONTAINER_TIMEOUT elapses.
+ @Override
+ public void create(String id, String type) throws Exception {
+ Map<String, ShellProcess> processes = ShellContainerSingleton.getProcesses();
+
+ synchronized (processes) {
+ Preconditions.checkState(!processes.containsKey(id), "Process '%s' already exists", id);
+ Preconditions.checkState(types.containsKey(type), "Type '%s' is not registered", type);
+
+ ContainerProcessProperties properties = new ContainerProcessProperties(types.get(type));
+
+ properties.setProperty(ContainerProcessProperties.CLUSTER, cluster);
+ properties.setProperty(ContainerProcessProperties.NAME, id);
+ properties.setProperty(ContainerProcessProperties.ADDRESS, address);
+
+ File tmpDir = Files.createTempDir();
+ File tmpProperties = new File(tmpDir.getCanonicalPath() + File.separator + ShellUtils.SHELL_CONTAINER_PROPERTIES);
+ File tmpMarker = new File(tmpDir.getCanonicalPath());
+
+ // close the writer explicitly so the properties file is fully flushed
+ // before the child process reads it (previously the FileWriter leaked)
+ FileWriter writer = new FileWriter(tmpProperties);
+ try {
+ properties.store(writer, id);
+ } finally {
+ writer.close();
+ }
+
+ log.info(String.format("Running container '%s' (properties='%s')", id, properties));
+
+ log.debug(String.format("Invoking command '%s %s %s %s'", RUN_COMMAND, ShellUtils.SHELL_CONTAINER_PATH, tmpProperties.getCanonicalPath(),
+ tmpMarker.getCanonicalPath()));
+
+ ProcessBuilder builder = new ProcessBuilder();
+ builder.command(RUN_COMMAND, ShellUtils.SHELL_CONTAINER_PATH, tmpProperties.getCanonicalPath(), tmpMarker.getCanonicalPath());
+
+ Process process = builder.start();
+
+ processes.put(id, new ShellProcess(id, name, process, tmpDir));
+
+ // wait for the container to signal the active state via its marker file
+ long limit = System.currentTimeMillis() + CONTAINER_TIMEOUT;
+ while (!ShellUtils.hasMarker(tmpDir)) {
+ if (System.currentTimeMillis() >= limit) {
+ throw new TimeoutException(String.format("Container '%s' failed to reach active state", id));
+ }
+ Thread.sleep(POLL_INTERVAL);
+ }
+ }
+ }
+
+ // Terminates the container process, waits for it to exit, and removes its
+ // temp dir; throws IllegalArgumentException if the id is unknown.
+ @Override
+ public void destroy(String id) throws Exception {
+ Map<String, ShellProcess> processes = ShellContainerSingleton.getProcesses();
+
+ synchronized (processes) {
+ if (!processes.containsKey(id))
+ throw new IllegalArgumentException(String.format("Process '%s' does not exists", id));
+
+ log.info(String.format("Destroying container '%s'", id));
+
+ ShellProcess shell = processes.remove(id);
+ shell.process.destroy();
+ shell.process.waitFor();
+
+ FileUtils.deleteDirectory(shell.tmpDir);
+ }
+ }
+
+ // Destroys only the containers owned by this provider (owner == name);
+ // iterates over a copy since destroy() mutates the registry.
+ @Override
+ public void destroyAll() {
+ Map<String, ShellProcess> processes = ShellContainerSingleton.getProcesses();
+
+ synchronized (processes) {
+ log.info("Destroying all owned processes");
+ for (ShellProcess process : new HashSet<ShellProcess>(processes.values())) {
+ if (process.owner.equals(name)) {
+ try { destroy(process.id); } catch (Exception ignore) {}
+ }
+ }
+ }
+ }
+
+ void registerType(String name, Properties properties) {
+ log.debug(String.format("Registering container type '%s' (properties='%s')", name, properties));
+ types.put(name, properties);
+ }
+
+ }
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/shell/ShellContainerProviderProcess.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/shell/ShellContainerProviderProcess.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/shell/ShellContainerProviderProcess.java
new file mode 100644
index 0000000..36f150d
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/shell/ShellContainerProviderProcess.java
@@ -0,0 +1,45 @@
+package org.apache.helix.metamanager.impl.shell;
+
+import java.util.Properties;
+
+import org.apache.helix.metamanager.Service;
+import org.apache.helix.metamanager.provider.ProviderProcess;
+import org.apache.helix.metamanager.provider.ProviderProperties;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * Configurable and runnable service for {@link ShellContainerProvider}.
+ *
+ */
+public class ShellContainerProviderProcess implements Service {
+ ShellContainerProvider provider;
+ ProviderProcess process;
+
+ /**
+  * Validates the provider configuration and wires up the underlying
+  * {@link ShellContainerProvider} and its Helix provider process.
+  *
+  * @param properties provider configuration; must satisfy ProviderProperties.isValid()
+  * @throws Exception if provider or process configuration fails
+  */
+ @Override
+ public void configure(Properties properties) throws Exception {
+ ProviderProperties providerProperties = new ProviderProperties();
+ providerProperties.putAll(properties);
+
+ Preconditions.checkArgument(providerProperties.isValid(), "provider properties not valid (properties='%s')", properties);
+
+ provider = new ShellContainerProvider();
+ provider.configure(properties);
+
+ process = new ProviderProcess();
+ process.configure(providerProperties);
+ // NOTE(review): "setConteinerProvider" is a typo ("Conteiner") in the
+ // ProviderProcess API; kept as-is since the method is declared elsewhere
+ process.setConteinerProvider(provider);
+ }
+
+ // provider is started before the process so it is ready when Helix connects
+ @Override
+ public void start() throws Exception {
+ provider.start();
+ process.start();
+ }
+
+ // stop in reverse order of start()
+ @Override
+ public void stop() throws Exception {
+ process.stop();
+ provider.stop();
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/shell/ShellContainerSingleton.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/shell/ShellContainerSingleton.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/shell/ShellContainerSingleton.java
new file mode 100644
index 0000000..91054e1
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/shell/ShellContainerSingleton.java
@@ -0,0 +1,58 @@
+package org.apache.helix.metamanager.impl.shell;
+
+import java.io.File;
+import java.util.HashMap;
+import java.util.Map;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * Singleton tracking metadata for shell-based containers spawned via
+ * {@link ShellContainerProvider}.
+ *
+ */
+public class ShellContainerSingleton {
+ // shared registry of spawned processes, guarded by synchronizing on the
+ // map instance itself (see reset()/killProcess() and all callers)
+ static final Map<String, ShellProcess> processes = new HashMap<String, ShellProcess>();
+
+ private ShellContainerSingleton() {
+ // left blank
+ }
+
+ /**
+  * @return the shared process registry; callers must synchronize on the
+  *         returned map for any read or write access
+  */
+ public static Map<String, ShellProcess> getProcesses() {
+ return processes;
+ }
+
+ /**
+  * Destroys every registered process and clears the registry. Failures of
+  * waitFor() are ignored so that all processes are still destroyed.
+  */
+ public static void reset() {
+ synchronized (processes) {
+ for (ShellProcess shell : processes.values()) {
+ shell.process.destroy();
+ try { shell.process.waitFor(); } catch(Exception ignore) {}
+ }
+ processes.clear();
+ }
+ }
+
+ /**
+  * Kills the process registered under the given id and removes it from the
+  * registry.
+  *
+  * @throws IllegalArgumentException if no process is registered for the id
+  * @throws InterruptedException if interrupted while awaiting termination
+  */
+ public static void killProcess(String id) throws InterruptedException {
+ synchronized (processes) {
+ Preconditions.checkArgument(processes.containsKey(id), "Process '%s' does not exist", id);
+ Process process = processes.get(id).process;
+ process.destroy();
+ process.waitFor();
+ processes.remove(id);
+ }
+ }
+
+ /** Immutable value holder describing one spawned shell container process. */
+ static class ShellProcess {
+ final String id;
+ final String owner;
+ final Process process;
+ final File tmpDir;
+
+ public ShellProcess(String id, String owner, Process process, File tmpDir) {
+ this.id = id;
+ this.owner = owner;
+ this.process = process;
+ this.tmpDir = tmpDir;
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/shell/ShellContainerStatusProvider.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/shell/ShellContainerStatusProvider.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/shell/ShellContainerStatusProvider.java
new file mode 100644
index 0000000..03e55c6
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/shell/ShellContainerStatusProvider.java
@@ -0,0 +1,52 @@
+package org.apache.helix.metamanager.impl.shell;
+
+import java.util.Map;
+
+import org.apache.helix.metamanager.ContainerStatusProvider;
+import org.apache.helix.metamanager.impl.shell.ShellContainerSingleton.ShellProcess;
+
+/**
+ * ContainerStatusProvider for shell-based containers spawned via
+ * {@link ShellContainerProvider}. Looks up process state in the shared
+ * {@link ShellContainerSingleton} registry.
+ */
+public class ShellContainerStatusProvider implements ContainerStatusProvider {
+
+ /** @return true if a process is registered under the given id */
+ @Override
+ public boolean exists(String id) {
+ Map<String, ShellProcess> processes = ShellContainerSingleton.getProcesses();
+
+ synchronized (processes) {
+ return processes.containsKey(id);
+ }
+ }
+
+ /** @return true if the process exists and has not terminated yet */
+ @Override
+ public boolean isActive(String id) {
+ Map<String, ShellProcess> processes = ShellContainerSingleton.getProcesses();
+
+ synchronized (processes) {
+ ShellProcess shell = processes.get(id);
+
+ // fix: unknown ids previously caused a NullPointerException; treat an
+ // unregistered container as not active (matches ShellStatusProvider)
+ if (shell == null)
+ return false;
+
+ try {
+ // exitValue() throws while the process is still running
+ shell.process.exitValue();
+ return false;
+ } catch (IllegalThreadStateException e) {
+ // still running
+ return true;
+ }
+ }
+ }
+
+ /** @return true if the process exists and terminated with a non-zero exit code */
+ @Override
+ public boolean isFailed(String id) {
+ Map<String, ShellProcess> processes = ShellContainerSingleton.getProcesses();
+
+ synchronized (processes) {
+ ShellProcess shell = processes.get(id);
+
+ // fix: unknown ids previously caused a NullPointerException; an
+ // unregistered container is not considered failed
+ if (shell == null)
+ return false;
+
+ try {
+ return (shell.process.exitValue() != 0);
+ } catch (IllegalThreadStateException e) {
+ // still running
+ return false;
+ }
+ }
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/shell/ShellStatusProvider.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/shell/ShellStatusProvider.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/shell/ShellStatusProvider.java
new file mode 100644
index 0000000..015218b
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/shell/ShellStatusProvider.java
@@ -0,0 +1,64 @@
+package org.apache.helix.metamanager.impl.shell;
+
+import java.util.Map;
+import java.util.Properties;
+
+import org.apache.helix.metamanager.StatusProviderService;
+import org.apache.helix.metamanager.impl.shell.ShellContainerSingleton.ShellProcess;
+
+/**
+ * StatusProvider for shell-based containers spawned via
+ * {@link ShellContainerProvider}. Runnable and configurable service.
+ *
+ */
+public class ShellStatusProvider implements StatusProviderService {
+
+ /** @return true if a process is registered under the given id */
+ @Override
+ public boolean exists(String id) {
+ Map<String, ShellProcess> processes = ShellContainerSingleton.getProcesses();
+
+ synchronized (processes) {
+ return processes.containsKey(id);
+ }
+ }
+
+ /**
+  * A container is healthy when it is registered, has written its active
+  * marker file, and its OS process has not terminated yet.
+  *
+  * @return true if the container process is registered, marked active and
+  *         still running
+  */
+ @Override
+ public boolean isHealthy(String id) {
+ Map<String, ShellProcess> processes = ShellContainerSingleton.getProcesses();
+
+ synchronized (processes) {
+ ShellProcess shell = processes.get(id);
+
+ // unknown container
+ if (shell == null)
+ return false;
+
+ // container never reached (or lost) its active marker
+ if (!ShellUtils.hasMarker(shell.tmpDir))
+ return false;
+
+ try {
+ // exit value
+ shell.process.exitValue();
+ return false;
+ } catch (IllegalThreadStateException e) {
+ // expected
+ }
+
+ return true;
+ }
+ }
+
+ // no configuration needed; Service methods are intentionally empty
+ @Override
+ public void configure(Properties properties) throws Exception {
+ // left blank
+ }
+
+ @Override
+ public void start() throws Exception {
+ // left blank
+ }
+
+ @Override
+ public void stop() throws Exception {
+ // left blank
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/shell/ShellUtils.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/shell/ShellUtils.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/shell/ShellUtils.java
new file mode 100644
index 0000000..dcec2ae
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/shell/ShellUtils.java
@@ -0,0 +1,54 @@
+package org.apache.helix.metamanager.impl.shell;
+
+import java.io.File;
+import java.io.IOException;
+
+import org.apache.log4j.Logger;
+
+/**
+ * Utility for creating and destroying temporary marker files for shell-based
+ * containers.
+ *
+ */
+class ShellUtils {
+
+ static final Logger log = Logger.getLogger(ShellUtils.class);
+
+ // launcher script for shell containers; relative path, presumably resolved
+ // against the provider's working directory — TODO confirm against callers
+ static final String SHELL_CONTAINER_PATH = "target/metamanager-pkg/bin/shell-container-process.sh";
+ // name of the properties file handed to a container process
+ static final String SHELL_CONTAINER_PROPERTIES = "container.properties";
+ // name of the marker file signalling that a container reached active state
+ static final String SHELL_CONTAINER_MARKER = "active";
+
+ private ShellUtils() {
+ // left blank
+ }
+
+ /**
+  * @return true if the active-state marker file exists in the given process
+  *         directory; false when it is missing or path resolution fails
+  */
+ public static boolean hasMarker(File processDir) {
+ try {
+ log.debug(String.format("checking for marker file '%s'", getMarkerFile(processDir)));
+ if (getMarkerFile(processDir).exists())
+ return true;
+ } catch (IOException e) {
+ // ignore
+ }
+ return false;
+ }
+
+ /**
+  * Creates the marker file in the given process directory. The return value
+  * of createNewFile() is ignored: an already-existing marker is acceptable.
+  */
+ public static void createMarker(File processDir) throws IOException {
+ log.debug(String.format("creating marker file '%s'", getMarkerFile(processDir)));
+ getMarkerFile(processDir).createNewFile();
+ }
+
+ /** Best-effort removal of the marker file; path resolution errors are ignored. */
+ public static void destroyMarker(File processDir) {
+ try {
+ log.debug(String.format("destroying marker file '%s'", getMarkerFile(processDir)));
+ getMarkerFile(processDir).delete();
+ } catch (IOException e) {
+ // ignore
+ }
+ }
+
+ /**
+  * @return handle to the marker file inside the canonical form of the
+  *         process directory
+  * @throws IOException if the canonical path cannot be resolved
+  */
+ public static File getMarkerFile(File processDir) throws IOException {
+ return new File(processDir.getCanonicalPath() + File.separatorChar + SHELL_CONTAINER_MARKER);
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/ApplicationConfig.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/ApplicationConfig.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/ApplicationConfig.java
new file mode 100644
index 0000000..9c85232
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/ApplicationConfig.java
@@ -0,0 +1,32 @@
+package org.apache.helix.metamanager.impl.yarn;
+
+/**
+ * Immutable holder for the configuration handed to a YARN application:
+ * Helix cluster address and name, metadata store address and the logical
+ * provider name.
+ */
+public class ApplicationConfig {
+ final String clusterAddress;
+ final String clusterName;
+ final String metadataAddress;
+ final String providerName;
+
+ public ApplicationConfig(String clusterAddress, String clusterName,
+ String metadataAddress, String providerName) {
+ this.clusterAddress = clusterAddress;
+ this.clusterName = clusterName;
+ this.metadataAddress = metadataAddress;
+ this.providerName = providerName;
+ }
+
+ public String getClusterAddress() {
+ return clusterAddress;
+ }
+
+ public String getClusterName() {
+ return clusterName;
+ }
+
+ public String getMetadataAddress() {
+ return metadataAddress;
+ }
+
+ public String getProviderName() {
+ return providerName;
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/ContainerMetadata.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/ContainerMetadata.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/ContainerMetadata.java
new file mode 100644
index 0000000..7b25d31
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/ContainerMetadata.java
@@ -0,0 +1,80 @@
+package org.apache.helix.metamanager.impl.yarn;
+
+
+/**
+ * Mutable metadata record for a YARN-managed container, serialized to/from
+ * JSON by {@link Utils}. NOTE(review): near-duplicate of
+ * {@link YarnContainerData} (same fields, states and builder-style setters)
+ * — consider consolidating the two classes.
+ */
+class ContainerMetadata {
+
+ // lifecycle states; presumably ordered from acquisition through teardown —
+ // TODO confirm transitions against the master/provider implementations
+ static enum ContainerState {
+ ACQUIRE,
+ CONNECTING,
+ ACTIVE,
+ TEARDOWN,
+ FAILED,
+ HALTED,
+ FINALIZE
+ }
+
+ String id;
+ ContainerState state;
+ int yarnId;
+ String owner;
+ YarnContainerProcessProperties properties;
+
+ // no-arg constructor required for JSON deserialization
+ public ContainerMetadata() {
+ // left blank
+ }
+
+ // new records start in ACQUIRE with no YARN container assigned yet (-1)
+ public ContainerMetadata(String id, String owner, YarnContainerProcessProperties properties) {
+ this.id = id;
+ this.state = ContainerState.ACQUIRE;
+ this.yarnId = -1;
+ this.owner = owner;
+ this.properties = properties;
+ }
+
+ public String getId() {
+ return id;
+ }
+
+ // setters return this for fluent chaining
+ public ContainerMetadata setId(String id) {
+ this.id = id;
+ return this;
+ }
+
+ public ContainerState getState() {
+ return state;
+ }
+
+ public ContainerMetadata setState(ContainerState state) {
+ this.state = state;
+ return this;
+ }
+
+ public int getYarnId() {
+ return yarnId;
+ }
+
+ public ContainerMetadata setYarnId(int yarnId) {
+ this.yarnId = yarnId;
+ return this;
+ }
+
+ public String getOwner() {
+ return owner;
+ }
+
+ public ContainerMetadata setOwner(String owner) {
+ this.owner = owner;
+ return this;
+ }
+
+ public YarnContainerProcessProperties getProperties() {
+ return properties;
+ }
+
+ public ContainerMetadata setProperties(YarnContainerProcessProperties properties) {
+ this.properties = properties;
+ return this;
+ }
+
+}
+
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/MetadataProvider.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/MetadataProvider.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/MetadataProvider.java
new file mode 100644
index 0000000..a35c16e
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/MetadataProvider.java
@@ -0,0 +1,42 @@
+package org.apache.helix.metamanager.impl.yarn;
+
+import java.util.Collection;
+
+/**
+ * CRUD contract for storing {@link ContainerMetadata} records.
+ * NOTE(review): parallels {@link MetadataService} (identical operations and
+ * an exception class with the same serialVersionUID) — consider keeping only
+ * one of the two interfaces.
+ */
+interface MetadataProvider {
+
+ /** @return true if a metadata record exists for the given container id */
+ public boolean exists(String id);
+
+ /** Creates a new metadata record. */
+ public void create(ContainerMetadata meta) throws MetadataException;
+
+ /** Reads the metadata record for the given container id. */
+ public ContainerMetadata read(String id) throws MetadataException;
+
+ /** Reads all stored metadata records. */
+ public Collection<ContainerMetadata> readAll() throws MetadataException;
+
+ /** Updates an existing metadata record. */
+ public void update(ContainerMetadata meta) throws MetadataException;
+
+ /** Deletes the metadata record for the given container id. */
+ public void delete(String id) throws MetadataException;
+
+ /** Checked exception wrapping any metadata store failure. */
+ public static class MetadataException extends Exception {
+
+ /**
+ *
+ */
+ private static final long serialVersionUID = -2846997013918977056L;
+
+ public MetadataException() {
+ super();
+ }
+
+ public MetadataException(String message, Throwable cause) {
+ super(message, cause);
+ }
+
+ public MetadataException(String message) {
+ super(message);
+ }
+
+ public MetadataException(Throwable cause) {
+ super(cause);
+ }
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/MetadataService.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/MetadataService.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/MetadataService.java
new file mode 100644
index 0000000..35dd1ee
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/MetadataService.java
@@ -0,0 +1,42 @@
+package org.apache.helix.metamanager.impl.yarn;
+
+import java.util.Collection;
+
+/**
+ * Public CRUD contract for storing {@link ContainerMetadata} records.
+ * NOTE(review): parallels the package-private {@link MetadataProvider}
+ * interface — consider consolidating.
+ */
+public interface MetadataService {
+
+ /** @return true if a metadata record exists for the given container id */
+ public boolean exists(String id);
+
+ /** Creates a new metadata record. */
+ public void create(ContainerMetadata meta) throws MetadataServiceException;
+
+ /** Reads the metadata record for the given container id. */
+ public ContainerMetadata read(String id) throws MetadataServiceException;
+
+ /** Reads all stored metadata records. */
+ public Collection<ContainerMetadata> readAll() throws MetadataServiceException;
+
+ /** Updates an existing metadata record. */
+ public void update(ContainerMetadata meta) throws MetadataServiceException;
+
+ /** Deletes the metadata record for the given container id. */
+ public void delete(String id) throws MetadataServiceException;
+
+ /** Checked exception wrapping any metadata store failure. */
+ public static class MetadataServiceException extends Exception {
+
+ /**
+ *
+ */
+ private static final long serialVersionUID = -2846997013918977056L;
+
+ public MetadataServiceException() {
+ super();
+ }
+
+ public MetadataServiceException(String message, Throwable cause) {
+ super(message, cause);
+ }
+
+ public MetadataServiceException(String message) {
+ super(message);
+ }
+
+ public MetadataServiceException(Throwable cause) {
+ super(cause);
+ }
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/Utils.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/Utils.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/Utils.java
new file mode 100644
index 0000000..99f9a03
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/Utils.java
@@ -0,0 +1,94 @@
+package org.apache.helix.metamanager.impl.yarn;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.yarn.api.records.LocalResource;
+import org.apache.hadoop.yarn.api.records.LocalResourceType;
+import org.apache.hadoop.yarn.api.records.LocalResourceVisibility;
+import org.apache.hadoop.yarn.util.ConverterUtils;
+import org.apache.hadoop.yarn.util.Records;
+import org.apache.helix.metamanager.impl.yarn.ContainerMetadata.ContainerState;
+import org.apache.log4j.Logger;
+
+import com.google.gson.Gson;
+import com.google.gson.GsonBuilder;
+import com.google.gson.TypeAdapter;
+import com.google.gson.stream.JsonReader;
+import com.google.gson.stream.JsonToken;
+import com.google.gson.stream.JsonWriter;
+
+/**
+ * JSON (de)serialization helpers for {@link ContainerMetadata} and a dummy
+ * LocalResource map used when submitting YARN containers.
+ */
+public class Utils {
+
+ static final Logger log = Logger.getLogger(Utils.class);
+
+ // Gson instance with a custom adapter for ContainerState, built once
+ static Gson gson;
+ static {
+ GsonBuilder builder = new GsonBuilder();
+ builder.registerTypeAdapter(ContainerState.class, new ContainerStateAdapter());
+ builder.setPrettyPrinting();
+ gson = builder.create();
+ }
+
+ // placeholder resource map; created eagerly during class initialization
+ static Map<String, LocalResource> dummyResources = createDummyResources();
+
+ /** Serializes container metadata to pretty-printed JSON. */
+ static String toJson(ContainerMetadata meta) {
+ return gson.toJson(meta);
+ }
+
+ /** Deserializes container metadata from its JSON representation. */
+ static ContainerMetadata fromJson(String json) {
+ return gson.fromJson(json, ContainerMetadata.class);
+ }
+
+ static Map<String, LocalResource> getDummyResources() {
+ return dummyResources;
+ }
+
+ /**
+  * Builds a map with a single dummy file resource backed by /tmp/dummy,
+  * creating the file if necessary.
+  *
+  * @throws IllegalStateException if the dummy file cannot be created
+  *         (fix: previously called System.exit(1), which would kill the
+  *         whole embedding JVM from library/static-init code)
+  */
+ private static Map<String, LocalResource> createDummyResources() {
+ File dummy = new File("/tmp/dummy");
+
+ if(!dummy.exists()) {
+ try {
+ dummy.createNewFile();
+ } catch(Exception e) {
+ log.error("could not create dummy file", e);
+ throw new IllegalStateException("could not create dummy file", e);
+ }
+ }
+
+ Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();
+ Path path = new Path(dummy.toURI());
+ LocalResource localResource = Records.newRecord(LocalResource.class);
+ localResource.setType(LocalResourceType.FILE);
+ localResource.setVisibility(LocalResourceVisibility.APPLICATION);
+ localResource.setResource(ConverterUtils.getYarnUrlFromPath(path));
+ localResource.setTimestamp(dummy.lastModified());
+ localResource.setSize(dummy.length());
+ localResources.put("dummy", localResource);
+ return localResources;
+ }
+
+ /** Gson adapter mapping ContainerState to/from its enum name, null-safe. */
+ static class ContainerStateAdapter extends TypeAdapter<ContainerState> {
+ @Override
+ public ContainerState read(JsonReader reader) throws IOException {
+ if (reader.peek() == JsonToken.NULL) {
+ reader.nextNull();
+ return null;
+ }
+ return ContainerState.valueOf(reader.nextString());
+ }
+
+ @Override
+ public void write(JsonWriter writer, ContainerState value) throws IOException {
+ if (value == null) {
+ writer.nullValue();
+ return;
+ }
+ writer.value(value.name());
+ }
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnApplication.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnApplication.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnApplication.java
new file mode 100644
index 0000000..3b7dcd6
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnApplication.java
@@ -0,0 +1,171 @@
+package org.apache.helix.metamanager.impl.yarn;
+
+import java.io.File;
+import java.net.InetSocketAddress;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Properties;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.net.NetUtils;
+import org.apache.hadoop.yarn.api.ApplicationConstants;
+import org.apache.hadoop.yarn.api.ClientRMProtocol;
+import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationResponse;
+import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationRequest;
+import org.apache.hadoop.yarn.api.records.ApplicationId;
+import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
+import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
+import org.apache.hadoop.yarn.api.records.LocalResource;
+import org.apache.hadoop.yarn.api.records.LocalResourceType;
+import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.exceptions.YarnRemoteException;
+import org.apache.hadoop.yarn.ipc.YarnRPC;
+import org.apache.hadoop.yarn.util.Records;
+import org.apache.helix.metamanager.ConfigTool;
+import org.apache.helix.metamanager.Service;
+import org.apache.log4j.Logger;
+
+/**
+ * YARN client service that submits and kills the application master for the
+ * meta cluster manager. Stages the master archive, its properties file and
+ * the container archive on HDFS, then launches the master via a shell
+ * command. Configured through {@link YarnApplicationProperties}.
+ */
+class YarnApplication implements Service {
+
+ static final Logger log = Logger.getLogger(YarnApplication.class);
+
+ // environment variable names; presumably read by the application master —
+ // NOTE(review): none of them is placed into the launch environment below
+ static final String ENV_CLUSTER_ADDRESS = "YA_CLUSTER_ADDRESS";
+ static final String ENV_CLUSTER_NAME = "YA_CLUSTER_NAME";
+ static final String ENV_METADATA_ADDRESS = "YA_METADATA_ADDRESS";
+ static final String ENV_PROVIDER_NAME = "YA_PROVIDER_NAME";
+
+ // shell command template: script path, stdout log dir, stderr log dir
+ static String YARN_MASTER_COMMAND = "/bin/sh %s 1>%s/stdout 2>%s/stderr";
+
+ Configuration conf;
+ YarnRPC rpc;
+ ClientRMProtocol rmClient;
+ ApplicationId appId;
+ File propertiesFile;
+
+ YarnApplicationProperties properties;
+
+ // no-arg constructor for configure(Properties)-based setup
+ public YarnApplication() {
+ // left blank
+ }
+
+ public YarnApplication(YarnApplicationProperties properties) {
+ this.properties = properties;
+ internalConf();
+ }
+
+ /** Wraps the raw properties and initializes the YARN configuration. */
+ @Override
+ public void configure(Properties properties) throws Exception {
+ YarnApplicationProperties yarnProps = new YarnApplicationProperties();
+ yarnProps.putAll(properties);
+ this.properties = yarnProps;
+ internalConf();
+ }
+
+ /** Builds the YarnConfiguration (RM, scheduler, HDFS) and the RPC factory. */
+ public void internalConf() {
+ this.conf = new YarnConfiguration();
+ this.conf.set(YarnConfiguration.RM_ADDRESS, properties.getYarnResourceManager());
+ this.conf.set(YarnConfiguration.RM_SCHEDULER_ADDRESS, properties.getYarnScheduler());
+ this.conf.set(FileSystem.FS_DEFAULT_NAME_KEY, properties.getYarnHdfs());
+
+ this.rpc = YarnRPC.create(conf);
+ }
+
+ /**
+  * Connects to the ResourceManager, acquires a new application id, stages
+  * all resources on HDFS under that id and submits the application master.
+  *
+  * @throws Exception on RPC, HDFS staging or submission failure
+  */
+ @Override
+ public void start() throws Exception {
+ connect();
+
+ String command = String.format(YARN_MASTER_COMMAND, ConfigTool.YARN_MASTER_PATH,
+ ApplicationConstants.LOG_DIR_EXPANSION_VAR, ApplicationConstants.LOG_DIR_EXPANSION_VAR);
+
+ log.info(String.format("Starting application '%s' provider '%s' (masterCommand='%s')",
+ properties.getProviderMetadata(), properties.getProviderName(), command));
+
+ log.debug(String.format("Running master command \"%s\"", command));
+
+ // app id
+ GetNewApplicationRequest appRequest = Records.newRecord(GetNewApplicationRequest.class);
+ GetNewApplicationResponse appResponse = rmClient.getNewApplication(appRequest);
+
+ this.appId = appResponse.getApplicationId();
+
+ log.info(String.format("Acquired app id '%s' for '%s'", appId.toString(), properties.getProviderName()));
+
+ // command
+ ContainerLaunchContext launchContext = Records.newRecord(ContainerLaunchContext.class);
+ launchContext.setCommands(Collections.singletonList(command));
+
+ // resource limit
+ Resource resource = Records.newRecord(Resource.class);
+ resource.setMemory(256); // TODO make dynamic
+ launchContext.setResource(resource);
+
+ // environment (currently empty; see ENV_* constants above)
+ Map<String, String> env = new HashMap<String, String>();
+ launchContext.setEnvironment(env);
+
+ // configuration written to a local temp file, then staged on HDFS
+ propertiesFile = YarnUtils.writePropertiesToTemp(properties);
+
+ // HDFS staging namespace keyed by the application id
+ final String namespace = appId.toString();
+ final Path masterArchive = YarnUtils.copyToHdfs(ConfigTool.YARN_MASTER_ARCHIVE_PATH, ConfigTool.YARN_MASTER_STAGING, namespace, conf);
+ final Path masterProperties = YarnUtils.copyToHdfs(propertiesFile.getCanonicalPath(), ConfigTool.YARN_MASTER_PROPERTIES, namespace, conf);
+ final Path containerArchive = YarnUtils.copyToHdfs(ConfigTool.YARN_CONTAINER_ARCHIVE_PATH, ConfigTool.YARN_CONTAINER_STAGING, namespace, conf);
+
+ // local resources
+ Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();
+ localResources.put(ConfigTool.YARN_MASTER_DESTINATION,
+ YarnUtils.createHdfsResource(masterArchive, LocalResourceType.ARCHIVE, conf));
+ localResources.put(ConfigTool.YARN_MASTER_PROPERTIES,
+ YarnUtils.createHdfsResource(masterProperties, LocalResourceType.FILE, conf));
+ localResources.put(ConfigTool.YARN_CONTAINER_STAGING,
+ YarnUtils.createHdfsResource(containerArchive, LocalResourceType.FILE, conf));
+
+ launchContext.setLocalResources(localResources);
+
+ // user
+ launchContext.setUser(properties.getYarnUser());
+
+ // app submission
+ ApplicationSubmissionContext subContext = Records.newRecord(ApplicationSubmissionContext.class);
+ subContext.setApplicationId(appId);
+ subContext.setApplicationName(properties.getProviderName());
+ subContext.setAMContainerSpec(launchContext);
+
+ SubmitApplicationRequest subRequest = Records.newRecord(SubmitApplicationRequest.class);
+ subRequest.setApplicationSubmissionContext(subContext);
+
+ log.info(String.format("Starting app id '%s'", appId.toString()));
+
+ rmClient.submitApplication(subRequest);
+
+ }
+
+ /**
+  * Force-kills the application, removes its HDFS staging namespace (best
+  * effort) and deletes the local temporary properties file.
+  * NOTE(review): assumes start() has completed — appId and propertiesFile
+  * are null otherwise; confirm callers never stop() before start().
+  */
+ @Override
+ public void stop() throws YarnRemoteException {
+ log.info(String.format("Stopping app id '%s'", appId.toString()));
+ KillApplicationRequest killRequest = Records.newRecord(KillApplicationRequest.class);
+ killRequest.setApplicationId(appId);
+
+ rmClient.forceKillApplication(killRequest);
+
+ try { YarnUtils.destroyHdfsNamespace(appId.toString(), conf); } catch(Exception ignore) {}
+
+ propertiesFile.delete();
+ }
+
+ /** Opens the ClientRMProtocol proxy to the ResourceManager. */
+ void connect() {
+ YarnConfiguration yarnConf = new YarnConfiguration(conf);
+ InetSocketAddress rmAddress = NetUtils.createSocketAddr(yarnConf.get(
+ YarnConfiguration.RM_ADDRESS,
+ YarnConfiguration.DEFAULT_RM_ADDRESS));
+ log.info("Connecting to ResourceManager at: " + rmAddress);
+ this.rmClient = ((ClientRMProtocol) rpc.getProxy(ClientRMProtocol.class, rmAddress, conf));
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnApplicationProperties.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnApplicationProperties.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnApplicationProperties.java
new file mode 100644
index 0000000..e047179
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnApplicationProperties.java
@@ -0,0 +1,91 @@
+package org.apache.helix.metamanager.impl.yarn;
+
+import java.util.Properties;
+
+import org.apache.helix.metamanager.container.ContainerProcessProperties;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * Typed accessor wrapper around {@link Properties} for YARN application and
+ * container configuration. Separate validity checks exist for the master
+ * ({@link #isValidMaster()}) and a container ({@link #isValidContainer()}).
+ */
+public class YarnApplicationProperties extends Properties {
+ /**
+ *
+ */
+ private static final long serialVersionUID = -2209509977839674160L;
+
+ public final static String HELIX_ZOOKEEPER = ContainerProcessProperties.HELIX_ZOOKEEPER;
+ public final static String HELIX_CLUSTER = ContainerProcessProperties.HELIX_CLUSTER;
+ public final static String PROVIDER_METADATA = "provider.metadata";
+ public final static String PROVIDER_NAME = "provider.name";
+ public final static String CONTAINER_ID = "container.id";
+ // NOTE(review): key literal contains a typo ("resourcemananger"); kept
+ // as-is because existing property files may already use this spelling
+ public final static String YARN_RESOURCEMANAGER = "yarn.resourcemananger";
+ public final static String YARN_SCHEDULER = "yarn.scheduler";
+ public final static String YARN_USER = "yarn.user";
+ public final static String YARN_HDFS= "yarn.hdfs";
+
+ /** @return true if all keys required by the application master are present */
+ public boolean isValidMaster() {
+ return containsKey(HELIX_ZOOKEEPER) &&
+ containsKey(HELIX_CLUSTER) &&
+ containsKey(PROVIDER_METADATA) &&
+ containsKey(PROVIDER_NAME) &&
+ containsKey(YARN_RESOURCEMANAGER) &&
+ containsKey(YARN_SCHEDULER) &&
+ containsKey(YARN_USER) &&
+ containsKey(YARN_HDFS);
+ }
+
+ /** @return true if all keys required by a container process are present */
+ public boolean isValidContainer() {
+ return containsKey(HELIX_ZOOKEEPER) &&
+ containsKey(HELIX_CLUSTER) &&
+ containsKey(PROVIDER_METADATA) &&
+ containsKey(CONTAINER_ID);
+ }
+
+ public String getHelixZookeeper() {
+ return getProperty(HELIX_ZOOKEEPER);
+ }
+
+ public String getHelixCluster() {
+ return getProperty(HELIX_CLUSTER);
+ }
+
+ public String getProviderMetadata() {
+ return getProperty(PROVIDER_METADATA);
+ }
+
+ public String getProviderName() {
+ return getProperty(PROVIDER_NAME);
+ }
+
+ public String getContainerId() {
+ return getProperty(CONTAINER_ID);
+ }
+
+ public String getYarnResourceManager() {
+ return getProperty(YARN_RESOURCEMANAGER);
+ }
+
+ public String getYarnScheduler() {
+ return getProperty(YARN_SCHEDULER);
+ }
+
+ public String getYarnUser() {
+ return getProperty(YARN_USER);
+ }
+
+ public String getYarnHdfs() {
+ return getProperty(YARN_HDFS);
+ }
+
+ // Fail-fast accessors: absent keys throw IllegalStateException instead of
+ // returning null. NOTE(review): containsKey() does not consult a Properties
+ // defaults table, so these overrides break the standard defaults mechanism
+ // and the two-arg getProperty(key, default) path — confirm intended.
+ @Override
+ public Object get(Object key) {
+ Preconditions.checkState(containsKey(key));
+ return super.get(key);
+ }
+
+ @Override
+ public String getProperty(String key) {
+ Preconditions.checkState(containsKey(key));
+ return super.getProperty(key);
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnContainerData.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnContainerData.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnContainerData.java
new file mode 100644
index 0000000..d369a2d
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnContainerData.java
@@ -0,0 +1,86 @@
+package org.apache.helix.metamanager.impl.yarn;
+
+/**
+ * Container meta data for YARN-based containers. Reflect lifecycle of container
+ * from requesting, to bootstrapping, active operation and shutdown. Read and
+ * written by {@link YarnMasterProcess}, {@link YarnContainerProvider} and
+ * {@link YarnContainerService}. Also read by {@link YarnStatusProvider}.
+ * Typically stored in zookeeper
+ *
+ */
+class YarnContainerData {
+
+ // lifecycle states from acquisition through teardown (see class javadoc);
+ // NOTE(review): duplicated verbatim in ContainerMetadata — consolidate
+ static enum ContainerState {
+ ACQUIRE,
+ CONNECTING,
+ ACTIVE,
+ TEARDOWN,
+ FAILED,
+ HALTED,
+ FINALIZE
+ }
+
+ String id;
+ ContainerState state;
+ int yarnId;
+ String owner;
+ YarnContainerProcessProperties properties;
+
+ // no-arg constructor required for (de)serialization
+ public YarnContainerData() {
+ // left blank
+ }
+
+ // new records start in ACQUIRE with no YARN container assigned yet (-1)
+ public YarnContainerData(String id, String owner, YarnContainerProcessProperties properties) {
+ this.id = id;
+ this.state = ContainerState.ACQUIRE;
+ this.yarnId = -1;
+ this.owner = owner;
+ this.properties = properties;
+ }
+
+ public String getId() {
+ return id;
+ }
+
+ // setters return this for fluent chaining
+ public YarnContainerData setId(String id) {
+ this.id = id;
+ return this;
+ }
+
+ public ContainerState getState() {
+ return state;
+ }
+
+ public YarnContainerData setState(ContainerState state) {
+ this.state = state;
+ return this;
+ }
+
+ public int getYarnId() {
+ return yarnId;
+ }
+
+ public YarnContainerData setYarnId(int yarnId) {
+ this.yarnId = yarnId;
+ return this;
+ }
+
+ public String getOwner() {
+ return owner;
+ }
+
+ public YarnContainerData setOwner(String owner) {
+ this.owner = owner;
+ return this;
+ }
+
+ public YarnContainerProcessProperties getProperties() {
+ return properties;
+ }
+
+ public YarnContainerData setProperties(YarnContainerProcessProperties properties) {
+ this.properties = properties;
+ return this;
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnContainerProcess.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnContainerProcess.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnContainerProcess.java
new file mode 100644
index 0000000..2cad52d
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnContainerProcess.java
@@ -0,0 +1,53 @@
+package org.apache.helix.metamanager.impl.yarn;
+
+import org.apache.helix.metamanager.container.ContainerProcess;
+import org.apache.log4j.Logger;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * Host process for {@link ContainerProcess}es spawned by
+ * {@link YarnContainerProvider}. Configured via *.properties file in working
+ * directory. Corresponds to regular container in YARN and is managed jointly by
+ * the application master and the Helix participant. (Program entry point)
+ *
+ */
+class YarnContainerProcess {
+ static final Logger log = Logger.getLogger(YarnContainerProcess.class);
+
+ public static void main(String[] args) throws Exception {
+ log.trace("BEGIN YarnProcess.main()");
+
+ final YarnContainerProcessProperties properties = YarnUtils.createContainerProcessProperties(YarnUtils
+ .getPropertiesFromPath(YarnUtils.YARN_CONTAINER_PROPERTIES));
+ Preconditions.checkArgument(properties.isValid(), "container properties not valid: %s", properties.toString());
+
+ log.debug("Launching yarndata service");
+ final ZookeeperYarnDataProvider metaService = new ZookeeperYarnDataProvider(properties.getYarnData());
+ metaService.start();
+
+ log.debug("Launching yarn container service");
+ final YarnContainerService yarnService = new YarnContainerService();
+ yarnService.configure(properties);
+ yarnService.setYarnDataProvider(metaService);
+ yarnService.start();
+
+ log.debug("Installing shutdown hooks");
+ Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() {
+ @Override
+ public void run() {
+ log.debug("Running shutdown hook");
+ yarnService.stop();
+ metaService.stop();
+ }
+ }));
+
+ System.out.println("Press ENTER to stop container process");
+ System.in.read();
+
+ log.debug("Stopping container services");
+ System.exit(0);
+
+ log.trace("END YarnProcess.main()");
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnContainerProcessProperties.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnContainerProcessProperties.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnContainerProcessProperties.java
new file mode 100644
index 0000000..5277e2f
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnContainerProcessProperties.java
@@ -0,0 +1,40 @@
+package org.apache.helix.metamanager.impl.yarn;
+
+import org.apache.helix.metamanager.container.ContainerProcessProperties;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * Base configuration for {@link YarnContainerProcess}.
+ *
+ */
+public class YarnContainerProcessProperties extends ContainerProcessProperties {
+ /**
+ *
+ */
+ private static final long serialVersionUID = -2209509977839674160L;
+
+ public final static String YARNDATA = "yarndata";
+
+ public boolean isValid() {
+ return super.isValid() &&
+ containsKey(YARNDATA);
+ }
+
+ public String getYarnData() {
+ return getProperty(YARNDATA);
+ }
+
+ @Override
+ public Object get(Object key) {
+ Preconditions.checkState(containsKey(key));
+ return super.get(key);
+ }
+
+ @Override
+ public String getProperty(String key) {
+ Preconditions.checkState(containsKey(key));
+ return super.getProperty(key);
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnContainerProvider.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnContainerProvider.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnContainerProvider.java
new file mode 100644
index 0000000..c229a26
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnContainerProvider.java
@@ -0,0 +1,143 @@
+package org.apache.helix.metamanager.impl.yarn;
+
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Properties;
+import java.util.concurrent.TimeoutException;
+
+import org.apache.helix.metamanager.ContainerProvider;
+import org.apache.helix.metamanager.ContainerProviderService;
+import org.apache.helix.metamanager.impl.yarn.YarnContainerData.ContainerState;
+import org.apache.log4j.Logger;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * {@link ContainerProvider} spawning YARN-based containers. Reads and writes
+ * meta data using {@link YarnDataProvider}. Works in a distributed setting, but
+ * typically requires access to zookeeper.
+ *
+ */
+class YarnContainerProvider implements ContainerProviderService {
+
+ static final Logger log = Logger.getLogger(YarnContainerProvider.class);
+
+ static final long POLL_INTERVAL = 1000;
+ static final long CONTAINER_TIMEOUT = 60000;
+
+ /*
+ * CONTAINERS
+ * A (A, READY)
+ * B (B, RUNNING)
+ */
+
+ final Object notifier = new Object();
+ final Map<String, Properties> types = new HashMap<String, Properties>();
+
+ ZookeeperYarnDataProvider yarnDataService;
+ YarnContainerProviderProcess yarnApp;
+ YarnContainerProviderProperties properties;
+
+ @Override
+ public void configure(Properties properties) throws Exception {
+ YarnContainerProviderProperties yarnProps = new YarnContainerProviderProperties();
+ yarnProps.putAll(properties);
+ configure(yarnProps);
+ }
+
+ private void configure(YarnContainerProviderProperties properties) {
+ this.properties = properties;
+
+ for(String containerType : properties.getContainers()) {
+ registerType(containerType, properties.getContainer(containerType));
+ }
+ }
+
+ @Override
+ public void start() throws Exception {
+ Preconditions.checkNotNull(properties);
+ Preconditions.checkState(properties.isValid(), "provider properties not valid: %s", properties);
+
+ log.debug("Starting yarn container provider service");
+ yarnDataService = new ZookeeperYarnDataProvider();
+ yarnDataService.configure(properties);
+ yarnDataService.start();
+ }
+
+ @Override
+ public void stop() throws Exception {
+ log.debug("Stopping yarn container provider service");
+ destroyAll();
+
+ if(yarnDataService != null) {
+ yarnDataService.stop();
+ yarnDataService = null;
+ }
+ }
+
+ @Override
+ public void create(final String id, final String type) throws Exception {
+ Preconditions.checkArgument(types.containsKey(type), "Container type '%s' is not configured", type);
+
+ YarnContainerProcessProperties containerProperties = YarnUtils.createContainerProcessProperties(types.get(type));
+
+ log.info(String.format("Running container '%s' (properties='%s')", id, containerProperties));
+
+ yarnDataService.create(new YarnContainerData(id, properties.getName(), containerProperties));
+ waitForState(id, ContainerState.ACTIVE);
+ }
+
+ @Override
+ public void destroy(final String id) throws Exception {
+ YarnContainerData meta = yarnDataService.read(id);
+
+ if(meta.state == ContainerState.ACTIVE) {
+ log.info(String.format("Destroying active container, going to teardown"));
+ yarnDataService.update(meta.setState(ContainerState.TEARDOWN));
+
+ } else if(meta.state == ContainerState.FAILED) {
+ log.info(String.format("Destroying failed container, going to teardown"));
+ yarnDataService.update(meta.setState(ContainerState.TEARDOWN));
+
+ } else if(meta.state == ContainerState.FINALIZE) {
+ log.info(String.format("Destroying finalized container, skipping"));
+
+ } else {
+ throw new IllegalStateException(String.format("Container '%s' must be active, failed or finalized", id));
+ }
+
+ waitForState(id, ContainerState.FINALIZE);
+ yarnDataService.delete(id);
+ }
+
+ @Override
+ public void destroyAll() {
+ try {
+ for(YarnContainerData meta : yarnDataService.readAll()) {
+ if(meta.owner.equals(properties.getName())) {
+ try { destroy(meta.id); } catch (Exception ignore) {}
+ }
+ }
+ } catch (Exception ignore) {
+ // ignore
+ }
+ }
+
+ void waitForState(String id, ContainerState state) throws Exception, InterruptedException, TimeoutException {
+ long limit = System.currentTimeMillis() + CONTAINER_TIMEOUT;
+ YarnContainerData meta = yarnDataService.read(id);
+ while(meta.state != state) {
+ if(System.currentTimeMillis() >= limit) {
+ throw new TimeoutException(String.format("Container '%s' failed to reach state '%s' (currently is '%s')", id, state, meta.state));
+ }
+ Thread.sleep(POLL_INTERVAL);
+ meta = yarnDataService.read(id);
+ }
+ }
+
+ void registerType(String name, Properties properties) {
+ log.debug(String.format("Registering container type '%s' (properties='%s')", name, properties));
+ types.put(name, properties);
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnContainerProviderProcess.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnContainerProviderProcess.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnContainerProviderProcess.java
new file mode 100644
index 0000000..d883dce
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnContainerProviderProcess.java
@@ -0,0 +1,158 @@
+package org.apache.helix.metamanager.impl.yarn;
+
+import java.io.File;
+import java.net.InetSocketAddress;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Properties;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.net.NetUtils;
+import org.apache.hadoop.yarn.api.ApplicationConstants;
+import org.apache.hadoop.yarn.api.ClientRMProtocol;
+import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationResponse;
+import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationRequest;
+import org.apache.hadoop.yarn.api.records.ApplicationId;
+import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
+import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
+import org.apache.hadoop.yarn.api.records.LocalResource;
+import org.apache.hadoop.yarn.api.records.LocalResourceType;
+import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.exceptions.YarnRemoteException;
+import org.apache.hadoop.yarn.ipc.YarnRPC;
+import org.apache.hadoop.yarn.util.Records;
+import org.apache.helix.metamanager.Service;
+import org.apache.log4j.Logger;
+
+import com.google.common.base.Preconditions;
+
/**
 * Configurable and runnable service for {@link YarnContainerProvider}.
 * Submits the application master to the YARN ResourceManager: acquires an
 * application id, stages the master/container archives and the provider
 * properties in HDFS, and submits a launch context that runs the master via
 * a shell command. stop() force-kills the application and removes the staged
 * HDFS namespace.
 */
public class YarnContainerProviderProcess implements Service {

    static final Logger log = Logger.getLogger(YarnContainerProviderProcess.class);

    // shell template: %s = master script path, then stdout/stderr redirected into
    // the YARN log dir. NOTE(review): mutable static field — consider making final.
    static String YARN_MASTER_COMMAND = "/bin/sh %s 1>%s/stdout 2>%s/stderr";

    Configuration conf;           // YARN + HDFS configuration assembled in configure()
    YarnRPC rpc;                  // RPC factory used to build the RM proxy
    ClientRMProtocol rmClient;    // proxy to the ResourceManager, created in connect()
    ApplicationId appId;          // id of the submitted application, set in start()
    File propertiesFile;          // temp file holding the provider properties, deleted in stop()

    YarnContainerProviderProperties properties;

    @Override
    public void configure(Properties properties) throws Exception {
        configure(YarnUtils.createContainerProviderProperties(properties));
    }

    /**
     * Builds the YARN configuration (RM address, scheduler address, default FS)
     * from the typed properties and prepares the RPC factory. Does not contact
     * the cluster yet; that happens in start().
     */
    private void configure(YarnContainerProviderProperties properties) {
        this.conf = new YarnConfiguration();
        this.conf.set(YarnConfiguration.RM_ADDRESS, properties.getResourceManager());
        this.conf.set(YarnConfiguration.RM_SCHEDULER_ADDRESS, properties.getScheduler());
        this.conf.set(FileSystem.FS_DEFAULT_NAME_KEY, properties.getHdfs());

        this.rpc = YarnRPC.create(conf);

        this.properties = properties;
    }

    /**
     * Connects to the ResourceManager, stages all resources in HDFS under a
     * namespace derived from the new application id, and submits the
     * application master. Returns once the submission request is accepted;
     * it does not wait for the master to start running.
     */
    @Override
    public void start() throws Exception {
        Preconditions.checkNotNull(properties);
        Preconditions.checkState(properties.isValid());

        connect();

        String command = String.format(YARN_MASTER_COMMAND, YarnUtils.YARN_MASTER_PATH, ApplicationConstants.LOG_DIR_EXPANSION_VAR,
                ApplicationConstants.LOG_DIR_EXPANSION_VAR);

        log.info(String.format("Starting application '%s' provider '%s' (masterCommand='%s')", properties.getYarnData(), properties.getName(), command));

        log.debug(String.format("Running master command \"%s\"", command));

        // acquire a new application id from the RM
        GetNewApplicationRequest appRequest = Records.newRecord(GetNewApplicationRequest.class);
        GetNewApplicationResponse appResponse = rmClient.getNewApplication(appRequest);

        this.appId = appResponse.getApplicationId();

        log.info(String.format("Acquired app id '%s' for '%s'", appId.toString(), properties.getName()));

        // launch context running the master shell command
        ContainerLaunchContext launchContext = Records.newRecord(ContainerLaunchContext.class);
        launchContext.setCommands(Collections.singletonList(command));

        // resource limit for the application master container
        Resource resource = Records.newRecord(Resource.class);
        resource.setMemory(256); // TODO make dynamic
        launchContext.setResource(resource);

        // environment (empty; master reads its config from the staged properties file)
        Map<String, String> env = new HashMap<String, String>();
        launchContext.setEnvironment(env);

        // write provider properties to a local temp file for staging
        propertiesFile = YarnUtils.writePropertiesToTemp(properties);

        // stage master archive, properties and container archive in HDFS,
        // namespaced by the application id so stop() can wipe them in one go
        final String namespace = appId.toString();
        final Path masterArchive = YarnUtils.copyToHdfs(YarnUtils.YARN_MASTER_ARCHIVE_PATH, YarnUtils.YARN_MASTER_STAGING, namespace, conf);
        final Path masterProperties = YarnUtils.copyToHdfs(propertiesFile.getCanonicalPath(), YarnUtils.YARN_MASTER_PROPERTIES, namespace, conf);
        final Path containerArchive = YarnUtils.copyToHdfs(YarnUtils.YARN_CONTAINER_ARCHIVE_PATH, YarnUtils.YARN_CONTAINER_STAGING, namespace, conf);

        // register staged files as local resources of the master container
        // NOTE(review): the container archive is registered as FILE while the
        // master archive uses ARCHIVE — confirm this asymmetry is intended.
        Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();
        localResources.put(YarnUtils.YARN_MASTER_DESTINATION, YarnUtils.createHdfsResource(masterArchive, LocalResourceType.ARCHIVE, conf));
        localResources.put(YarnUtils.YARN_MASTER_PROPERTIES, YarnUtils.createHdfsResource(masterProperties, LocalResourceType.FILE, conf));
        localResources.put(YarnUtils.YARN_CONTAINER_STAGING, YarnUtils.createHdfsResource(containerArchive, LocalResourceType.FILE, conf));

        launchContext.setLocalResources(localResources);

        // user the master container runs as
        launchContext.setUser(properties.getUser());

        // assemble and submit the application
        ApplicationSubmissionContext subContext = Records.newRecord(ApplicationSubmissionContext.class);
        subContext.setApplicationId(appId);
        subContext.setApplicationName(properties.getName());
        subContext.setAMContainerSpec(launchContext);

        SubmitApplicationRequest subRequest = Records.newRecord(SubmitApplicationRequest.class);
        subRequest.setApplicationSubmissionContext(subContext);

        log.info(String.format("Starting app id '%s'", appId.toString()));

        rmClient.submitApplication(subRequest);

    }

    /**
     * Force-kills the submitted application, removes the staged HDFS namespace
     * (best-effort) and deletes the local temp properties file.
     */
    @Override
    public void stop() throws YarnRemoteException {
        log.info(String.format("Stopping app id '%s'", appId.toString()));
        KillApplicationRequest killRequest = Records.newRecord(KillApplicationRequest.class);
        killRequest.setApplicationId(appId);

        rmClient.forceKillApplication(killRequest);

        // best-effort cleanup of staged resources
        try { YarnUtils.destroyHdfsNamespace(appId.toString(), conf); } catch(Exception ignore) {}

        propertiesFile.delete();
    }

    /** Creates the RPC proxy to the ResourceManager at the configured address. */
    void connect() {
        YarnConfiguration yarnConf = new YarnConfiguration(conf);
        InetSocketAddress rmAddress = NetUtils.createSocketAddr(yarnConf.get(YarnConfiguration.RM_ADDRESS, YarnConfiguration.DEFAULT_RM_ADDRESS));
        log.info("Connecting to ResourceManager at: " + rmAddress);
        this.rmClient = ((ClientRMProtocol) rpc.getProxy(ClientRMProtocol.class, rmAddress, conf));
    }
}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnContainerProviderProperties.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnContainerProviderProperties.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnContainerProviderProperties.java
new file mode 100644
index 0000000..95ad0aa
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnContainerProviderProperties.java
@@ -0,0 +1,64 @@
+package org.apache.helix.metamanager.impl.yarn;
+
+import org.apache.helix.metamanager.provider.ProviderProperties;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * Base configuration for {@link YarnContainerProviderProcess}
+ *
+ */
+public class YarnContainerProviderProperties extends ProviderProperties {
+ /**
+ *
+ */
+ private static final long serialVersionUID = -8853614843205587170L;
+
+ public final static String YARNDATA = "yarndata";
+ public final static String RESOURCEMANAGER = "resourcemananger";
+ public final static String SCHEDULER = "scheduler";
+ public final static String USER = "user";
+ public final static String HDFS = "hdfs";
+
+ public boolean isValid() {
+ return super.isValid() &&
+ containsKey(YARNDATA) &&
+ containsKey(RESOURCEMANAGER) &&
+ containsKey(SCHEDULER) &&
+ containsKey(USER) &&
+ containsKey(HDFS);
+ }
+
+ public String getYarnData() {
+ return getProperty(YARNDATA);
+ }
+
+ public String getResourceManager() {
+ return getProperty(RESOURCEMANAGER);
+ }
+
+ public String getScheduler() {
+ return getProperty(SCHEDULER);
+ }
+
+ public String getUser() {
+ return getProperty(USER);
+ }
+
+ public String getHdfs() {
+ return getProperty(HDFS);
+ }
+
+ @Override
+ public String getProperty(String key) {
+ Preconditions.checkState(containsKey(key));
+ return super.getProperty(key);
+ }
+
+ @Override
+ public Object get(Object key) {
+ Preconditions.checkState(containsKey(key));
+ return super.get(key);
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnContainerService.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnContainerService.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnContainerService.java
new file mode 100644
index 0000000..804d6ed
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/yarn/YarnContainerService.java
@@ -0,0 +1,156 @@
+package org.apache.helix.metamanager.impl.yarn;
+
+import java.io.File;
+import java.util.Properties;
+import java.util.concurrent.Executors;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.helix.metamanager.Service;
+import org.apache.helix.metamanager.container.ContainerProcess;
+import org.apache.helix.metamanager.container.ContainerProcessProperties;
+import org.apache.helix.metamanager.container.ContainerUtils;
+import org.apache.helix.metamanager.impl.yarn.YarnContainerData.ContainerState;
+import org.apache.log4j.Logger;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * Configurable and runnable service for YARN-based containers. Continuously
+ * checks container meta data and process state and triggers state changes and
+ * container setup and shutdown.
+ *
+ */
+class YarnContainerService implements Service {
+ static final Logger log = Logger.getLogger(YarnContainerService.class);
+
+ static final long CONTAINERSERVICE_INTERVAL = 1000;
+
+ YarnContainerProcessProperties properties;
+
+ YarnDataProvider metaService;
+ ScheduledExecutorService executor;
+
+ ContainerProcess process;
+
+ @Override
+ public void configure(Properties properties) throws Exception {
+ Preconditions.checkNotNull(properties);
+ YarnContainerProcessProperties containerProperties = new YarnContainerProcessProperties();
+ containerProperties.putAll(properties);
+ Preconditions.checkArgument(containerProperties.isValid());
+
+ this.properties = containerProperties;
+ }
+
+ public void setYarnDataProvider(YarnDataProvider metaService) {
+ this.metaService = metaService;
+ }
+
+ @Override
+ public void start() {
+ Preconditions.checkNotNull(metaService);
+ Preconditions.checkNotNull(properties);
+ Preconditions.checkState(properties.isValid());
+
+ log.debug("starting yarn container service");
+
+ executor = Executors.newSingleThreadScheduledExecutor();
+ executor.scheduleAtFixedRate(new ContainerStatusService(), 0, CONTAINERSERVICE_INTERVAL, TimeUnit.MILLISECONDS);
+ }
+
+ @Override
+ public void stop() {
+ log.debug("stopping yarn container service");
+
+ if (executor != null) {
+ executor.shutdown();
+ while (!executor.isTerminated()) {
+ try {
+ Thread.sleep(100);
+ } catch (InterruptedException e) {
+ // ignore
+ }
+ }
+ executor = null;
+ }
+
+ destroyLocalContainerNamespace();
+ }
+
+ class ContainerStatusService implements Runnable {
+ @Override
+ public void run() {
+ log.info("updating container status");
+
+ try {
+ if (!metaService.exists(properties.getName())) {
+ log.warn(String.format("YarnData for '%s' does not exist. Terminating yarn service.", properties.getName()));
+ process.stop();
+ stop();
+ }
+
+ YarnContainerData meta = metaService.read(properties.getName());
+
+ if (meta.state == ContainerState.CONNECTING) {
+ log.trace("container connecting");
+ try {
+ ContainerProcessProperties containerProperties = meta.getProperties();
+
+ containerProperties.setProperty(ContainerProcessProperties.CLUSTER, properties.getCluster());
+ containerProperties.setProperty(ContainerProcessProperties.ADDRESS, properties.getAddress());
+ containerProperties.setProperty(ContainerProcessProperties.NAME, properties.getName());
+
+ process = ContainerUtils.createProcess(containerProperties);
+ process.start();
+ } catch (Exception e) {
+ log.error("Failed to start participant, going to failed", e);
+ }
+
+ if (process.isActive()) {
+ log.trace("process active, activating container");
+ metaService.update(meta.setState(ContainerState.ACTIVE));
+
+ } else if (process.isFailed()) {
+ log.trace("process failed, failing container");
+ metaService.update(meta.setState(ContainerState.FAILED));
+
+ } else {
+ log.trace("process state unknown, failing container");
+ metaService.update(meta.setState(ContainerState.FAILED));
+ }
+ }
+
+ if (meta.state == ContainerState.ACTIVE) {
+ log.trace("container active");
+ if (process.isFailed()) {
+ log.trace("process failed, failing container");
+ metaService.update(meta.setState(ContainerState.FAILED));
+
+ } else if (!process.isActive()) {
+ log.trace("process not active, halting container");
+ process.stop();
+ metaService.update(meta.setState(ContainerState.HALTED));
+ }
+ }
+
+ if (meta.state == ContainerState.TEARDOWN) {
+ log.trace("container teardown");
+ process.stop();
+ metaService.update(meta.setState(ContainerState.HALTED));
+ }
+
+ } catch (Exception e) {
+ log.error(String.format("Error while updating container '%s' status", properties.getName()), e);
+ }
+ }
+ }
+
+ public static void destroyLocalContainerNamespace() {
+ log.info("cleaning up container directory");
+ FileUtils.deleteQuietly(new File(YarnUtils.YARN_CONTAINER_DESTINATION));
+ FileUtils.deleteQuietly(new File(YarnUtils.YARN_CONTAINER_PROPERTIES));
+ }
+
+}
[07/15] Adding Helix-task-framework and Yarn integration modules
Posted by ki...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/cluster/RedisTargetProvider.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/cluster/RedisTargetProvider.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/cluster/RedisTargetProvider.java
new file mode 100644
index 0000000..94c617d
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/cluster/RedisTargetProvider.java
@@ -0,0 +1,329 @@
+package org.apache.helix.metamanager.cluster;
+
+import java.io.BufferedReader;
+import java.io.InputStreamReader;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.Map.Entry;
+import java.util.Properties;
+import java.util.concurrent.Callable;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.TimeUnit;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.I0Itec.zkclient.ZkClient;
+import org.apache.helix.metamanager.ClusterStatusProvider;
+import org.apache.log4j.Logger;
+
+import com.google.common.base.Preconditions;
+
+public class RedisTargetProvider implements ClusterStatusProvider {
+
+ static final Logger log = Logger.getLogger(RedisTargetProvider.class);
+
+ public static final String BENCHMARK_COMMAND = "redis-benchmark";
+ public static final String BENCHMARK_TESTS = "GET,SET";
+
+ public static final String DEFAULT_RECORDS = "100000";
+ public static final String DEFAULT_CLIENTS = "20";
+ public static final String DEFAULT_REQUESTS = "100000";
+ public static final String DEFAULT_TIMEOUT = "8000";
+ public static final String DEFAULT_INTERVAL = "10000";
+
+ ZkClient zookeeper;
+
+ final String address;
+ final String root;
+
+ final int records;
+ final int clients;
+ final int requests;
+ final int timeout;
+ final int interval;
+
+ int targetTpsGet;
+ int targetTpsSet;
+ int targetCount = 1;
+
+ ScheduledExecutorService executor;
+
+ public RedisTargetProvider(Properties properties) {
+ address = properties.getProperty("address");
+ root = properties.getProperty("root");
+ targetTpsGet = Integer.valueOf(properties.getProperty("tps.get", "0"));
+ targetTpsSet = Integer.valueOf(properties.getProperty("tps.set", "0"));
+ records = Integer.valueOf(properties.getProperty("records", DEFAULT_RECORDS));
+ clients = Integer.valueOf(properties.getProperty("clients", DEFAULT_CLIENTS));
+ requests = Integer.valueOf(properties.getProperty("requests", DEFAULT_REQUESTS));
+ timeout = Integer.valueOf(properties.getProperty("timeout", DEFAULT_TIMEOUT));
+ interval = Integer.valueOf(properties.getProperty("interval", DEFAULT_INTERVAL));
+ }
+
+ public void startService() {
+ log.debug("starting redis status service");
+ zookeeper = new ZkClient(address);
+ zookeeper.createPersistent("/" + root, true);
+
+ // TODO not concurrency-safe, should not matter though
+ if (!zookeeper.exists("/" + root + "/target.get")) {
+ try {
+ zookeeper.createPersistent("/" + root + "/target.get", String.valueOf(targetTpsGet));
+ } catch (Exception ignore) {
+ }
+ }
+ if (!zookeeper.exists("/" + root + "/target.set")) {
+ try {
+ zookeeper.createPersistent("/" + root + "/target.set", String.valueOf(targetTpsSet));
+ } catch (Exception ignore) {
+ }
+ }
+
+ executor = Executors.newSingleThreadScheduledExecutor();
+ executor.scheduleAtFixedRate(new RedisBenchmarkRunnable(), 0, interval, TimeUnit.MILLISECONDS);
+ }
+
+ public void stopService() {
+ log.debug("stopping redis status service");
+ if (executor != null) {
+ executor.shutdownNow();
+ while (!executor.isTerminated()) {
+ try {
+ Thread.sleep(100);
+ } catch (InterruptedException e) {
+ // ignore
+ }
+ }
+ executor = null;
+ }
+ if (zookeeper != null) {
+ zookeeper.close();
+ zookeeper = null;
+ }
+ }
+
+ @Override
+ public int getTargetContainerCount(String containerType) throws Exception {
+ return targetCount;
+ }
+
+ private class RedisBenchmarkRunnable implements Runnable {
+ ExecutorService executor = Executors.newCachedThreadPool();
+ RedisResult aggregateResult;
+
+ @Override
+ public void run() {
+ log.debug("running redis benchmark");
+
+ aggregateResult = new RedisResult(0);
+ Collection<Future<RedisResult>> futures = new ArrayList<Future<RedisResult>>();
+
+ try {
+ Collection<RedisTarget> targets = getTargets();
+
+ // start benchmark
+ for (RedisTarget target : targets) {
+ log.debug(String.format("submitting target '%s'", target));
+ Future<RedisResult> future = executor.submit(new RedisCallable(target));
+ futures.add(future);
+ }
+
+ // aggregate results
+ try {
+ log.debug("waiting for results");
+
+ long limit = System.currentTimeMillis() + timeout;
+ for (Future<RedisResult> future : futures) {
+ try {
+ RedisResult result = future.get(limit - System.currentTimeMillis(), TimeUnit.MILLISECONDS);
+ log.debug(String.format("got result '%s'", result));
+ aggregate(result);
+ } catch (Exception e) {
+ log.warn(String.format("failed to get result"));
+ future.cancel(true);
+ }
+ }
+ } catch (Exception e) {
+ log.error("Error running redis benchmark", e);
+
+ for (Future<RedisResult> future : futures) {
+ future.cancel(true);
+ }
+
+ return;
+ }
+
+ // compare to thresholds
+ log.debug(String.format("aggregate result is '%s'", aggregateResult));
+
+ // get target from zookeeper
+ try {
+ targetTpsGet = Integer.valueOf(zookeeper.<String> readData("/" + root + "/target.get"));
+ } catch (Exception ignore) {
+ }
+ try {
+ targetTpsSet = Integer.valueOf(zookeeper.<String> readData("/" + root + "/target.set"));
+ } catch (Exception ignore) {
+ }
+
+ // calculate counts
+ int targetCountGet = -1;
+ if (aggregateResult.containsKey("GET")) {
+ double tpsTarget = targetTpsGet;
+ double tps = aggregateResult.get("GET");
+
+ targetCountGet = (int) Math.ceil(tpsTarget / tps * aggregateResult.serverCount);
+ log.debug(String.format("count.get=%d, tps.get=%f, target.get=%f", targetCountGet, tps, tpsTarget));
+ }
+
+ int targetCountSet = -1;
+ if (aggregateResult.containsKey("SET")) {
+ double tpsTarget = targetTpsSet;
+ double tps = aggregateResult.get("SET");
+
+ targetCountSet = (int) Math.ceil(tpsTarget / tps * aggregateResult.serverCount);
+ log.debug(String.format("count.set=%d, tps.set=%f, target.set=%f", targetCountSet, tps, tpsTarget));
+ }
+
+ targetCount = Math.max(targetCountGet, targetCountSet);
+ targetCount = Math.max(targetCount, 1);
+
+ log.debug(String.format("target count is %d", targetCount));
+ RedisTargetProvider.this.targetCount = targetCount;
+
+ } catch (Exception e) {
+ log.error("Error running redis benchmark", e);
+
+ for (Future<RedisResult> future : futures) {
+ future.cancel(true);
+ }
+ }
+
+ }
+
+ Collection<RedisTarget> getTargets() {
+ log.debug("fetching redis servers from zookeeper");
+ Collection<RedisTarget> targets = new ArrayList<RedisTarget>();
+ Collection<String> servers = zookeeper.getChildren("/" + root);
+
+ servers.remove("target.get");
+ servers.remove("target.set");
+
+ for (String server : servers) {
+ String hostname = zookeeper.readData("/" + root + "/" + server + "/hostname");
+ int port = Integer.valueOf(zookeeper.<String> readData("/" + root + "/" + server + "/port"));
+
+ targets.add(new RedisTarget(hostname, port));
+ }
+
+ log.debug(String.format("found %d servers: %s", targets.size(), targets));
+ return targets;
+ }
+
+ void aggregate(RedisResult result) {
+ RedisResult newResult = new RedisResult(aggregateResult.serverCount + result.serverCount);
+
+ for (Entry<String, Double> entry : result.entrySet()) {
+ double current = 0.0d;
+ if (aggregateResult.containsKey(entry.getKey()))
+ current = aggregateResult.get(entry.getKey());
+
+ current += entry.getValue();
+ newResult.put(entry.getKey(), current);
+ }
+
+ aggregateResult = newResult;
+ }
+ }
+
+ private static class RedisTarget {
+ final String hostname;
+ final int port;
+
+ public RedisTarget(String hostname, int port) {
+ this.hostname = hostname;
+ this.port = port;
+ }
+
+ @Override
+ public String toString() {
+ return String.format("%s:%d", hostname, port);
+ }
+ }
+
+ private static class RedisResult extends HashMap<String, Double> {
+ /**
+ *
+ */
+ private static final long serialVersionUID = 4599748807597500952L;
+
+ final int serverCount;
+
+ public RedisResult(int serverCount) {
+ this.serverCount = serverCount;
+ }
+
+ @Override
+ public String toString() {
+ return String.format("[serverCount=%d %s]", serverCount, super.toString());
+ }
+ }
+
+ private class RedisCallable implements Callable<RedisResult> {
+ final RedisTarget target;
+
+ public RedisCallable(RedisTarget target) {
+ this.target = target;
+ }
+
+ @Override
+ public RedisResult call() throws Exception {
+ log.debug(String.format("executing benchmark for '%s'", target));
+
+ ProcessBuilder builder = new ProcessBuilder();
+ builder.command(BENCHMARK_COMMAND, "-h", target.hostname, "-p", String.valueOf(target.port), "-r", String.valueOf(records), "-n",
+ String.valueOf(requests), "-c", String.valueOf(clients), "-t", BENCHMARK_TESTS, "--csv");
+ Process process = builder.start();
+
+ log.debug(String.format("running '%s'", builder.command()));
+
+ RedisResult result = new RedisResult(1);
+
+ int retVal;
+ try {
+ retVal = process.waitFor();
+ } catch (InterruptedException e) {
+ process.destroy();
+ return result;
+ }
+
+ Preconditions.checkState(retVal == 0, "Benchmark process returned %s", retVal);
+
+ Pattern pattern = Pattern.compile("\"([A-Z0-9_]+).*\",\"([0-9\\.]+)\"");
+
+ log.debug("parsing output");
+ BufferedReader reader = new BufferedReader(new InputStreamReader(process.getInputStream()));
+ String line = null;
+ while ((line = reader.readLine()) != null) {
+ Matcher matcher = pattern.matcher(line);
+
+ if (!matcher.find())
+ continue;
+
+ String key = matcher.group(1);
+ Double value = Double.valueOf(matcher.group(2));
+
+ result.put(key, value);
+ }
+
+ log.debug(String.format("benchmark for '%s' returned '%s'", target, result));
+
+ return result;
+ }
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/cluster/StaticTargetProvider.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/cluster/StaticTargetProvider.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/cluster/StaticTargetProvider.java
new file mode 100644
index 0000000..47bf725
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/cluster/StaticTargetProvider.java
@@ -0,0 +1,41 @@
+package org.apache.helix.metamanager.cluster;
+
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Properties;
+
+import org.apache.helix.metamanager.ClusterStatusProvider;
+
+
+public class StaticTargetProvider implements ClusterStatusProvider {
+
+ final Map<String, Integer> targetCounts = new HashMap<String, Integer>();
+
+ public StaticTargetProvider() {
+ // left blank
+ }
+
+ public StaticTargetProvider(Properties properties) {
+ for(Entry<Object, Object> entry : properties.entrySet()) {
+ String key = (String)entry.getKey();
+ int value = Integer.valueOf((String)entry.getValue());
+
+ targetCounts.put(key, value);
+ }
+ }
+
+ public StaticTargetProvider(Map<String, Integer> targetCounts) {
+ this.targetCounts.putAll(targetCounts);
+ }
+
+ @Override
+ public int getTargetContainerCount(String containerType) {
+ return targetCounts.get(containerType);
+ }
+
+ public void setTargetContainerCount(String containerType, int targetCount) {
+ targetCounts.put(containerType, targetCount);
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/container/ContainerProcess.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/container/ContainerProcess.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/container/ContainerProcess.java
new file mode 100644
index 0000000..11ad86e
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/container/ContainerProcess.java
@@ -0,0 +1,133 @@
+package org.apache.helix.metamanager.container;
+
+import java.util.Properties;
+
+import org.apache.helix.HelixManager;
+import org.apache.helix.HelixManagerFactory;
+import org.apache.helix.InstanceType;
+import org.apache.helix.metamanager.Service;
+import org.apache.helix.participant.statemachine.StateModel;
+import org.apache.helix.participant.statemachine.StateModelFactory;
+import org.apache.log4j.Logger;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * Base service for spawn-able container types. Configure from Properties and
+ * instantiates Helix participant to managed cluster.
+ *
+ */
+public abstract class ContainerProcess implements Service {
+ static final Logger log = Logger.getLogger(ContainerProcess.class);
+
+ // configuration this process was started with (validated in configure())
+ private ContainerProcessProperties properties;
+ // Helix participant connection; created lazily in startParticipant()
+ private HelixManager participantManager;
+
+ // Helix state model name and factory; must be set by the subclass
+ // (typically in its constructor) before start() is invoked
+ private String modelName;
+ private StateModelFactory<? extends StateModel> modelFactory;
+
+ // identity of this participant, extracted from the properties
+ private String instanceName;
+ private String clusterName;
+ private String zookeeperAddress;
+
+ // lifecycle flags; see isActive()/isFailed()
+ private boolean active = false;
+ private boolean failed = false;
+
+ public final void setModelName(String modelName) {
+ this.modelName = modelName;
+ }
+
+ public final void setModelFactory(StateModelFactory<? extends StateModel> modelFactory) {
+ this.modelFactory = modelFactory;
+ }
+
+ // Copies the given properties into the typed wrapper, validates that all
+ // required keys are present, and caches the participant identity fields.
+ @Override
+ public void configure(Properties properties) throws Exception {
+ ContainerProcessProperties containerProps = new ContainerProcessProperties();
+ containerProps.putAll(properties);
+ Preconditions.checkArgument(containerProps.isValid());
+
+ this.properties = containerProps;
+ this.instanceName = containerProps.getName();
+ this.clusterName = containerProps.getCluster();
+ this.zookeeperAddress = containerProps.getAddress();
+ }
+
+ // Starts the container payload and then joins the Helix cluster. Any
+ // failure marks the process failed; no partial cleanup is attempted.
+ @Override
+ public final void start() {
+ try {
+ Preconditions.checkNotNull(modelName, "state model name not set");
+ Preconditions.checkNotNull(modelFactory, "state model factory not set");
+ Preconditions.checkState(properties.isValid(), "process properties not valid: %s", properties.toString());
+
+ // order matters: the container payload must be running before the
+ // Helix participant announces itself to the cluster
+ log.info(String.format("starting container '%s'", instanceName));
+ startContainer();
+
+ log.info(String.format("starting helix participant '%s'", instanceName));
+ startParticipant();
+
+ active = true;
+
+ } catch (Exception e) {
+ log.error(String.format("starting container '%s' failed", instanceName), e);
+ fail();
+ }
+ }
+
+ // subclass hook: launch the actual container payload
+ protected abstract void startContainer() throws Exception;
+
+ private final void startParticipant() throws Exception {
+ participantManager = HelixManagerFactory.getZKHelixManager(clusterName, instanceName, InstanceType.PARTICIPANT, zookeeperAddress);
+ participantManager.getStateMachineEngine().registerStateModelFactory(modelName, modelFactory);
+ participantManager.connect();
+ }
+
+ // Stops in reverse order of start(): leave the cluster first, then tear
+ // down the container payload. Shutdown is best-effort; failures are
+ // logged, not rethrown.
+ @Override
+ public final void stop() {
+ try {
+ log.info(String.format("stopping helix participant '%s'", instanceName));
+ stopParticipant();
+
+ log.info(String.format("stopping container '%s'", instanceName));
+ stopContainer();
+
+ active = false;
+
+ } catch (Exception e) {
+ log.warn(String.format("stopping container '%s' failed", instanceName), e);
+ }
+ }
+
+ // subclass hook: tear down the actual container payload
+ protected abstract void stopContainer() throws Exception;
+
+ private final void stopParticipant() {
+ if (participantManager != null) {
+ participantManager.disconnect();
+ }
+ }
+
+ // Marks this process failed; does not stop the container itself.
+ public final void fail() {
+ failed = true;
+ }
+
+ public final boolean isActive() {
+ return active && !failed;
+ }
+
+ public final boolean isFailed() {
+ return failed;
+ }
+
+ public final ContainerProcessProperties getProperties() {
+ return properties;
+ }
+
+ String getModelName() {
+ return modelName;
+ }
+
+ StateModelFactory<? extends StateModel> getModelFactory() {
+ return modelFactory;
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/container/ContainerProcessProperties.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/container/ContainerProcessProperties.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/container/ContainerProcessProperties.java
new file mode 100644
index 0000000..1a6d272
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/container/ContainerProcessProperties.java
@@ -0,0 +1,66 @@
+package org.apache.helix.metamanager.container;
+
+import java.util.Properties;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * Base configuration for ContainerProcess.
+ *
+ */
+public class ContainerProcessProperties extends Properties {
+ /**
+ * serialization id for this Properties-derived configuration type
+ */
+ private static final long serialVersionUID = 5754863079470995536L;
+
+ // keys every container process configuration must define (see isValid())
+ public static final String CLUSTER = "cluster";
+ public static final String ADDRESS = "address";
+ public static final String NAME = "name";
+ public static final String CONTAINER_CLASS = "class";
+
+ public ContainerProcessProperties() {
+ // left blank
+ }
+
+ public ContainerProcessProperties(Properties properties) {
+ Preconditions.checkNotNull(properties);
+ putAll(properties);
+ }
+
+ // true when all mandatory keys (cluster, name, address, class) are present
+ public boolean isValid() {
+ return containsKey(CLUSTER) &&
+ containsKey(NAME) &&
+ containsKey(ADDRESS) &&
+ containsKey(CONTAINER_CLASS);
+ }
+
+ public String getCluster() {
+ return getProperty(CLUSTER);
+ }
+
+ public String getAddress() {
+ return getProperty(ADDRESS);
+ }
+
+ public String getName() {
+ return getProperty(NAME);
+ }
+
+ public String getContainerClass() {
+ return getProperty(CONTAINER_CLASS);
+ }
+
+ // Fail-fast override: unlike java.util.Properties, a missing key throws
+ // IllegalStateException instead of returning null.
+ // NOTE(review): this also changes the behavior of inherited callers such
+ // as Properties.getProperty(key, defaultValue), which invokes
+ // getProperty(key) internally — confirm that is intended.
+ @Override
+ public synchronized Object get(Object key) {
+ Preconditions.checkState(containsKey(key));
+ return super.get(key);
+ }
+
+ // fail-fast variant of getProperty; probe with containsKey()/isValid()
+ // first when the key may legitimately be absent
+ @Override
+ public String getProperty(String key) {
+ Preconditions.checkState(containsKey(key));
+ return super.getProperty(key);
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/container/ContainerStateModel.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/container/ContainerStateModel.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/container/ContainerStateModel.java
new file mode 100644
index 0000000..9ac6b5c
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/container/ContainerStateModel.java
@@ -0,0 +1,64 @@
+package org.apache.helix.metamanager.container;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import org.apache.helix.NotificationContext;
+import org.apache.helix.model.Message;
+import org.apache.helix.participant.statemachine.StateModel;
+import org.apache.helix.participant.statemachine.StateModelInfo;
+import org.apache.helix.participant.statemachine.Transition;
+import org.apache.log4j.Logger;
+
+@StateModelInfo(initialState = "OFFLINE", states = { "OFFLINE", "SLAVE", "MASTER", "DROPPED" })
+public class ContainerStateModel extends StateModel {
+
+ static final Logger log = Logger.getLogger(ContainerStateModel.class);
+
+ @Transition(from = "OFFLINE", to = "SLAVE")
+ public void offlineToSlave(Message m, NotificationContext context) {
+ log.trace(String.format("%s transitioning from OFFLINE to SLAVE",
+ context.getManager().getInstanceName()));
+ }
+
+ @Transition(from = "SLAVE", to = "OFFLINE")
+ public void slaveToOffline(Message m, NotificationContext context) {
+ log.trace(String.format("%s transitioning from SLAVE to OFFLINE",
+ context.getManager().getInstanceName()));
+ }
+
+ @Transition(from = "SLAVE", to = "MASTER")
+ public void slaveToMaster(Message m, NotificationContext context) {
+ log.trace(String.format("%s transitioning from SLAVE to MASTER",
+ context.getManager().getInstanceName()));
+ }
+
+ @Transition(from = "MASTER", to = "SLAVE")
+ public void masterToSlave(Message m, NotificationContext context) {
+ log.trace(String.format("%s transitioning from MASTER to SLAVE",
+ context.getManager().getInstanceName()));
+ }
+
+ @Transition(from = "OFFLINE", to = "DROPPED")
+ public void offlineToDropped(Message m, NotificationContext context) {
+ log.trace(String.format("%s transitioning from OFFLINE to DROPPED",
+ context.getManager().getInstanceName()));
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/container/ContainerStateModelFactory.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/container/ContainerStateModelFactory.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/container/ContainerStateModelFactory.java
new file mode 100644
index 0000000..ab5a099
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/container/ContainerStateModelFactory.java
@@ -0,0 +1,30 @@
+package org.apache.helix.metamanager.container;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import org.apache.helix.participant.statemachine.StateModelFactory;
+
+public class ContainerStateModelFactory extends StateModelFactory<ContainerStateModel> {
+
+ // one fresh no-op ContainerStateModel instance per partition
+ @Override
+ public ContainerStateModel createNewStateModel(String partitionName) {
+ return new ContainerStateModel();
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/container/ContainerUtils.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/container/ContainerUtils.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/container/ContainerUtils.java
new file mode 100644
index 0000000..3d32862
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/container/ContainerUtils.java
@@ -0,0 +1,46 @@
+package org.apache.helix.metamanager.container;
+
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.lang.reflect.Constructor;
+
+import org.apache.log4j.Logger;
+
+/**
+ * Utility for loading ContainerProperties and spawning ContainerProcess.
+ *
+ */
+public class ContainerUtils {
+
+ static final Logger log = Logger.getLogger(ContainerUtils.class);
+
+ private ContainerUtils() {
+ // left blank
+ }
+
+ public static ContainerProcess createProcess(ContainerProcessProperties properties) throws Exception {
+ String containerClassName = properties.getContainerClass();
+
+ Class<?> containerClass = Class.forName(containerClassName);
+
+ log.debug(String.format("checking for properties constructor in class '%s'", containerClassName));
+
+ Constructor<?> constructor = containerClass.getConstructor(ContainerProcessProperties.class);
+
+ return (ContainerProcess) constructor.newInstance(properties);
+ }
+
+ public static ContainerProcessProperties getPropertiesFromResource(String resourceName) throws IOException {
+ ContainerProcessProperties properties = new ContainerProcessProperties();
+ properties.load(ClassLoader.getSystemResourceAsStream(resourceName));
+ return properties;
+ }
+
+ public static ContainerProcessProperties getPropertiesFromPath(String filePath) throws IOException {
+ ContainerProcessProperties properties = new ContainerProcessProperties();
+ properties.load(new InputStreamReader(new FileInputStream(filePath)));
+ return properties;
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/container/impl/DummyMasterSlaveProcess.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/container/impl/DummyMasterSlaveProcess.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/container/impl/DummyMasterSlaveProcess.java
new file mode 100644
index 0000000..d91d77c
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/container/impl/DummyMasterSlaveProcess.java
@@ -0,0 +1,76 @@
+package org.apache.helix.metamanager.container.impl;
+
+import org.apache.helix.NotificationContext;
+import org.apache.helix.metamanager.container.ContainerProcess;
+import org.apache.helix.metamanager.container.ContainerProcessProperties;
+import org.apache.helix.model.Message;
+import org.apache.helix.participant.statemachine.StateModel;
+import org.apache.helix.participant.statemachine.StateModelFactory;
+import org.apache.helix.participant.statemachine.StateModelInfo;
+import org.apache.helix.participant.statemachine.Transition;
+import org.apache.log4j.Logger;
+
+public class DummyMasterSlaveProcess extends ContainerProcess {
+
+ static final Logger log = Logger.getLogger(DummyMasterSlaveProcess.class);
+
+ public DummyMasterSlaveProcess(ContainerProcessProperties properties) {
+ super(properties);
+ setModelName("MasterSlave");
+ setModelFactory(new DummyMasterSlaveModelFactory());
+ }
+
+ @Override
+ protected void startContainer() throws Exception {
+ log.info("starting dummy process container");
+ }
+
+ @Override
+ protected void stopContainer() throws Exception {
+ log.info("stopping dummy process container");
+ }
+
+ public static class DummyMasterSlaveModelFactory extends StateModelFactory<DummyMasterSlaveStateModel> {
+ @Override
+ public DummyMasterSlaveStateModel createNewStateModel(String partitionName) {
+ return new DummyMasterSlaveStateModel();
+ }
+ }
+
+ @StateModelInfo(initialState = "OFFLINE", states = { "OFFLINE", "SLAVE", "MASTER", "DROPPED" })
+ public static class DummyMasterSlaveStateModel extends StateModel {
+
+ static final Logger log = Logger.getLogger(DummyMasterSlaveStateModel.class);
+
+ @Transition(from = "OFFLINE", to = "SLAVE")
+ public void offlineToSlave(Message m, NotificationContext context) {
+ log.trace(String.format("%s transitioning from OFFLINE to SLAVE",
+ context.getManager().getInstanceName()));
+ }
+
+ @Transition(from = "SLAVE", to = "OFFLINE")
+ public void slaveToOffline(Message m, NotificationContext context) {
+ log.trace(String.format("%s transitioning from SLAVE to OFFLINE",
+ context.getManager().getInstanceName()));
+ }
+
+ @Transition(from = "SLAVE", to = "MASTER")
+ public void slaveToMaster(Message m, NotificationContext context) {
+ log.trace(String.format("%s transitioning from SLAVE to MASTER",
+ context.getManager().getInstanceName()));
+ }
+
+ @Transition(from = "MASTER", to = "SLAVE")
+ public void masterToSlave(Message m, NotificationContext context) {
+ log.trace(String.format("%s transitioning from MASTER to SLAVE",
+ context.getManager().getInstanceName()));
+ }
+
+ @Transition(from = "OFFLINE", to = "DROPPED")
+ public void offlineToDropped(Message m, NotificationContext context) {
+ log.trace(String.format("%s transitioning from OFFLINE to DROPPED",
+ context.getManager().getInstanceName()));
+ }
+
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/container/impl/DummyOnlineOfflineProcess.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/container/impl/DummyOnlineOfflineProcess.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/container/impl/DummyOnlineOfflineProcess.java
new file mode 100644
index 0000000..d5015f4
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/container/impl/DummyOnlineOfflineProcess.java
@@ -0,0 +1,64 @@
+package org.apache.helix.metamanager.container.impl;
+
+import org.apache.helix.NotificationContext;
+import org.apache.helix.metamanager.container.ContainerProcess;
+import org.apache.helix.metamanager.container.ContainerProcessProperties;
+import org.apache.helix.model.Message;
+import org.apache.helix.participant.statemachine.StateModel;
+import org.apache.helix.participant.statemachine.StateModelFactory;
+import org.apache.helix.participant.statemachine.StateModelInfo;
+import org.apache.helix.participant.statemachine.Transition;
+import org.apache.log4j.Logger;
+
+public class DummyOnlineOfflineProcess extends ContainerProcess {
+
+ static final Logger log = Logger.getLogger(DummyOnlineOfflineProcess.class);
+
+ public DummyOnlineOfflineProcess(ContainerProcessProperties properties) {
+ super(properties);
+ setModelName("OnlineOffline");
+ setModelFactory(new DummyOnlineOfflineModelFactory());
+ }
+
+ @Override
+ protected void startContainer() throws Exception {
+ log.info("starting dummy online-offline process container");
+ }
+
+ @Override
+ protected void stopContainer() throws Exception {
+ log.info("stopping dummy online-offline process container");
+ }
+
+ public static class DummyOnlineOfflineModelFactory extends StateModelFactory<DummyOnlineOfflineStateModel> {
+ @Override
+ public DummyOnlineOfflineStateModel createNewStateModel(String partitionName) {
+ return new DummyOnlineOfflineStateModel();
+ }
+ }
+
+ @StateModelInfo(initialState = "OFFLINE", states = { "OFFLINE", "ONLINE", "DROPPED" })
+ public static class DummyOnlineOfflineStateModel extends StateModel {
+
+ static final Logger log = Logger.getLogger(DummyOnlineOfflineStateModel.class);
+
+ @Transition(from = "OFFLINE", to = "ONLINE")
+ public void offlineToOnline(Message m, NotificationContext context) {
+ log.trace(String.format("%s transitioning from OFFLINE to ONLINE",
+ context.getManager().getInstanceName()));
+ }
+
+ @Transition(from = "ONLINE", to = "OFFLINE")
+ public void onlineToOffline(Message m, NotificationContext context) {
+ log.trace(String.format("%s transitioning from ONLINE to OFFLINE",
+ context.getManager().getInstanceName()));
+ }
+
+ @Transition(from = "OFFLINE", to = "DROPPED")
+ public void offlineToDropped(Message m, NotificationContext context) {
+ log.trace(String.format("%s transitioning from OFFLINE to DROPPED",
+ context.getManager().getInstanceName()));
+ }
+
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/container/impl/DummyProcess.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/container/impl/DummyProcess.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/container/impl/DummyProcess.java
new file mode 100644
index 0000000..b4963a7
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/container/impl/DummyProcess.java
@@ -0,0 +1,76 @@
+package org.apache.helix.metamanager.container.impl;
+
+import org.apache.helix.NotificationContext;
+import org.apache.helix.metamanager.container.ContainerProcess;
+import org.apache.helix.metamanager.container.ContainerProcessProperties;
+import org.apache.helix.model.Message;
+import org.apache.helix.participant.statemachine.StateModel;
+import org.apache.helix.participant.statemachine.StateModelFactory;
+import org.apache.helix.participant.statemachine.StateModelInfo;
+import org.apache.helix.participant.statemachine.Transition;
+import org.apache.log4j.Logger;
+
+public class DummyProcess extends ContainerProcess {
+
+ static final Logger log = Logger.getLogger(DummyProcess.class);
+
+ public DummyProcess(ContainerProcessProperties properties) {
+ super(properties);
+ setModelName("MasterSlave");
+ setModelFactory(new DummyModelFactory());
+ }
+
+ @Override
+ protected void startContainer() throws Exception {
+ log.info("starting dummy process container");
+ }
+
+ @Override
+ protected void stopContainer() throws Exception {
+ log.info("stopping dummy process container");
+ }
+
+ public static class DummyModelFactory extends StateModelFactory<DummyStateModel> {
+ @Override
+ public DummyStateModel createNewStateModel(String partitionName) {
+ return new DummyStateModel();
+ }
+ }
+
+ @StateModelInfo(initialState = "OFFLINE", states = { "OFFLINE", "SLAVE", "MASTER", "DROPPED" })
+ public static class DummyStateModel extends StateModel {
+
+ static final Logger log = Logger.getLogger(DummyStateModel.class);
+
+ @Transition(from = "OFFLINE", to = "SLAVE")
+ public void offlineToSlave(Message m, NotificationContext context) {
+ log.trace(String.format("%s transitioning from OFFLINE to SLAVE",
+ context.getManager().getInstanceName()));
+ }
+
+ @Transition(from = "SLAVE", to = "OFFLINE")
+ public void slaveToOffline(Message m, NotificationContext context) {
+ log.trace(String.format("%s transitioning from SLAVE to OFFLINE",
+ context.getManager().getInstanceName()));
+ }
+
+ @Transition(from = "SLAVE", to = "MASTER")
+ public void slaveToMaster(Message m, NotificationContext context) {
+ log.trace(String.format("%s transitioning from SLAVE to MASTER",
+ context.getManager().getInstanceName()));
+ }
+
+ @Transition(from = "MASTER", to = "SLAVE")
+ public void masterToSlave(Message m, NotificationContext context) {
+ log.trace(String.format("%s transitioning from MASTER to SLAVE",
+ context.getManager().getInstanceName()));
+ }
+
+ @Transition(from = "OFFLINE", to = "DROPPED")
+ public void offlineToDropped(Message m, NotificationContext context) {
+ log.trace(String.format("%s transitioning from OFFLINE to DROPPED",
+ context.getManager().getInstanceName()));
+ }
+
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/container/impl/RedisServerProcess.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/container/impl/RedisServerProcess.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/container/impl/RedisServerProcess.java
new file mode 100644
index 0000000..d084a71
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/container/impl/RedisServerProcess.java
@@ -0,0 +1,135 @@
+package org.apache.helix.metamanager.container.impl;
+
+import java.net.InetAddress;
+import java.util.concurrent.Executors;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.TimeUnit;
+
+import org.I0Itec.zkclient.ZkClient;
+import org.apache.helix.NotificationContext;
+import org.apache.helix.metamanager.container.ContainerProcess;
+import org.apache.helix.metamanager.container.ContainerProcessProperties;
+import org.apache.helix.model.Message;
+import org.apache.helix.participant.statemachine.StateModel;
+import org.apache.helix.participant.statemachine.StateModelFactory;
+import org.apache.helix.participant.statemachine.StateModelInfo;
+import org.apache.helix.participant.statemachine.Transition;
+import org.apache.log4j.Logger;
+
+public class RedisServerProcess extends ContainerProcess {
+
+ static final Logger log = Logger.getLogger(RedisServerProcess.class);
+
+ public static final String REDIS_SERVER_COMMAND = "redis-server";
+
+ // interval in ms at which the child redis process is polled for liveness
+ public static final long MONITOR_INTERVAL = 5000;
+
+ // connection used to advertise this server's endpoint; created in
+ // startContainer(), closed in stopContainer()
+ ZkClient zookeeper;
+
+ final String address;
+ final String root;
+ final String name;
+ // first port of the range; actual port is basePort + instance index
+ final int basePort;
+
+ // the spawned redis-server child process
+ Process process;
+
+ // drives the periodic ProcessMonitor liveness check
+ ScheduledExecutorService executor;
+
+ public RedisServerProcess(ContainerProcessProperties properties) {
+ super(properties);
+
+ setModelName("OnlineOffline");
+ setModelFactory(new RedisServerModelFactory());
+
+ address = properties.getProperty("address");
+ root = properties.getProperty("root");
+ basePort = Integer.valueOf(properties.getProperty("baseport"));
+ name = properties.getProperty(ContainerProcessProperties.HELIX_INSTANCE);
+ }
+
+ // Spawns the redis-server binary, publishes its endpoint to zookeeper,
+ // and starts the liveness monitor.
+ @Override
+ protected void startContainer() throws Exception {
+ log.info(String.format("starting redis server container for instance '%s'", name));
+
+ String hostname = InetAddress.getLocalHost().getHostName();
+ // NOTE(review): assumes the instance name has the form
+ // "<prefix>_<index>" with a numeric index — confirm against the
+ // provisioner's naming scheme
+ int port = basePort + Integer.valueOf(name.split("_")[1]);
+
+ log.debug(String.format("Starting redis server at '%s:%d'", hostname, port));
+
+ ProcessBuilder builder = new ProcessBuilder();
+ builder.command(REDIS_SERVER_COMMAND, "--port", String.valueOf(port));
+ process = builder.start();
+
+ log.debug("Updating zookeeper");
+ // replace any stale registration, then publish hostname and port
+ // under /<root>/<name>
+ zookeeper = new ZkClient(address);
+ zookeeper.deleteRecursive("/" + root + "/" + name);
+ zookeeper.createPersistent("/" + root + "/" + name, true);
+ zookeeper.createPersistent("/" + root + "/" + name + "/hostname", hostname);
+ zookeeper.createPersistent("/" + root + "/" + name + "/port", String.valueOf(port));
+
+ log.debug("Starting process monitor");
+ executor = Executors.newSingleThreadScheduledExecutor();
+ executor.scheduleAtFixedRate(new ProcessMonitor(), 0, MONITOR_INTERVAL, TimeUnit.MILLISECONDS);
+
+ }
+
+ // Tears down in reverse order: monitor, zookeeper registration, child
+ // process. NOTE(review): assumes startContainer() completed — executor,
+ // zookeeper and process would be null otherwise and this method would NPE.
+ @Override
+ protected void stopContainer() throws Exception {
+ log.info("stopping redis server container");
+
+ log.debug("Stopping process monitor");
+ executor.shutdownNow();
+
+ log.debug("Updating zookeeper");
+ zookeeper.deleteRecursive("/" + root + "/" + name);
+ zookeeper.close();
+
+ log.debug("Stopping process");
+ process.destroy();
+ process.waitFor();
+ }
+
+ public class RedisServerModelFactory extends StateModelFactory<RedisServerModel> {
+ @Override
+ public RedisServerModel createNewStateModel(String partitionName) {
+ return new RedisServerModel();
+ }
+ }
+
+ // No-op OnlineOffline state model; transitions only emit trace logs.
+ @StateModelInfo(initialState = "OFFLINE", states = { "OFFLINE", "ONLINE", "DROPPED" })
+ public class RedisServerModel extends StateModel {
+
+ @Transition(from = "OFFLINE", to = "ONLINE")
+ public void offlineToSlave(Message m, NotificationContext context) {
+ // left blank
+ log.trace(String.format("%s transitioning from %s to %s", context.getManager().getInstanceName(), m.getFromState(), m.getToState()));
+ }
+
+ @Transition(from = "ONLINE", to = "OFFLINE")
+ public void slaveToOffline(Message m, NotificationContext context) {
+ // left blank
+ log.trace(String.format("%s transitioning from %s to %s", context.getManager().getInstanceName(), m.getFromState(), m.getToState()));
+ }
+
+ @Transition(from = "OFFLINE", to = "DROPPED")
+ public void offlineToDropped(Message m, NotificationContext context) {
+ // left blank
+ log.trace(String.format("%s transitioning from %s to %s", context.getManager().getInstanceName(), m.getFromState(), m.getToState()));
+ }
+
+ }
+
+ // Liveness probe: Process.exitValue() throws IllegalThreadStateException
+ // while the child is still running; if it returns normally the child has
+ // exited and the container is marked failed.
+ private class ProcessMonitor implements Runnable {
+ @Override
+ public void run() {
+ try {
+ process.exitValue();
+ log.warn("detected process failure");
+ fail();
+ } catch (Exception e) {
+ // expected while the process is alive
+ // NOTE(review): broad catch — IllegalThreadStateException is
+ // the intended case; other exceptions are silently treated as
+ // "still running" too
+ }
+ }
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/container/impl/ZookeeperMasterSlaveProcess.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/container/impl/ZookeeperMasterSlaveProcess.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/container/impl/ZookeeperMasterSlaveProcess.java
new file mode 100644
index 0000000..f8bbc85
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/container/impl/ZookeeperMasterSlaveProcess.java
@@ -0,0 +1,104 @@
+package org.apache.helix.metamanager.container.impl;
+
+import org.I0Itec.zkclient.ZkClient;
+import org.apache.helix.NotificationContext;
+import org.apache.helix.metamanager.container.ContainerProcess;
+import org.apache.helix.metamanager.container.ContainerProcessProperties;
+import org.apache.helix.model.Message;
+import org.apache.helix.participant.statemachine.StateModel;
+import org.apache.helix.participant.statemachine.StateModelFactory;
+import org.apache.helix.participant.statemachine.StateModelInfo;
+import org.apache.helix.participant.statemachine.Transition;
+import org.apache.log4j.Logger;
+
+/**
+ * Container hosting a Helix participant for the MasterSlave state model.
+ * Mirrors each partition's current state into a separate zookeeper namespace
+ * as ephemeral nodes under "/<root>/<instance>".
+ */
+public class ZookeeperMasterSlaveProcess extends ContainerProcess {
+
+ static final Logger log = Logger.getLogger(ZookeeperMasterSlaveProcess.class);
+
+ // client for the state-mirror namespace (separate from Helix's zookeeper); created in startContainer()
+ ZkClient zookeeper;
+
+ final String address; // zookeeper connect string for the mirror namespace
+ final String root; // base path under which per-instance state nodes live
+ final String name; // helix instance name, used as the node name under root
+
+ public ZookeeperMasterSlaveProcess(ContainerProcessProperties properties) {
+ super(properties);
+
+ setModelName("MasterSlave");
+ setModelFactory(new ZookeeperMasterSlaveModelFactory());
+
+ address = properties.getProperty("address");
+ root = properties.getProperty("root");
+ name = properties.getProperty(ContainerProcessProperties.HELIX_INSTANCE);
+ }
+
+ @Override
+ protected void startContainer() throws Exception {
+ log.info("starting zookeeper process container");
+
+ zookeeper = new ZkClient(address);
+ // createPersistent(path, true) also creates any missing parent nodes
+ zookeeper.createPersistent("/" + root + "/" + name, true);
+ }
+
+ @Override
+ protected void stopContainer() throws Exception {
+ log.info("stopping zookeeper process container");
+
+ // closing the session implicitly removes this instance's ephemeral state nodes
+ zookeeper.close();
+ }
+
+ /** Hands out a fresh state model per assigned partition. */
+ public class ZookeeperMasterSlaveModelFactory extends StateModelFactory<ZookeeperMasterSlaveModel> {
+ @Override
+ public ZookeeperMasterSlaveModel createNewStateModel(String partitionName) {
+ return new ZookeeperMasterSlaveModel();
+ }
+ }
+
+ /**
+  * MasterSlave model that records the partition's current state as an
+  * ephemeral node "<resource>_<partition>" under this instance's root.
+  */
+ @StateModelInfo(initialState = "OFFLINE", states = { "OFFLINE", "SLAVE", "MASTER", "DROPPED" })
+ public class ZookeeperMasterSlaveModel extends StateModel {
+
+ @Transition(from = "OFFLINE", to = "SLAVE")
+ public void offlineToSlave(Message m, NotificationContext context) {
+ transition(m, context);
+ }
+
+ @Transition(from = "SLAVE", to = "OFFLINE")
+ public void slaveToOffline(Message m, NotificationContext context) {
+ transition(m, context);
+ }
+
+ @Transition(from = "SLAVE", to = "MASTER")
+ public void slaveToMaster(Message m, NotificationContext context) {
+ transition(m, context);
+ }
+
+ @Transition(from = "MASTER", to = "SLAVE")
+ public void masterToSlave(Message m, NotificationContext context) {
+ transition(m, context);
+ }
+
+ @Transition(from = "OFFLINE", to = "DROPPED")
+ public void offlineToDropped(Message m, NotificationContext context) {
+ log.trace(String.format("%s transitioning from %s to %s", context.getManager().getInstanceName(), m.getFromState(), m.getToState()));
+
+ String resource = m.getResourceName();
+ String partition = m.getPartitionName();
+ String path = "/" + root + "/" + name + "/" + resource + "_" + partition;
+
+ // partition dropped: remove the mirror node without recreating it
+ zookeeper.delete(path);
+ }
+
+ // Replaces the partition's ephemeral node with one holding the new state.
+ // delete() is best-effort; on the very first transition no node exists yet.
+ public void transition(Message m, NotificationContext context) {
+ log.trace(String.format("%s transitioning from %s to %s", context.getManager().getInstanceName(), m.getFromState(), m.getToState()));
+
+ String resource = m.getResourceName();
+ String partition = m.getPartitionName();
+ String path = "/" + root + "/" + name + "/" + resource + "_" + partition;
+
+ zookeeper.delete(path);
+ zookeeper.createEphemeral(path, m.getToState());
+ }
+
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/FileTargetProvider.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/FileTargetProvider.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/FileTargetProvider.java
new file mode 100644
index 0000000..6eac3e8
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/FileTargetProvider.java
@@ -0,0 +1,51 @@
+package org.apache.helix.metamanager.impl;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.Properties;
+
+import org.apache.helix.metamanager.TargetProviderService;
+
+/**
+ * File-based target model. The target container count is re-read from a java
+ * properties file on every query, so the count may change dynamically at
+ * runtime by editing the file.
+ */
+public class FileTargetProvider implements TargetProviderService {
+
+    File file;
+
+    public FileTargetProvider() {
+        // left blank, configured via configure()
+    }
+
+    public FileTargetProvider(String path) {
+        this.file = new File(path);
+    }
+
+    /**
+     * Reads the properties file and returns the configured count for the
+     * given container type.
+     *
+     * @throws IllegalArgumentException if the container type has no entry in the file
+     * @throws IOException              if the file cannot be read
+     */
+    @Override
+    public int getTargetContainerCount(String containerType) throws FileNotFoundException, IOException, IllegalArgumentException {
+        Properties properties = new Properties();
+        FileReader reader = new FileReader(file);
+        try {
+            properties.load(reader);
+        } finally {
+            // always release the file handle (previously leaked, and on load failure)
+            reader.close();
+        }
+        // BUG FIX: Properties.contains() (inherited from Hashtable) checks
+        // VALUES, not keys, so the original guard never matched a container
+        // type; containsKey() is the intended check.
+        if (!properties.containsKey(containerType))
+            throw new IllegalArgumentException(String.format("container type '%s' not found in '%s'", containerType, file.getCanonicalPath()));
+        return Integer.parseInt(properties.getProperty(containerType));
+    }
+
+    @Override
+    public void configure(Properties properties) throws Exception {
+        this.file = new File(properties.getProperty("path"));
+    }
+
+    @Override
+    public void start() throws Exception {
+        // left blank
+    }
+
+    @Override
+    public void stop() throws Exception {
+        // left blank
+    }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/RedisTargetProvider.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/RedisTargetProvider.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/RedisTargetProvider.java
new file mode 100644
index 0000000..1fdf96e
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/RedisTargetProvider.java
@@ -0,0 +1,356 @@
+package org.apache.helix.metamanager.impl;
+
+import java.io.BufferedReader;
+import java.io.InputStreamReader;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.Map.Entry;
+import java.util.Properties;
+import java.util.concurrent.Callable;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.TimeUnit;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.I0Itec.zkclient.ZkClient;
+import org.apache.helix.metamanager.TargetProviderService;
+import org.apache.log4j.Logger;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * Redis-specific target model based on recurring Tps benchmarking. Tps target
+ * and probed redis-server instances are configured via zookeeper. Tps target
+ * may change dynamically.
+ */
+public class RedisTargetProvider implements TargetProviderService {
+
+    static final Logger log = Logger.getLogger(RedisTargetProvider.class);
+
+    public static final String BENCHMARK_COMMAND = "redis-benchmark";
+    public static final String BENCHMARK_TESTS = "GET,SET";
+
+    public static final String DEFAULT_RECORDS = "100000";
+    public static final String DEFAULT_CLIENTS = "20";
+    public static final String DEFAULT_REQUESTS = "100000";
+    public static final String DEFAULT_TIMEOUT = "8000";
+    public static final String DEFAULT_INTERVAL = "10000";
+    public static final String DEFAULT_ALPHA = "0.25";
+
+    // parses one line of redis-benchmark --csv output, e.g. "GET","12345.67";
+    // compiled once instead of on every benchmark run
+    static final Pattern CSV_PATTERN = Pattern.compile("\"([A-Z0-9_]+).*\",\"([0-9\\.]+)\"");
+
+    ZkClient zookeeper;
+
+    String address;                    // zookeeper connect string
+    String root;                       // base path holding targets and registered redis servers
+
+    int records;                       // benchmark keyspace size (-r)
+    int clients;                       // parallel benchmark clients (-c)
+    int requests;                      // requests per benchmark run (-n)
+    int timeout;                       // ms budget for collecting all per-server results
+    int interval;                      // ms between scheduled benchmark runs
+
+    int targetTpsGet;                  // desired aggregate GET throughput
+    int targetTpsSet;                  // desired aggregate SET throughput
+
+    int targetCountMin;                // lower clamp for container count (<=0 disables)
+    int targetCountMax;                // upper clamp for container count (<=0 disables)
+    int targetCount;                   // latest computed count, read by getTargetContainerCount()
+
+    double alpha;                      // smoothing factor for the exponential moving averages
+    double averageTpsGet;
+    double averageTpsSet;
+    double averageCount;
+
+    ScheduledExecutorService executor; // drives the periodic benchmark
+
+    @Override
+    public void configure(Properties properties) {
+        address = properties.getProperty("address");
+        root = properties.getProperty("root");
+        targetTpsGet = Integer.valueOf(properties.getProperty("get", "0"));
+        targetTpsSet = Integer.valueOf(properties.getProperty("set", "0"));
+        targetCountMin = Integer.valueOf(properties.getProperty("min", "-1"));
+        targetCountMax = Integer.valueOf(properties.getProperty("max", "-1"));
+        records = Integer.valueOf(properties.getProperty("records", DEFAULT_RECORDS));
+        clients = Integer.valueOf(properties.getProperty("clients", DEFAULT_CLIENTS));
+        requests = Integer.valueOf(properties.getProperty("requests", DEFAULT_REQUESTS));
+        timeout = Integer.valueOf(properties.getProperty("timeout", DEFAULT_TIMEOUT));
+        interval = Integer.valueOf(properties.getProperty("interval", DEFAULT_INTERVAL));
+        alpha = Double.valueOf(properties.getProperty("alpha", DEFAULT_ALPHA));
+    }
+
+    @Override
+    public void start() {
+        log.debug("starting redis status service");
+        zookeeper = new ZkClient(address);
+        zookeeper.createPersistent("/" + root, true);
+
+        // seed the target nodes; creation fails harmlessly if they already exist
+        try { zookeeper.createPersistent("/" + root + "/target.get", String.valueOf(targetTpsGet)); } catch (Exception ignore) {}
+        try { zookeeper.createPersistent("/" + root + "/target.set", String.valueOf(targetTpsSet)); } catch (Exception ignore) {}
+        try { zookeeper.createPersistent("/" + root + "/target.min", String.valueOf(targetCountMin)); } catch (Exception ignore) {}
+        try { zookeeper.createPersistent("/" + root + "/target.max", String.valueOf(targetCountMax)); } catch (Exception ignore) {}
+
+        executor = Executors.newSingleThreadScheduledExecutor();
+        executor.scheduleAtFixedRate(new RedisBenchmarkRunnable(), 0, interval, TimeUnit.MILLISECONDS);
+    }
+
+    @Override
+    public void stop() {
+        log.debug("stopping redis status service");
+        if (executor != null) {
+            executor.shutdownNow();
+            // busy-wait until the in-flight benchmark run has terminated
+            while (!executor.isTerminated()) {
+                try {
+                    Thread.sleep(100);
+                } catch (InterruptedException e) {
+                    // ignore
+                }
+            }
+            executor = null;
+        }
+        if (zookeeper != null) {
+            zookeeper.close();
+            zookeeper = null;
+        }
+    }
+
+    @Override
+    public int getTargetContainerCount(String containerType) throws Exception {
+        return targetCount;
+    }
+
+    /**
+     * One scheduled benchmark cycle: discover servers, benchmark each in
+     * parallel, aggregate the Tps results and derive a new container count.
+     */
+    private class RedisBenchmarkRunnable implements Runnable {
+        // pool for per-server benchmark processes; renamed from "executor",
+        // which shadowed the outer scheduler field
+        ExecutorService benchmarkExecutor = Executors.newCachedThreadPool();
+        RedisResult aggregateResult;
+
+        @Override
+        public void run() {
+            log.debug("running redis benchmark");
+
+            aggregateResult = new RedisResult(0);
+            Collection<Future<RedisResult>> futures = new ArrayList<Future<RedisResult>>();
+
+            try {
+                Collection<RedisTarget> targets = getTargets();
+
+                // start benchmark
+                for (RedisTarget target : targets) {
+                    log.debug(String.format("submitting target '%s'", target));
+                    Future<RedisResult> future = benchmarkExecutor.submit(new RedisCallable(target));
+                    futures.add(future);
+                }
+
+                // aggregate results; all servers share one deadline
+                try {
+                    log.debug("waiting for results");
+
+                    long limit = System.currentTimeMillis() + timeout;
+                    for (Future<RedisResult> future : futures) {
+                        try {
+                            RedisResult result = future.get(limit - System.currentTimeMillis(), TimeUnit.MILLISECONDS);
+                            log.debug(String.format("got result '%s'", result));
+                            aggregate(result);
+                        } catch (Exception e) {
+                            log.warn(String.format("failed to get result"));
+                            future.cancel(true);
+                        }
+                    }
+                } catch (Exception e) {
+                    log.error("Error running redis benchmark", e);
+
+                    for (Future<RedisResult> future : futures) {
+                        future.cancel(true);
+                    }
+
+                    return;
+                }
+
+                // compare to thresholds
+                log.debug(String.format("aggregate result is '%s'", aggregateResult));
+
+                // refresh targets from zookeeper; they may be changed at runtime
+                try { targetTpsGet = Integer.valueOf(zookeeper.<String> readData("/" + root + "/target.get")); } catch (Exception ignore) {}
+                try { targetTpsSet = Integer.valueOf(zookeeper.<String> readData("/" + root + "/target.set")); } catch (Exception ignore) {}
+                try { targetCountMin = Integer.valueOf(zookeeper.<String> readData("/" + root + "/target.min")); } catch (Exception ignore) {}
+                try { targetCountMax = Integer.valueOf(zookeeper.<String> readData("/" + root + "/target.max")); } catch (Exception ignore) {}
+
+                averageCount = alpha * aggregateResult.serverCount + (1.0 - alpha) * averageCount;
+
+                // calculate counts: scale the observed server count by the
+                // ratio of desired to observed (smoothed) throughput
+                int targetCountGet = -1;
+                if (aggregateResult.containsKey("GET")) {
+                    double tpsTarget = targetTpsGet;
+                    double tps = aggregateResult.get("GET");
+
+                    averageTpsGet = alpha * tps + (1.0 - alpha) * averageTpsGet;
+
+                    targetCountGet = (int) Math.ceil(tpsTarget / averageTpsGet * averageCount);
+                    log.debug(String.format("count.get=%d, target.get=%f, tps.get=%f, tps.avg.get=%f, count.avg=%f", targetCountGet, tpsTarget, tps,
+                            averageTpsGet, averageCount));
+                }
+
+                int targetCountSet = -1;
+                if (aggregateResult.containsKey("SET")) {
+                    double tpsTarget = targetTpsSet;
+                    double tps = aggregateResult.get("SET");
+
+                    averageTpsSet = alpha * tps + (1.0 - alpha) * averageTpsSet;
+
+                    targetCountSet = (int) Math.ceil(tpsTarget / averageTpsSet * averageCount);
+                    log.debug(String.format("count.set=%d, target.set=%f, tps.set=%f, tps.avg.set=%f, count.avg=%f", targetCountSet, tpsTarget, tps,
+                            averageTpsSet, averageCount));
+                }
+
+                // clamp in a local so concurrent readers of the targetCount
+                // field never observe an intermediate, unclamped value
+                // (previously the field was mutated in place and then
+                // redundantly self-assigned)
+                int newTargetCount = Math.max(targetCountGet, targetCountSet);
+
+                if (targetCountMin > 0)
+                    newTargetCount = Math.max(newTargetCount, targetCountMin);
+                if (targetCountMax > 0)
+                    newTargetCount = Math.min(newTargetCount, targetCountMax);
+
+                newTargetCount = Math.max(newTargetCount, 1);
+
+                log.debug(String.format("target count is %d", newTargetCount));
+                RedisTargetProvider.this.targetCount = newTargetCount;
+
+            } catch (Exception e) {
+                log.error("Error running redis benchmark", e);
+
+                for (Future<RedisResult> future : futures) {
+                    future.cancel(true);
+                }
+            }
+
+        }
+
+        // Reads registered redis servers from zookeeper. A server is only
+        // probed when its ephemeral heartbeat node is present.
+        Collection<RedisTarget> getTargets() {
+            log.debug("fetching redis servers from zookeeper");
+            Collection<RedisTarget> targets = new ArrayList<RedisTarget>();
+            Collection<String> servers = zookeeper.getChildren("/" + root);
+
+            // target.* nodes live alongside the server nodes; skip them
+            servers.remove("target.get");
+            servers.remove("target.set");
+            servers.remove("target.min");
+            servers.remove("target.max");
+
+            for (String server : servers) {
+                if (!zookeeper.exists("/" + root + "/" + server + "/heartbeat"))
+                    continue;
+
+                String hostname = zookeeper.readData("/" + root + "/" + server + "/hostname");
+                int port = Integer.valueOf(zookeeper.<String> readData("/" + root + "/" + server + "/port"));
+
+                targets.add(new RedisTarget(hostname, port));
+            }
+
+            log.debug(String.format("found %d servers: %s", targets.size(), targets));
+            return targets;
+        }
+
+        // Sums per-test Tps into the running aggregate and accumulates the server count.
+        void aggregate(RedisResult result) {
+            RedisResult newResult = new RedisResult(aggregateResult.serverCount + result.serverCount);
+
+            for (Entry<String, Double> entry : result.entrySet()) {
+                double current = 0.0d;
+                if (aggregateResult.containsKey(entry.getKey()))
+                    current = aggregateResult.get(entry.getKey());
+
+                current += entry.getValue();
+                newResult.put(entry.getKey(), current);
+            }
+
+            aggregateResult = newResult;
+        }
+    }
+
+    /** Host/port pair of one probed redis-server. */
+    private static class RedisTarget {
+        final String hostname;
+        final int port;
+
+        public RedisTarget(String hostname, int port) {
+            this.hostname = hostname;
+            this.port = port;
+        }
+
+        @Override
+        public String toString() {
+            return String.format("%s:%d", hostname, port);
+        }
+    }
+
+    /** Map of benchmark test name to Tps, plus the number of servers it covers. */
+    private static class RedisResult extends HashMap<String, Double> {
+        private static final long serialVersionUID = 4599748807597500952L;
+
+        final int serverCount;
+
+        public RedisResult(int serverCount) {
+            this.serverCount = serverCount;
+        }
+
+        @Override
+        public String toString() {
+            return String.format("[serverCount=%d %s]", serverCount, super.toString());
+        }
+    }
+
+    /** Runs redis-benchmark against one server and parses its CSV output. */
+    private class RedisCallable implements Callable<RedisResult> {
+        final RedisTarget target;
+
+        public RedisCallable(RedisTarget target) {
+            this.target = target;
+        }
+
+        @Override
+        public RedisResult call() throws Exception {
+            log.debug(String.format("executing benchmark for '%s'", target));
+
+            ProcessBuilder builder = new ProcessBuilder();
+            builder.command(BENCHMARK_COMMAND, "-h", target.hostname, "-p", String.valueOf(target.port), "-r", String.valueOf(records), "-n",
+                    String.valueOf(requests), "-c", String.valueOf(clients), "-t", BENCHMARK_TESTS, "--csv");
+            Process process = builder.start();
+
+            log.debug(String.format("running '%s'", builder.command()));
+
+            RedisResult result = new RedisResult(1);
+
+            // BUG FIX: drain stdout BEFORE waitFor(). The original called
+            // waitFor() first, which can deadlock if the benchmark's output
+            // exceeds the OS pipe buffer. Also close the reader.
+            log.debug("parsing output");
+            BufferedReader reader = new BufferedReader(new InputStreamReader(process.getInputStream()));
+            try {
+                String line = null;
+                while ((line = reader.readLine()) != null) {
+                    Matcher matcher = CSV_PATTERN.matcher(line);
+
+                    if (!matcher.find())
+                        continue;
+
+                    String key = matcher.group(1);
+                    Double value = Double.valueOf(matcher.group(2));
+
+                    result.put(key, value);
+                }
+            } finally {
+                reader.close();
+            }
+
+            int retVal;
+            try {
+                retVal = process.waitFor();
+            } catch (InterruptedException e) {
+                process.destroy();
+                // preserve the interrupt status for the executor
+                Thread.currentThread().interrupt();
+                return result;
+            }
+
+            Preconditions.checkState(retVal == 0, "Benchmark process returned %s", retVal);
+
+            log.debug(String.format("benchmark for '%s' returned '%s'", target, result));
+
+            return result;
+        }
+    }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/StaticTargetProvider.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/StaticTargetProvider.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/StaticTargetProvider.java
new file mode 100644
index 0000000..3159fbe
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/StaticTargetProvider.java
@@ -0,0 +1,62 @@
+package org.apache.helix.metamanager.impl;
+
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Properties;
+
+import org.apache.helix.metamanager.TargetProviderService;
+import org.apache.log4j.Logger;
+
+/**
+ * Target model based on manually set count. Count may change dynamically.
+ */
+public class StaticTargetProvider implements TargetProviderService {
+    static final Logger log = Logger.getLogger(StaticTargetProvider.class);
+
+    // container type -> desired container count
+    final Map<String, Integer> targetCounts = new HashMap<String, Integer>();
+
+    public StaticTargetProvider() {
+        // left blank, configured via configure()
+    }
+
+    public StaticTargetProvider(Map<String, Integer> targetCounts) {
+        this.targetCounts.putAll(targetCounts);
+    }
+
+    /**
+     * Returns the configured count for the given container type.
+     *
+     * @throws IllegalArgumentException if no count was configured for the
+     *         type (previously this surfaced as an opaque NullPointerException
+     *         from auto-unboxing the missing map entry)
+     */
+    @Override
+    public int getTargetContainerCount(String containerType) {
+        Integer count = targetCounts.get(containerType);
+        if (count == null)
+            throw new IllegalArgumentException(String.format("No target count configured for container type '%s'", containerType));
+        return count;
+    }
+
+    public void setTargetContainerCount(String containerType, int targetCount) {
+        targetCounts.put(containerType, targetCount);
+    }
+
+    /**
+     * Treats every integer-valued property as a container count; non-integer
+     * values are skipped with a warning.
+     */
+    @Override
+    public void configure(Properties properties) throws Exception {
+        for (Entry<Object, Object> entry : properties.entrySet()) {
+            String key = (String) entry.getKey();
+
+            try {
+                int value = Integer.valueOf((String) entry.getValue());
+                log.debug(String.format("Inserting value '%s = %d'", key, value));
+                targetCounts.put(key, value);
+            } catch (NumberFormatException e) {
+                log.warn(String.format("Skipping '%s', not an integer (value='%s')", key, (String) entry.getValue()));
+            }
+        }
+    }
+
+    @Override
+    public void start() throws Exception {
+        // left blank
+    }
+
+    @Override
+    public void stop() throws Exception {
+        // left blank
+    }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/container/DummyMasterSlaveProcess.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/container/DummyMasterSlaveProcess.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/container/DummyMasterSlaveProcess.java
new file mode 100644
index 0000000..2d91bdd
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/container/DummyMasterSlaveProcess.java
@@ -0,0 +1,76 @@
+package org.apache.helix.metamanager.impl.container;
+
+import org.apache.helix.NotificationContext;
+import org.apache.helix.metamanager.container.ContainerProcess;
+import org.apache.helix.metamanager.container.ContainerProcessProperties;
+import org.apache.helix.model.Message;
+import org.apache.helix.participant.statemachine.StateModel;
+import org.apache.helix.participant.statemachine.StateModelFactory;
+import org.apache.helix.participant.statemachine.StateModelInfo;
+import org.apache.helix.participant.statemachine.Transition;
+import org.apache.log4j.Logger;
+
+/**
+ * Sample implementation of container with Helix participant for MasterSlave
+ * state model. Print state transitions only.
+ */
+public class DummyMasterSlaveProcess extends ContainerProcess {
+
+    static final Logger log = Logger.getLogger(DummyMasterSlaveProcess.class);
+
+    public DummyMasterSlaveProcess(ContainerProcessProperties properties) throws Exception {
+        configure(properties);
+        setModelName("MasterSlave");
+        setModelFactory(new DummyMasterSlaveModelFactory());
+    }
+
+    @Override
+    protected void startContainer() throws Exception {
+        log.info("starting dummy process container");
+    }
+
+    @Override
+    protected void stopContainer() throws Exception {
+        log.info("stopping dummy process container");
+    }
+
+    /** Hands out a fresh state model per assigned partition. */
+    public static class DummyMasterSlaveModelFactory extends StateModelFactory<DummyMasterSlaveStateModel> {
+        @Override
+        public DummyMasterSlaveStateModel createNewStateModel(String partitionName) {
+            return new DummyMasterSlaveStateModel();
+        }
+    }
+
+    /** MasterSlave model whose transitions do nothing except emit a trace line. */
+    @StateModelInfo(initialState = "OFFLINE", states = { "OFFLINE", "SLAVE", "MASTER", "DROPPED" })
+    public static class DummyMasterSlaveStateModel extends StateModel {
+
+        static final Logger log = Logger.getLogger(DummyMasterSlaveStateModel.class);
+
+        @Transition(from = "OFFLINE", to = "SLAVE")
+        public void offlineToSlave(Message m, NotificationContext context) {
+            trace(context, "OFFLINE", "SLAVE");
+        }
+
+        @Transition(from = "SLAVE", to = "OFFLINE")
+        public void slaveToOffline(Message m, NotificationContext context) {
+            trace(context, "SLAVE", "OFFLINE");
+        }
+
+        @Transition(from = "SLAVE", to = "MASTER")
+        public void slaveToMaster(Message m, NotificationContext context) {
+            trace(context, "SLAVE", "MASTER");
+        }
+
+        @Transition(from = "MASTER", to = "SLAVE")
+        public void masterToSlave(Message m, NotificationContext context) {
+            trace(context, "MASTER", "SLAVE");
+        }
+
+        @Transition(from = "OFFLINE", to = "DROPPED")
+        public void offlineToDropped(Message m, NotificationContext context) {
+            trace(context, "OFFLINE", "DROPPED");
+        }
+
+        // emits exactly the same message text as the per-transition logging it replaces
+        private static void trace(NotificationContext context, String from, String to) {
+            log.trace(String.format("%s transitioning from %s to %s", context.getManager().getInstanceName(), from, to));
+        }
+    }
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/container/DummyOnlineOfflineProcess.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/container/DummyOnlineOfflineProcess.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/container/DummyOnlineOfflineProcess.java
new file mode 100644
index 0000000..62f63a8
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/container/DummyOnlineOfflineProcess.java
@@ -0,0 +1,66 @@
+package org.apache.helix.metamanager.impl.container;
+
+import org.apache.helix.NotificationContext;
+import org.apache.helix.metamanager.container.ContainerProcess;
+import org.apache.helix.metamanager.container.ContainerProcessProperties;
+import org.apache.helix.model.Message;
+import org.apache.helix.participant.statemachine.StateModel;
+import org.apache.helix.participant.statemachine.StateModelFactory;
+import org.apache.helix.participant.statemachine.StateModelInfo;
+import org.apache.helix.participant.statemachine.Transition;
+import org.apache.log4j.Logger;
+
+/**
+ * Sample implementation of container with Helix participant for OnlineOffline
+ * state model. Print state transitions only.
+ */
+public class DummyOnlineOfflineProcess extends ContainerProcess {
+
+    static final Logger log = Logger.getLogger(DummyOnlineOfflineProcess.class);
+
+    public DummyOnlineOfflineProcess(ContainerProcessProperties properties) throws Exception {
+        configure(properties);
+        setModelName("OnlineOffline");
+        setModelFactory(new DummyOnlineOfflineModelFactory());
+    }
+
+    @Override
+    protected void startContainer() throws Exception {
+        log.info("starting dummy online-offline process container");
+    }
+
+    @Override
+    protected void stopContainer() throws Exception {
+        log.info("stopping dummy online-offline process container");
+    }
+
+    /** Hands out a fresh state model per assigned partition. */
+    public static class DummyOnlineOfflineModelFactory extends StateModelFactory<DummyOnlineOfflineStateModel> {
+        @Override
+        public DummyOnlineOfflineStateModel createNewStateModel(String partitionName) {
+            return new DummyOnlineOfflineStateModel();
+        }
+    }
+
+    /** OnlineOffline model whose transitions do nothing except emit a trace line. */
+    @StateModelInfo(initialState = "OFFLINE", states = { "OFFLINE", "ONLINE", "DROPPED" })
+    public static class DummyOnlineOfflineStateModel extends StateModel {
+
+        static final Logger log = Logger.getLogger(DummyOnlineOfflineStateModel.class);
+
+        @Transition(from = "OFFLINE", to = "ONLINE")
+        public void offlineToOnline(Message m, NotificationContext context) {
+            trace(context, "OFFLINE", "ONLINE");
+        }
+
+        @Transition(from = "ONLINE", to = "OFFLINE")
+        public void onlineToOffline(Message m, NotificationContext context) {
+            trace(context, "ONLINE", "OFFLINE");
+        }
+
+        @Transition(from = "OFFLINE", to = "DROPPED")
+        public void offlineToDropped(Message m, NotificationContext context) {
+            trace(context, "OFFLINE", "DROPPED");
+        }
+
+        // emits exactly the same message text as the per-transition logging it replaces
+        private static void trace(NotificationContext context, String from, String to) {
+            log.trace(String.format("%s transitioning from %s to %s", context.getManager().getInstanceName(), from, to));
+        }
+    }
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/container/RedisServerProcess.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/container/RedisServerProcess.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/container/RedisServerProcess.java
new file mode 100644
index 0000000..c87f905
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/container/RedisServerProcess.java
@@ -0,0 +1,140 @@
+package org.apache.helix.metamanager.impl.container;
+
+import java.net.InetAddress;
+import java.util.concurrent.Executors;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.TimeUnit;
+
+import org.I0Itec.zkclient.ZkClient;
+import org.apache.helix.NotificationContext;
+import org.apache.helix.metamanager.container.ContainerProcess;
+import org.apache.helix.metamanager.container.ContainerProcessProperties;
+import org.apache.helix.model.Message;
+import org.apache.helix.participant.statemachine.StateModel;
+import org.apache.helix.participant.statemachine.StateModelFactory;
+import org.apache.helix.participant.statemachine.StateModelInfo;
+import org.apache.helix.participant.statemachine.Transition;
+import org.apache.log4j.Logger;
+
+/**
+ * Container implementation for redis-server. Uses OnlineOffline model, spawns
+ * Redis as Shell process and writes metadata to zookeeper.
+ */
+public class RedisServerProcess extends ContainerProcess {
+
+    static final Logger log = Logger.getLogger(RedisServerProcess.class);
+
+    public static final String REDIS_SERVER_COMMAND = "redis-server";
+
+    /** poll interval (ms) for detecting an unexpected redis-server exit */
+    public static final long MONITOR_INTERVAL = 5000;
+
+    ZkClient zookeeper;
+
+    final String address;              // zookeeper connect string
+    final String root;                 // base path for server metadata
+    final String name;                 // helix instance name
+    final int basePort;                // redis port = basePort + instance ordinal
+
+    Process process;                   // spawned redis-server child process
+
+    ScheduledExecutorService executor; // drives the ProcessMonitor
+
+    public RedisServerProcess(ContainerProcessProperties properties) throws Exception {
+        configure(properties);
+        setModelName("OnlineOffline");
+        setModelFactory(new RedisServerModelFactory());
+
+        address = properties.getProperty("address");
+        root = properties.getProperty("root");
+        basePort = Integer.valueOf(properties.getProperty("baseport"));
+        name = properties.getProperty(ContainerProcessProperties.NAME);
+    }
+
+    @Override
+    protected void startContainer() throws Exception {
+        log.info(String.format("starting redis server container for instance '%s'", name));
+
+        String hostname = InetAddress.getLocalHost().getHostName();
+        // derive the port from the instance ordinal; assumes name has the
+        // form "<prefix>_<ordinal>" -- TODO confirm against the provisioner
+        int port = basePort + Integer.valueOf(name.split("_")[1]);
+
+        log.debug(String.format("Starting redis server at '%s:%d'", hostname, port));
+
+        ProcessBuilder builder = new ProcessBuilder();
+        builder.command(REDIS_SERVER_COMMAND, "--port", String.valueOf(port));
+        process = builder.start();
+
+        log.debug("Updating zookeeper");
+        zookeeper = new ZkClient(address);
+        // clear stale metadata from a previous incarnation of this instance
+        zookeeper.deleteRecursive("/" + root + "/" + name);
+        zookeeper.createPersistent("/" + root + "/" + name, true);
+        zookeeper.createPersistent("/" + root + "/" + name + "/hostname", hostname);
+        zookeeper.createPersistent("/" + root + "/" + name + "/port", String.valueOf(port));
+        // ephemeral heartbeat disappears with the session, marking the server dead
+        zookeeper.createEphemeral("/" + root + "/" + name + "/heartbeat");
+
+        log.debug("Starting process monitor");
+        executor = Executors.newSingleThreadScheduledExecutor();
+        executor.scheduleAtFixedRate(new ProcessMonitor(), 0, MONITOR_INTERVAL, TimeUnit.MILLISECONDS);
+
+    }
+
+    @Override
+    protected void stopContainer() throws Exception {
+        log.info("stopping redis server container");
+
+        // stop the monitor first so the planned shutdown is not reported as a failure
+        log.debug("Stopping process monitor");
+        executor.shutdownNow();
+
+        log.debug("Updating zookeeper");
+        zookeeper.deleteRecursive("/" + root + "/" + name);
+        zookeeper.close();
+
+        log.debug("Stopping process");
+        process.destroy();
+        process.waitFor();
+    }
+
+    /** Hands out a fresh state model per assigned partition. */
+    public class RedisServerModelFactory extends StateModelFactory<RedisServerModel> {
+        @Override
+        public RedisServerModel createNewStateModel(String partitionName) {
+            return new RedisServerModel();
+        }
+    }
+
+    /**
+     * OnlineOffline state model. Transitions only emit a trace line; the
+     * redis-server lifecycle is tied to the container, not to partitions.
+     */
+    @StateModelInfo(initialState = "OFFLINE", states = { "OFFLINE", "ONLINE", "DROPPED" })
+    public class RedisServerModel extends StateModel {
+
+        // Methods renamed from offlineToSlave/slaveToOffline (copy-paste from a
+        // MasterSlave model). Helix dispatches via the @Transition annotations,
+        // so the rename is behavior-neutral.
+        @Transition(from = "OFFLINE", to = "ONLINE")
+        public void offlineToOnline(Message m, NotificationContext context) {
+            log.trace(String.format("%s transitioning from %s to %s", context.getManager().getInstanceName(), m.getFromState(), m.getToState()));
+        }
+
+        @Transition(from = "ONLINE", to = "OFFLINE")
+        public void onlineToOffline(Message m, NotificationContext context) {
+            log.trace(String.format("%s transitioning from %s to %s", context.getManager().getInstanceName(), m.getFromState(), m.getToState()));
+        }
+
+        @Transition(from = "OFFLINE", to = "DROPPED")
+        public void offlineToDropped(Message m, NotificationContext context) {
+            log.trace(String.format("%s transitioning from %s to %s", context.getManager().getInstanceName(), m.getFromState(), m.getToState()));
+        }
+
+    }
+
+    /**
+     * Fails the container when the spawned redis-server exits unexpectedly.
+     * Process.exitValue() throws IllegalThreadStateException while the process
+     * is still running, which is the healthy case.
+     */
+    private class ProcessMonitor implements Runnable {
+        @Override
+        public void run() {
+            try {
+                process.exitValue();
+                // exitValue() returned, so the process terminated unexpectedly
+                log.warn("detected process failure");
+                fail();
+            } catch (IllegalThreadStateException e) {
+                // process still running - expected. (Previously caught broad
+                // Exception, which also swallowed any failure thrown by fail().)
+            }
+        }
+    }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/container/ZookeeperMasterSlaveProcess.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/container/ZookeeperMasterSlaveProcess.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/container/ZookeeperMasterSlaveProcess.java
new file mode 100644
index 0000000..a493a71
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/impl/container/ZookeeperMasterSlaveProcess.java
@@ -0,0 +1,108 @@
+package org.apache.helix.metamanager.impl.container;
+
+import org.I0Itec.zkclient.ZkClient;
+import org.apache.helix.NotificationContext;
+import org.apache.helix.metamanager.container.ContainerProcess;
+import org.apache.helix.metamanager.container.ContainerProcessProperties;
+import org.apache.helix.model.Message;
+import org.apache.helix.participant.statemachine.StateModel;
+import org.apache.helix.participant.statemachine.StateModelFactory;
+import org.apache.helix.participant.statemachine.StateModelInfo;
+import org.apache.helix.participant.statemachine.Transition;
+import org.apache.log4j.Logger;
+
+/**
+ * Sample implementation of container with Helix participant for MasterSlave
+ * state model. Writes current state to separate zookeeper domain.
+ *
+ */
+public class ZookeeperMasterSlaveProcess extends ContainerProcess {
+
+ static final Logger log = Logger.getLogger(ZookeeperMasterSlaveProcess.class);
+
+ ZkClient zookeeper;
+
+ final String address;
+ final String root;
+ final String name;
+
+ public ZookeeperMasterSlaveProcess(ContainerProcessProperties properties) throws Exception {
+ configure(properties);
+ setModelName("MasterSlave");
+ setModelFactory(new ZookeeperMasterSlaveModelFactory());
+
+ address = properties.getProperty("address");
+ root = properties.getProperty("root");
+ name = properties.getProperty(ContainerProcessProperties.NAME);
+ }
+
+ @Override
+ protected void startContainer() throws Exception {
+ log.info("starting zookeeper process container");
+
+ zookeeper = new ZkClient(address);
+ zookeeper.createPersistent("/" + root + "/" + name, true);
+ }
+
+ @Override
+ protected void stopContainer() throws Exception {
+ log.info("stopping zookeeper process container");
+
+ zookeeper.close();
+ }
+
+ public class ZookeeperMasterSlaveModelFactory extends StateModelFactory<ZookeeperMasterSlaveModel> {
+ @Override
+ public ZookeeperMasterSlaveModel createNewStateModel(String partitionName) {
+ return new ZookeeperMasterSlaveModel();
+ }
+ }
+
+ @StateModelInfo(initialState = "OFFLINE", states = { "OFFLINE", "SLAVE", "MASTER", "DROPPED" })
+ public class ZookeeperMasterSlaveModel extends StateModel {
+
+ @Transition(from = "OFFLINE", to = "SLAVE")
+ public void offlineToSlave(Message m, NotificationContext context) {
+ transition(m, context);
+ }
+
+ @Transition(from = "SLAVE", to = "OFFLINE")
+ public void slaveToOffline(Message m, NotificationContext context) {
+ transition(m, context);
+ }
+
+ @Transition(from = "SLAVE", to = "MASTER")
+ public void slaveToMaster(Message m, NotificationContext context) {
+ transition(m, context);
+ }
+
+ @Transition(from = "MASTER", to = "SLAVE")
+ public void masterToSlave(Message m, NotificationContext context) {
+ transition(m, context);
+ }
+
+ @Transition(from = "OFFLINE", to = "DROPPED")
+ public void offlineToDropped(Message m, NotificationContext context) {
+ log.trace(String.format("%s transitioning from %s to %s", context.getManager().getInstanceName(), m.getFromState(), m.getToState()));
+
+ String resource = m.getResourceName();
+ String partition = m.getPartitionName();
+ String path = "/" + root + "/" + name + "/" + resource + "_" + partition;
+
+ zookeeper.delete(path);
+ }
+
+ public void transition(Message m, NotificationContext context) {
+ log.trace(String.format("%s transitioning from %s to %s", context.getManager().getInstanceName(), m.getFromState(), m.getToState()));
+
+ String resource = m.getResourceName();
+ String partition = m.getPartitionName();
+ String path = "/" + root + "/" + name + "/" + resource + "_" + partition;
+
+ zookeeper.delete(path);
+ zookeeper.createEphemeral(path, m.getToState());
+ }
+
+ }
+
+}
[08/15] Adding Helix-task-framework and Yarn integration modules
Posted by ki...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrap/BootUtils.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrap/BootUtils.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrap/BootUtils.java
new file mode 100644
index 0000000..fbbfa14
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrap/BootUtils.java
@@ -0,0 +1,127 @@
+package org.apache.helix.metamanager.bootstrap;
+
+import java.util.Collection;
+import java.util.Map;
+import java.util.Properties;
+import java.util.Set;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.commons.lang.StringUtils;
+import org.apache.helix.metamanager.container.ContainerProcessProperties;
+import org.apache.log4j.Logger;
+
+import com.google.common.collect.Lists;
+import com.google.common.collect.Sets;
+
+public class BootUtils {
+
+ public static final String CLASS_PROPERTY = "class";
+ static final Logger log = Logger.getLogger(BootUtils.class);
+
+ public static boolean hasNamespace(Properties properties, String namespace) {
+ String prefix = namespace + ".";
+ for (String key : properties.stringPropertyNames()) {
+ if (key.startsWith(prefix))
+ return true;
+ }
+ return false;
+ }
+
+ public static Set<String> getNamespaces(Properties properties) {
+ Pattern pattern = Pattern.compile("^([^\\.\\=]+)");
+
+ Set<String> namespaces = Sets.newHashSet();
+
+ for (Map.Entry<Object, Object> rawEntry : properties.entrySet()) {
+ String key = (String) rawEntry.getKey();
+
+ Matcher matcher = pattern.matcher(key);
+ if(matcher.find()) {
+ namespaces.add(matcher.group(1));
+ }
+ }
+
+ return namespaces;
+ }
+
+ public static Properties getNamespace(Properties source, String namespace) {
+ Properties dest = new Properties();
+ String prefix = namespace + ".";
+
+ for (Map.Entry<Object, Object> rawEntry : source.entrySet()) {
+ String key = (String) rawEntry.getKey();
+ String value = (String) rawEntry.getValue();
+
+ if (key.startsWith(prefix)) {
+ String newKey = key.substring(prefix.length());
+ dest.put(newKey, value);
+ }
+ }
+
+ return dest;
+ }
+
+ @SuppressWarnings("unchecked")
+ public static <T> T createInstance(Properties properties) throws Exception {
+ String className = properties.getProperty(CLASS_PROPERTY);
+
+ Class<?> containerClass = Class.forName(className);
+
+ try {
+ log.debug(String.format("checking for properties constructor in class '%s'", className));
+ return (T) containerClass.getConstructor(ContainerProcessProperties.class).newInstance(properties);
+ } catch (Exception e) {
+ log.debug("no properties constructor found");
+ }
+
+ try {
+ log.debug(String.format("checking for default constructor in class '%s'", className));
+ return (T) containerClass.getConstructor().newInstance();
+ } catch (Exception e) {
+ log.debug("no default constructor found");
+ }
+
+ throw new Exception(String.format("no suitable constructor for class '%s'", className));
+ }
+
+ public static <T> T createInstanceFromNamespace(Properties properties, String namespace) throws Exception {
+ return createInstance(getNamespace(properties, namespace));
+ }
+
+ @SuppressWarnings("unchecked")
+ public static <T> T createInstance(Class<?> clazz) throws Exception {
+ try {
+ log.debug(String.format("checking for default constructor in class '%s'", clazz.getSimpleName()));
+ return (T) clazz.getConstructor().newInstance();
+ } catch (Exception e) {
+ log.debug("no default constructor found");
+ }
+
+ throw new Exception(String.format("no suitable constructor for class '%s'", clazz.getSimpleName()));
+ }
+
+ public static <T> T createInstance(String className) throws Exception {
+ return createInstance(Class.forName(className));
+ }
+
+ public static Collection<Properties> getContainerProps(Properties properties) {
+ Collection<Properties> containerProps = Lists.newArrayList();
+
+ String containers = properties.getProperty("containers");
+ String containerTypes[] = StringUtils.split(containers, ",");
+
+ for (String containerType : containerTypes) {
+ Properties containerProp = BootUtils.getNamespace(BootUtils.getNamespace(properties, "container"), containerType);
+ log.debug(String.format("adding container type (type='%s', properties='%s')", containerType, containerProp));
+ containerProps.add(containerProp);
+ }
+
+ return containerProps;
+ }
+
+ private BootUtils() {
+ // left blank
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrap/Bootstrapper.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrap/Bootstrapper.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrap/Bootstrapper.java
new file mode 100644
index 0000000..94de15f
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrap/Bootstrapper.java
@@ -0,0 +1,93 @@
+package org.apache.helix.metamanager.bootstrap;
+
+import java.util.Properties;
+
+import org.apache.log4j.Logger;
+
+public class Bootstrapper {
+
+ static final Logger log = Logger.getLogger(Bootstrapper.class);
+
+ ManagedCluster managed;
+ MetaCluster meta;
+ ZookeeperWrapper zookeeper;
+ Properties properties;
+
+ public Bootstrapper(Properties properties) {
+ this.properties = properties;
+ }
+
+ public void start() throws Exception {
+ log.info("bootstrapping cluster");
+ if (BootUtils.hasNamespace(properties, "zookeeper")) {
+ log.info("starting zookeeper");
+ zookeeper = new ZookeeperWrapper(BootUtils.getNamespace(properties, "zookeeper"));
+ zookeeper.startService();
+ }
+
+ log.info("starting managed cluster");
+ managed = new ManagedCluster();
+ managed.setProperties(BootUtils.getNamespace(properties, "managed"));
+ managed.start();
+
+ log.info("starting meta cluster");
+ meta = new MetaCluster();
+ meta.setProperties(BootUtils.getNamespace(properties, "meta"));
+ meta.start();
+ }
+
+ public void stop() throws Exception {
+ log.info("tearing down cluster");
+ if (meta != null) {
+ log.info("stopping meta cluster");
+ meta.stop();
+ meta = null;
+ }
+ if (managed != null) {
+ log.info("stopping managed cluster");
+ managed.stop();
+ managed = null;
+ }
+ if (zookeeper != null) {
+ log.info("stopping zookeeper");
+ zookeeper.stopService();
+ zookeeper = null;
+ }
+
+ }
+
+ public ManagedCluster getManaged() {
+ return managed;
+ }
+
+ public MetaCluster getMeta() {
+ return meta;
+ }
+
+ public ZookeeperWrapper getZookeeper() {
+ return zookeeper;
+ }
+
+ public Properties getProperties() {
+ return properties;
+ }
+
+ public static void main(String[] args) throws Exception {
+ String resourcePath = args[0];
+
+ log.info(String.format("reading cluster definition from '%s'", resourcePath));
+ Properties properties = new Properties();
+ properties.load(ClassLoader.getSystemResourceAsStream(resourcePath));
+
+ final Bootstrapper boot = new Bootstrapper(properties);
+ boot.start();
+
+ Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() {
+ @Override
+ public void run() {
+ try { boot.stop(); } catch(Exception ignore) {}
+ }
+ }));
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrap/ManagedCluster.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrap/ManagedCluster.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrap/ManagedCluster.java
new file mode 100644
index 0000000..b792c9f
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrap/ManagedCluster.java
@@ -0,0 +1,87 @@
+package org.apache.helix.metamanager.bootstrap;
+
+import java.util.Properties;
+
+import org.apache.commons.lang.StringUtils;
+import org.apache.helix.HelixAdmin;
+import org.apache.helix.HelixManager;
+import org.apache.helix.controller.HelixControllerMain;
+import org.apache.helix.manager.zk.ZKHelixAdmin;
+import org.apache.helix.model.IdealState;
+import org.apache.helix.model.IdealState.RebalanceMode;
+import org.apache.helix.model.StateModelDefinition;
+import org.apache.helix.tools.StateModelConfigGenerator;
+import org.apache.log4j.Logger;
+
+public class ManagedCluster {
+
+ static final Logger log = Logger.getLogger(ManagedCluster.class);
+
+ public static final String DEFAULT_CLUSTER = "managed";
+
+ Properties properties;
+
+ HelixAdmin admin;
+ HelixManager controllerMananger;
+
+ public void start() {
+ String cluster = properties.getProperty("cluster", DEFAULT_CLUSTER);
+ String address = properties.getProperty("address");
+
+ log.info(String.format("starting managed cluster service (cluster='%s', address='%s')", cluster, address));
+
+ log.debug("setting up cluster admin");
+ admin = new ZKHelixAdmin(address);
+ admin.addCluster(cluster, false);
+ admin.addStateModelDef(cluster, "OnlineOffline", new StateModelDefinition(StateModelConfigGenerator.generateConfigForOnlineOffline()));
+ admin.addStateModelDef(cluster, "MasterSlave", new StateModelDefinition(StateModelConfigGenerator.generateConfigForMasterSlave()));
+
+ log.debug("setting up resources");
+ String resources = properties.getProperty("resources");
+ String[] resourceNames = StringUtils.split(resources, ",");
+
+ for (String resourceName : resourceNames) {
+ Properties properties = BootUtils.getNamespace(BootUtils.getNamespace(this.properties, "resource"), resourceName);
+
+ log.debug(String.format("parsing resource '%s' (properties='%s')", resourceName, properties));
+
+ String container = properties.getProperty("container");
+ String model = properties.getProperty("model");
+ int partitions = Integer.parseInt(properties.getProperty("partitions"));
+ int replica = Integer.parseInt(properties.getProperty("replica"));
+
+ log.debug(String.format("setting up resource '%s' (container='%s', model='%s', partitions=%d, replica=%d)", resourceName, container, model,
+ partitions, replica));
+
+ admin.addResource(cluster, resourceName, partitions, model, RebalanceMode.FULL_AUTO.toString());
+ IdealState idealState = admin.getResourceIdealState(cluster, resourceName);
+ idealState.setInstanceGroupTag(container);
+ idealState.setReplicas(String.valueOf(replica));
+ admin.setResourceIdealState(cluster, resourceName, idealState);
+ }
+
+ log.debug("setting up controller");
+ controllerMananger = HelixControllerMain.startHelixController(address, cluster, "managedController", HelixControllerMain.STANDALONE);
+ }
+
+ public void stop() {
+ log.info("stopping managed cluster service");
+ if (controllerMananger != null) {
+ controllerMananger.disconnect();
+ controllerMananger = null;
+ }
+ if (admin != null) {
+ admin.close();
+ admin = null;
+ }
+ }
+
+ public Properties getProperties() {
+ return properties;
+ }
+
+ public void setProperties(Properties properties) {
+ this.properties = properties;
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrap/MetaCluster.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrap/MetaCluster.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrap/MetaCluster.java
new file mode 100644
index 0000000..51700a8
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrap/MetaCluster.java
@@ -0,0 +1,201 @@
+package org.apache.helix.metamanager.bootstrap;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+import java.util.concurrent.Executors;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.helix.HelixAdmin;
+import org.apache.helix.HelixManager;
+import org.apache.helix.controller.HelixControllerMain;
+import org.apache.helix.manager.zk.ZKHelixAdmin;
+import org.apache.helix.metamanager.ConfigTool;
+import org.apache.helix.metamanager.provider.ProviderProcess;
+import org.apache.helix.metamanager.provider.ProviderRebalancer;
+import org.apache.helix.model.HelixConfigScope;
+import org.apache.helix.model.HelixConfigScope.ConfigScopeProperty;
+import org.apache.helix.model.IdealState;
+import org.apache.helix.model.IdealState.RebalanceMode;
+import org.apache.helix.model.InstanceConfig;
+import org.apache.helix.model.StateModelDefinition;
+import org.apache.helix.model.builder.HelixConfigScopeBuilder;
+import org.apache.helix.tools.StateModelConfigGenerator;
+import org.apache.log4j.Logger;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.Maps;
+
+public class MetaCluster {
+
+ static final Logger log = Logger.getLogger(MetaCluster.class);
+
+ private static final String DEFAULT_CLUSTER = "meta";
+ private static final String DEFAULT_MANAGED = "managed";
+ private static final String DEFAULT_INTERVAL = "10000";
+
+ Properties properties;
+
+ TargetWrapper target;
+ StatusWrapper status;
+ ProviderWrapper provider;
+
+ HelixAdmin admin;
+ HelixManager controllerManager;
+ ProviderProcess providerProcess;
+
+ String cluster;
+ String address;
+ String managed;
+ int interval;
+
+ ScheduledExecutorService executor;
+
+ public Properties getProperties() {
+ return properties;
+ }
+
+ public void setProperties(Properties properties) {
+ this.properties = properties;
+ }
+
+ public void start() throws Exception {
+ Preconditions.checkArgument(BootUtils.hasNamespace(properties, "target"), "No 'target' property specified");
+ Preconditions.checkArgument(BootUtils.hasNamespace(properties, "status"), "No 'status' property specified");
+ Preconditions.checkArgument(BootUtils.hasNamespace(properties, "provider"), "No 'provider' property specified");
+
+ cluster = properties.getProperty("cluster", DEFAULT_CLUSTER);
+ address = properties.getProperty("address");
+ managed = properties.getProperty("managed", DEFAULT_MANAGED);
+ interval = Integer.valueOf(properties.getProperty("interval", DEFAULT_INTERVAL));
+
+ log.info(String.format("starting meta cluster service (cluster='%s', address='%s', managed='%s')", cluster, address, managed));
+
+ log.debug("setting up cluster admin");
+ admin = new ZKHelixAdmin(address);
+ admin.addCluster(cluster, false);
+ admin.addStateModelDef(cluster, "OnlineOffline", new StateModelDefinition(StateModelConfigGenerator.generateConfigForOnlineOffline()));
+
+ log.debug("setting up target service");
+ target = new TargetWrapper(BootUtils.getNamespace(properties, "target"));
+ target.startService();
+
+ log.debug("setting up container status service");
+ status = new StatusWrapper(BootUtils.getNamespace(properties, "status"));
+ status.startService();
+
+ log.debug("setting up container provider service");
+ provider = new ProviderWrapper(BootUtils.getNamespace(properties, "provider"));
+ admin.addInstance(cluster, new InstanceConfig(provider.getProviderName()));
+
+ provider.startService();
+
+ log.debug("setting up config tool");
+ ConfigTool.setTargetProvider(target.getTarget());
+ ConfigTool.setStatusProvider(status.getStatus());
+
+ log.debug("setting up provider");
+ String providerName = provider.getProviderName();
+
+ admin.addInstance(cluster, new InstanceConfig(providerName));
+
+ for (String containerType : provider.getContainerTypes()) {
+ log.debug(String.format("setting up container type '%s'", containerType));
+
+ admin.addResource(cluster, containerType, target.getTarget().getTargetContainerCount(containerType), "OnlineOffline",
+ RebalanceMode.USER_DEFINED.toString());
+
+ IdealState idealState = admin.getResourceIdealState(cluster, containerType);
+ idealState.setRebalancerClassName(ProviderRebalancer.class.getName());
+ idealState.setReplicas("1");
+
+ // BEGIN workaround
+ // FIXME workaround for HELIX-226
+ Map<String, List<String>> listFields = Maps.newHashMap();
+ Map<String, Map<String, String>> mapFields = Maps.newHashMap();
+ for(int i=0; i<256; i++) {
+ String partitionName = containerType + "_" + i;
+ listFields.put(partitionName, new ArrayList<String>());
+ mapFields.put(partitionName, new HashMap<String, String>());
+ }
+ idealState.getRecord().setListFields(listFields);
+ idealState.getRecord().setMapFields(mapFields);
+ // END workaround
+
+ admin.setResourceIdealState(cluster, containerType, idealState);
+ }
+
+ log.debug("starting controller");
+ controllerManager = HelixControllerMain.startHelixController(address, cluster, "metaController", HelixControllerMain.STANDALONE);
+
+ log.debug("starting state refresh service");
+ executor = Executors.newSingleThreadScheduledExecutor();
+ executor.scheduleAtFixedRate(new MetaRefreshRunnable(), interval, interval, TimeUnit.MILLISECONDS);
+
+ HelixConfigScope scope = new HelixConfigScopeBuilder(ConfigScopeProperty.CLUSTER, cluster).build();
+ admin.setConfig(scope, Collections.singletonMap("key", "value"));
+
+ }
+
+ public void stop() throws Exception {
+ log.info("stopping meta cluster service");
+ if (executor != null) {
+ executor.shutdownNow();
+ executor = null;
+ }
+ if (controllerManager != null) {
+ controllerManager.disconnect();
+ controllerManager = null;
+ }
+ if (providerProcess != null) {
+ providerProcess.stop();
+ providerProcess = null;
+ }
+ if (provider != null) {
+ provider.stopService();
+ provider = null;
+ }
+ if (status != null) {
+ status.stopService();
+ status = null;
+ }
+ if (target != null) {
+ target.stopService();
+ target = null;
+ }
+ if (admin != null) {
+ admin.close();
+ admin = null;
+ }
+ }
+
+ public TargetWrapper getTarget() {
+ return target;
+ }
+
+ public StatusWrapper getStatus() {
+ return status;
+ }
+
+ public ProviderWrapper getProvider() {
+ return provider;
+ }
+
+ private class MetaRefreshRunnable implements Runnable {
+ @Override
+ public void run() {
+ log.debug("running status refresh");
+ for (String containerType : provider.getContainerTypes()) {
+ log.debug(String.format("refreshing container type '%s'", containerType));
+
+ IdealState poke = admin.getResourceIdealState(cluster, containerType);
+ admin.setResourceIdealState(cluster, containerType, poke);
+ }
+ }
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrap/ProviderWrapper.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrap/ProviderWrapper.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrap/ProviderWrapper.java
new file mode 100644
index 0000000..b8e35bb
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrap/ProviderWrapper.java
@@ -0,0 +1,162 @@
+package org.apache.helix.metamanager.bootstrap;
+
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Properties;
+import java.util.Set;
+
+import org.apache.commons.lang.StringUtils;
+import org.apache.helix.metamanager.impl.local.LocalContainerProviderProcess;
+import org.apache.helix.metamanager.impl.shell.ShellContainerProviderProcess;
+import org.apache.helix.metamanager.impl.yarn.YarnContainerProviderProcess;
+import org.apache.helix.metamanager.impl.yarn.YarnContainerProviderProperties;
+import org.apache.log4j.Logger;
+
+public class ProviderWrapper {
+
+ static final Logger log = Logger.getLogger(ProviderWrapper.class);
+
+ WrapperImpl impl;
+ Properties properties;
+
+ public ProviderWrapper(Properties properties) {
+ this.properties = properties;
+ }
+
+ public void startService() throws Exception {
+ String type = (String) properties.get("type");
+
+ log.info(String.format("starting container provider service (type='%s')", type));
+
+ if ("local".equals(type)) {
+ impl = new LocalWrapperImpl();
+
+ } else if ("shell".equals(type)) {
+ impl = new ShellWrapperImpl();
+
+ } else if ("yarn".equals(type)) {
+ impl = new YarnWrapperImpl();
+
+ } else {
+ throw new IllegalArgumentException(String.format("type '%s' not supported", type));
+ }
+
+ impl.startService();
+ }
+
+ public void stopService() throws Exception {
+ impl.stopService();
+ }
+
+ public String getProviderName() {
+ return properties.getProperty("name");
+ }
+
+ public Set<String> getContainerTypes() {
+ String containers = properties.getProperty("containers");
+ String containerTypes[] = StringUtils.split(containers, ",");
+ return new HashSet<String>(Arrays.asList(containerTypes));
+ }
+
+ static interface WrapperImpl {
+ void startService() throws Exception;
+
+ void stopService() throws Exception;
+ }
+
+ class LocalWrapperImpl implements WrapperImpl {
+ LocalContainerProviderProcess process;
+
+ @Override
+ public void startService() throws Exception {
+ String name = properties.getProperty("name");
+ String address = properties.getProperty("address");
+ String cluster = properties.getProperty("cluster");
+ String containers = properties.getProperty("containers");
+
+ log.debug(String.format("creating local container provider (name='%s', address='%s', cluster='%s', containers='%s')", name, address, cluster,
+ containers));
+
+ process = new LocalContainerProviderProcess();
+ process.configure(properties);
+ process.start();
+ }
+
+ @Override
+ public void stopService() throws Exception {
+ process.stop();
+ process = null;
+ }
+
+ }
+
+ class ShellWrapperImpl implements WrapperImpl {
+
+ ShellContainerProviderProcess process;
+
+ @Override
+ public void startService() throws Exception {
+ String name = properties.getProperty("name");
+ String address = properties.getProperty("address");
+ String cluster = properties.getProperty("cluster");
+ String containers = properties.getProperty("containers");
+
+ log.debug(String.format("creating shell container provider (name='%s', address='%s', cluster='%s', containers='%s')", name, address, cluster,
+ containers));
+
+ process = new ShellContainerProviderProcess();
+ process.configure(properties);
+ process.start();
+ }
+
+ @Override
+ public void stopService() throws Exception {
+ process.stop();
+ process = null;
+ }
+
+ }
+
+ class YarnWrapperImpl implements WrapperImpl {
+
+ YarnContainerProviderProcess process;
+
+ @Override
+ public void startService() throws Exception {
+ String name = properties.getProperty("name");
+ String address = properties.getProperty("address");
+ String cluster = properties.getProperty("cluster");
+ String containers = properties.getProperty("containers");
+ String metadata = properties.getProperty("metadata");
+ String resourcemanager = properties.getProperty("resourcemananger");
+ String scheduler = properties.getProperty("scheduler");
+ String user = properties.getProperty("user");
+ String hdfs = properties.getProperty("hdfs");
+
+ YarnContainerProviderProperties yarnProperties = new YarnContainerProviderProperties();
+ yarnProperties.setProperty(YarnContainerProviderProperties.CLUSTER, cluster);
+ yarnProperties.setProperty(YarnContainerProviderProperties.ADDRESS, address);
+ yarnProperties.setProperty(YarnContainerProviderProperties.NAME, name);
+ yarnProperties.setProperty(YarnContainerProviderProperties.METADATA, metadata);
+ yarnProperties.setProperty(YarnContainerProviderProperties.RESOURCEMANAGER, resourcemanager);
+ yarnProperties.setProperty(YarnContainerProviderProperties.SCHEDULER, scheduler);
+ yarnProperties.setProperty(YarnContainerProviderProperties.USER, user);
+ yarnProperties.setProperty(YarnContainerProviderProperties.HDFS, hdfs);
+
+ log.debug(String.format("creating yarn container provider (name='%s', address='%s', cluster='%s', metadata='%s', resourcemananger='%s', " +
+ "scheduler='%s', user='%s', hdfs='%s', containers='%s')", name, address, cluster, metadata, resourcemanager, scheduler, user, hdfs, containers));
+
+ process = new YarnContainerProviderProcess();
+ process.configure(yarnProperties);
+ process.start();
+ }
+
+ @Override
+ public void stopService() throws Exception {
+ process.stop();
+ process = null;
+ }
+
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrap/StatusWrapper.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrap/StatusWrapper.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrap/StatusWrapper.java
new file mode 100644
index 0000000..20fa0db
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrap/StatusWrapper.java
@@ -0,0 +1,122 @@
+package org.apache.helix.metamanager.bootstrap;
+
+import java.util.Properties;
+
+import org.apache.helix.metamanager.StatusProvider;
+import org.apache.helix.metamanager.StatusProviderService;
+import org.apache.helix.metamanager.impl.local.LocalStatusProvider;
+import org.apache.helix.metamanager.impl.shell.ShellStatusProvider;
+import org.apache.helix.metamanager.impl.yarn.YarnStatusProvider;
+import org.apache.log4j.Logger;
+
+public class StatusWrapper {
+
+ static final Logger log = Logger.getLogger(StatusWrapper.class);
+
+ WrapperImpl impl;
+ StatusProviderService status;
+ Properties properties;
+
+ public StatusWrapper(Properties properties) {
+ this.properties = properties;
+ }
+
+ public void startService() throws Exception {
+ String type = (String) properties.get("type");
+
+ log.info(String.format("starting container status service (type='%s')", type));
+
+ if ("local".equals(type)) {
+ impl = new LocalWrapperImpl();
+
+ } else if ("shell".equals(type)) {
+ impl = new ShellWrapperImpl();
+
+ } else if ("yarn".equals(type)) {
+ impl = new YarnWrapperImpl();
+
+ } else {
+ throw new IllegalArgumentException(String.format("type '%s' not supported", type));
+ }
+
+ impl.startService();
+ }
+
+ public void stopService() throws Exception {
+ log.debug("stopping container status provider");
+ impl.stopService();
+ status = null;
+ }
+
+ public StatusProvider getStatus() {
+ return status;
+ }
+
+ static interface WrapperImpl {
+ void startService() throws Exception;
+
+ void stopService() throws Exception;
+ }
+
+ class LocalWrapperImpl implements WrapperImpl {
+
+ LocalStatusProvider status;
+
+ @Override
+ public void startService() throws Exception {
+ log.debug("creating local container status provider");
+ status = new LocalStatusProvider();
+ status.configure(properties);
+ status.start();
+
+ StatusWrapper.this.status = status;
+ }
+
+ @Override
+ public void stopService() throws Exception {
+ status.stop();
+ }
+ }
+
+ class ShellWrapperImpl implements WrapperImpl {
+
+ ShellStatusProvider status;
+
+ @Override
+ public void startService() throws Exception {
+ log.debug("creating shell container status provider");
+ status = new ShellStatusProvider();
+ status.configure(properties);
+ status.start();
+ StatusWrapper.this.status = status;
+ }
+
+ @Override
+ public void stopService() throws Exception {
+ status.stop();
+ }
+ }
+
+ class YarnWrapperImpl implements WrapperImpl {
+
+ YarnStatusProvider status;
+
+ @Override
+ public void startService() throws Exception {
+ String metadata = properties.getProperty("metadata");
+
+ log.debug(String.format("creating yarn container status provider (metadata='%s')", metadata));
+ status = new YarnStatusProvider();
+ status.configure(properties);
+ status.start();
+
+ StatusWrapper.this.status = status;
+ }
+
+ @Override
+ public void stopService() throws Exception {
+ status.stop();
+ }
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrap/TargetWrapper.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrap/TargetWrapper.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrap/TargetWrapper.java
new file mode 100644
index 0000000..5920fc4
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrap/TargetWrapper.java
@@ -0,0 +1,117 @@
+package org.apache.helix.metamanager.bootstrap;
+
+import java.util.Properties;
+
+import org.apache.helix.metamanager.TargetProvider;
+import org.apache.helix.metamanager.TargetProviderService;
+import org.apache.helix.metamanager.impl.FileTargetProvider;
+import org.apache.helix.metamanager.impl.RedisTargetProvider;
+import org.apache.helix.metamanager.impl.StaticTargetProvider;
+import org.apache.log4j.Logger;
+
+/**
+ * Bootstrap wrapper for {@link TargetProviderService} implementations. The
+ * "type" property selects a static, file or redis based target provider; all
+ * remaining properties are passed through to the provider's configure() call.
+ */
+public class TargetWrapper {
+
+    static final Logger log = Logger.getLogger(TargetWrapper.class);
+
+    WrapperImpl           impl;
+    Properties            properties;
+    TargetProviderService target;
+
+    public TargetWrapper(Properties properties) {
+        this.properties = properties;
+    }
+
+    /**
+     * Instantiate and start the target provider selected by the "type"
+     * property ("static", "file" or "redis").
+     *
+     * @throws IllegalArgumentException if the type is not supported
+     */
+    public void startService() throws Exception {
+        String type = properties.getProperty("type");
+
+        log.info(String.format("starting target service (type='%s')", type));
+
+        if ("static".equals(type)) {
+            impl = new StaticWrapperImpl();
+        } else if ("file".equals(type)) {
+            impl = new FileWrapperImpl();
+        } else if ("redis".equals(type)) {
+            impl = new RedisWrapperImpl();
+        } else {
+            throw new IllegalArgumentException(String.format("type '%s' not supported", type));
+        }
+
+        impl.startService();
+    }
+
+    /** Stop the running target provider. Safe to call when never started. */
+    public void stopService() throws Exception {
+        log.info("stopping target service");
+        if (impl != null) {
+            impl.stopService();
+            impl = null;
+        }
+        target = null;
+    }
+
+    /** @return the running provider, or null if not started */
+    public TargetProvider getTarget() {
+        return target;
+    }
+
+    static interface WrapperImpl {
+        void startService() throws Exception;
+
+        void stopService() throws Exception;
+    }
+
+    /**
+     * Shared lifecycle boilerplate: copy the configuration, strip the "type"
+     * selector key, then configure and start the concrete provider.
+     */
+    private abstract class AbstractWrapperImpl implements WrapperImpl {
+        @Override
+        public void startService() throws Exception {
+            Properties prop = new Properties();
+            prop.putAll(properties);
+            prop.remove("type");
+
+            target = createProvider();
+            target.configure(prop);
+            target.start();
+        }
+
+        @Override
+        public void stopService() throws Exception {
+            target.stop();
+        }
+
+        /** @return a new, unconfigured provider instance */
+        abstract TargetProviderService createProvider();
+    }
+
+    private class StaticWrapperImpl extends AbstractWrapperImpl {
+        @Override
+        TargetProviderService createProvider() {
+            log.debug("creating static target provider");
+            return new StaticTargetProvider();
+        }
+    }
+
+    private class FileWrapperImpl extends AbstractWrapperImpl {
+        @Override
+        TargetProviderService createProvider() {
+            log.debug("creating file target provider");
+            return new FileTargetProvider();
+        }
+    }
+
+    private class RedisWrapperImpl extends AbstractWrapperImpl {
+        @Override
+        TargetProviderService createProvider() {
+            log.debug("creating redis target provider");
+            return new RedisTargetProvider();
+        }
+    }
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrap/ZookeeperWrapper.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrap/ZookeeperWrapper.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrap/ZookeeperWrapper.java
new file mode 100644
index 0000000..eca7fab
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrap/ZookeeperWrapper.java
@@ -0,0 +1,57 @@
+package org.apache.helix.metamanager.bootstrap;
+
+import java.io.File;
+import java.util.Properties;
+
+import org.I0Itec.zkclient.IDefaultNameSpace;
+import org.I0Itec.zkclient.ZkClient;
+import org.I0Itec.zkclient.ZkServer;
+import org.apache.commons.io.FileUtils;
+import org.apache.log4j.Logger;
+
+/**
+ * Bootstrap wrapper for an embedded zookeeper server. Wipes the configured
+ * data and log directories before startup.
+ */
+public class ZookeeperWrapper {
+
+    static final Logger log = Logger.getLogger(ZookeeperWrapper.class);
+
+    ZkServer   server;
+    Properties properties;
+
+    public ZookeeperWrapper(Properties properties) {
+        this.properties = properties;
+    }
+
+    /** Delete stale state and launch the embedded zookeeper server. */
+    public void startService() {
+        final String dataDir = properties.getProperty("datadir");
+        final String logDir = properties.getProperty("logdir");
+        final int port = Integer.parseInt(properties.getProperty("port"));
+
+        log.info(String.format("starting zookeeper service (dataDir='%s', logDir='%s', port=%d)", dataDir, logDir, port));
+
+        // start from a clean slate on every boot
+        FileUtils.deleteQuietly(new File(dataDir));
+        FileUtils.deleteQuietly(new File(logDir));
+
+        server = new ZkServer(dataDir, logDir, new IDefaultNameSpace() {
+            @Override
+            public void createDefaultNameSpace(ZkClient zkClient) {
+                // left blank
+            }
+        }, port);
+        server.start();
+    }
+
+    /** Shut down the embedded server, if running. Idempotent. */
+    public void stopService() {
+        log.info("stopping zookeeper service");
+
+        if (server == null)
+            return;
+
+        server.shutdown();
+        server = null;
+    }
+
+    /** @return the running server, or null if stopped */
+    public ZkServer getZookeeper() {
+        return server;
+    }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrapper/Boot.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrapper/Boot.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrapper/Boot.java
new file mode 100644
index 0000000..004573d
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrapper/Boot.java
@@ -0,0 +1,132 @@
+package org.apache.helix.metamanager.bootstrapper;
+
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+
+import org.apache.helix.metamanager.Service;
+import org.apache.log4j.Logger;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.Lists;
+
+/**
+ * Bootstrapper for elastic cluster deployment using *.properties configuration
+ * files. (Program entry point)
+ *
+ * Services are started in the fixed order given by serviceOrder and stopped
+ * in reverse order.
+ */
+public class Boot implements Service {
+
+    static final Logger log = Logger.getLogger(Boot.class);
+
+    // registry of bootstrappable service types, keyed by namespace prefix
+    static final Map<String, Class<? extends Service>> classes = new HashMap<String, Class<? extends Service>>();
+    static {
+        classes.put("zookeeper", ZookeeperService.class);
+        classes.put("cluster", ClusterService.class);
+        classes.put("resource", ResourceService.class);
+        classes.put("controller", ControllerService.class);
+        classes.put("metacluster", MetaClusterService.class);
+        classes.put("metaresource", MetaResourceService.class);
+        classes.put("metaprovider", MetaProviderService.class);
+        classes.put("metacontroller", MetaControllerService.class);
+    }
+
+    // startup order; dependencies must come before their users
+    static final List<String> serviceOrder = Arrays.asList("zookeeper", "cluster", "resource", "metacluster", "metaresource",
+            "metaprovider", "controller", "metacontroller");
+
+    Properties    properties;
+    List<Service> services = Lists.newArrayList();
+
+    @Override
+    public void configure(Properties properties) throws Exception {
+        Preconditions.checkNotNull(properties);
+        this.properties = properties;
+    }
+
+    @Override
+    public void start() throws Exception {
+        log.info("bootstrapping started");
+
+        for (String key : serviceOrder) {
+            // "key.0" style namespaces allow multiple instances of one type
+            if (BootUtils.hasNamespace(properties, key + ".0")) {
+                processIndexedNamespace(key);
+            } else if (BootUtils.hasNamespace(properties, key)) {
+                processNamespace(key);
+            }
+        }
+
+        log.info("bootstrapping completed");
+    }
+
+    /** Start one service per "key.N" namespace, for N = 0, 1, 2, ... */
+    private void processIndexedNamespace(String key) throws Exception {
+        int i = 0;
+        String indexedKey = key + "." + i;
+
+        while (BootUtils.hasNamespace(properties, indexedKey)) {
+            log.info(String.format("processing namespace '%s'", indexedKey));
+            Service service = BootUtils.createInstance(classes.get(key));
+            service.configure(BootUtils.getNamespace(properties, indexedKey));
+            service.start();
+
+            services.add(service);
+
+            i++;
+            indexedKey = key + "." + i;
+        }
+    }
+
+    /** Start the single service configured under the given namespace. */
+    private void processNamespace(String key) throws Exception {
+        log.info(String.format("processing namespace '%s'", key));
+        Service service = BootUtils.createInstance(classes.get(key));
+        service.configure(BootUtils.getNamespace(properties, key));
+        service.start();
+
+        services.add(service);
+    }
+
+    @Override
+    public void stop() throws Exception {
+        log.info("shutdown started");
+
+        // stop in reverse startup order so dependencies outlive their users
+        Collections.reverse(services);
+        for (Service service : services) {
+            service.stop();
+        }
+
+        log.info("shutdown completed");
+    }
+
+    /** @return the services started so far */
+    public Collection<Service> getServices() {
+        return services;
+    }
+
+    /** @deprecated misspelled; use {@link #getServices()} instead */
+    @Deprecated
+    public Collection<Service> getServcies() {
+        return getServices();
+    }
+
+    public static void main(String[] args) throws Exception {
+        if (args.length < 1) {
+            log.error("Usage: Boot properties_path");
+            return;
+        }
+
+        String resourcePath = args[0];
+
+        log.info(String.format("reading definition from '%s'", resourcePath));
+        Properties properties = new Properties();
+        // fail fast with a clear message if the resource does not exist,
+        // instead of an anonymous NullPointerException from load()
+        properties.load(Preconditions.checkNotNull(ClassLoader.getSystemResourceAsStream(resourcePath),
+                "resource not found: " + resourcePath));
+
+        final Boot boot = new Boot();
+        boot.configure(properties);
+        boot.start();
+
+        Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() {
+            @Override
+            public void run() {
+                log.debug("Running shutdown hook");
+                try {
+                    boot.stop();
+                } catch (Exception ignore) {
+                    // best-effort shutdown; nothing left to do with errors here
+                }
+            }
+        }));
+    }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrapper/BootUtils.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrapper/BootUtils.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrapper/BootUtils.java
new file mode 100644
index 0000000..2fb9ff6
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrapper/BootUtils.java
@@ -0,0 +1,104 @@
+package org.apache.helix.metamanager.bootstrapper;
+
+import java.util.Collection;
+import java.util.Map;
+import java.util.Properties;
+import java.util.Set;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.commons.lang.StringUtils;
+import org.apache.log4j.Logger;
+
+import com.google.common.collect.Lists;
+import com.google.common.collect.Sets;
+
+/**
+ * Utility for instantiating bootstrapping services and parsing hierarchical
+ * properties files.
+ *
+ */
+public class BootUtils {
+
+    public static final String CLASS_PROPERTY = "class";
+    static final Logger        log            = Logger.getLogger(BootUtils.class);
+
+    // first dot-separated segment of a property key, e.g. "a" in "a.b.c=x";
+    // compiled once instead of on every getNamespaces() call
+    static final Pattern       NAMESPACE_PATTERN = Pattern.compile("^([^\\.\\=]+)");
+
+    /** @return true if any property key starts with "namespace." */
+    public static boolean hasNamespace(Properties properties, String namespace) {
+        String prefix = namespace + ".";
+        for (String key : properties.stringPropertyNames()) {
+            if (key.startsWith(prefix))
+                return true;
+        }
+        return false;
+    }
+
+    /** @return the set of distinct top-level namespaces among the property keys */
+    public static Set<String> getNamespaces(Properties properties) {
+        Set<String> namespaces = Sets.newHashSet();
+
+        for (Map.Entry<Object, Object> rawEntry : properties.entrySet()) {
+            String key = (String) rawEntry.getKey();
+
+            Matcher matcher = NAMESPACE_PATTERN.matcher(key);
+            if (matcher.find()) {
+                namespaces.add(matcher.group(1));
+            }
+        }
+
+        return namespaces;
+    }
+
+    /**
+     * Extract the sub-properties below "namespace." with the prefix removed,
+     * e.g. "a.b=x" becomes "b=x" for namespace "a".
+     */
+    public static Properties getNamespace(Properties source, String namespace) {
+        Properties dest = new Properties();
+        String prefix = namespace + ".";
+
+        for (Map.Entry<Object, Object> rawEntry : source.entrySet()) {
+            String key = (String) rawEntry.getKey();
+            String value = (String) rawEntry.getValue();
+
+            if (key.startsWith(prefix)) {
+                String newKey = key.substring(prefix.length());
+                dest.put(newKey, value);
+            }
+        }
+
+        return dest;
+    }
+
+    /**
+     * Read the comma-separated "containers" property and return the
+     * "container.&lt;type&gt;" sub-properties for each listed type. Returns an
+     * empty collection when no "containers" property is present (previously
+     * this NPE'd via StringUtils.split(null)).
+     */
+    public static Collection<Properties> getContainerProps(Properties properties) {
+        Collection<Properties> containerProps = Lists.newArrayList();
+
+        String containers = properties.getProperty("containers");
+        if (containers == null) {
+            return containerProps;
+        }
+
+        String containerTypes[] = StringUtils.split(containers, ",");
+
+        for (String containerType : containerTypes) {
+            Properties containerProp = BootUtils.getNamespace(BootUtils.getNamespace(properties, "container"), containerType);
+            log.debug(String.format("adding container type (type='%s', properties='%s')", containerType, containerProp));
+            containerProps.add(containerProp);
+        }
+
+        return containerProps;
+    }
+
+    /**
+     * Create an instance via the default constructor.
+     *
+     * @throws Exception if no default constructor exists or instantiation
+     *         fails; the underlying failure is preserved as the cause
+     */
+    @SuppressWarnings("unchecked")
+    public static <T> T createInstance(Class<?> clazz) throws Exception {
+        log.debug(String.format("checking for default constructor in class '%s'", clazz.getSimpleName()));
+        try {
+            return (T) clazz.getConstructor().newInstance();
+        } catch (Exception e) {
+            log.debug("no default constructor found");
+            // preserve the underlying cause instead of swallowing it
+            throw new Exception(String.format("no suitable constructor for class '%s'", clazz.getSimpleName()), e);
+        }
+    }
+
+    public static <T> T createInstance(String className) throws Exception {
+        return createInstance(Class.forName(className));
+    }
+
+    private BootUtils() {
+        // left blank
+    }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrapper/ClusterService.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrapper/ClusterService.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrapper/ClusterService.java
new file mode 100644
index 0000000..5b3ec7e
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrapper/ClusterService.java
@@ -0,0 +1,46 @@
+package org.apache.helix.metamanager.bootstrapper;
+
+import java.util.Properties;
+
+import org.apache.helix.HelixAdmin;
+import org.apache.helix.manager.zk.ZKHelixAdmin;
+import org.apache.helix.metamanager.Service;
+import org.apache.helix.model.StateModelDefinition;
+import org.apache.helix.tools.StateModelConfigGenerator;
+import org.apache.log4j.Logger;
+
+/**
+ * Cluster bootstrapping. Create Helix data structures in zookeeper for the
+ * managed cluster.
+ *
+ */
+public class ClusterService implements Service {
+
+    static final Logger log = Logger.getLogger(ClusterService.class);
+
+    String name;
+    String address;
+
+    @Override
+    public void configure(Properties properties) throws Exception {
+        name = properties.getProperty("name", "cluster");
+        address = properties.getProperty("address", "localhost:2199");
+    }
+
+    @Override
+    public void start() throws Exception {
+        log.info(String.format("setting up '%s/%s'", address, name));
+        HelixAdmin admin = new ZKHelixAdmin(address);
+        try {
+            admin.addCluster(name, false);
+            admin.addStateModelDef(name, "OnlineOffline", new StateModelDefinition(StateModelConfigGenerator.generateConfigForOnlineOffline()));
+            admin.addStateModelDef(name, "MasterSlave", new StateModelDefinition(StateModelConfigGenerator.generateConfigForMasterSlave()));
+        } finally {
+            // release the zookeeper connection even if setup fails
+            admin.close();
+        }
+        log.info("setup complete");
+    }
+
+    @Override
+    public void stop() throws Exception {
+        // left blank -- cluster structures are left in place
+    }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrapper/ControllerService.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrapper/ControllerService.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrapper/ControllerService.java
new file mode 100644
index 0000000..2a95ecf
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrapper/ControllerService.java
@@ -0,0 +1,50 @@
+package org.apache.helix.metamanager.bootstrapper;
+
+import java.util.Properties;
+import java.util.concurrent.ScheduledExecutorService;
+
+import org.apache.helix.HelixManager;
+import org.apache.helix.controller.HelixControllerMain;
+import org.apache.helix.metamanager.Service;
+import org.apache.log4j.Logger;
+
+/**
+ * Helix controller bootstrapping and management. Create standalone controller
+ * for managed Helix cluster.
+ *
+ */
+public class ControllerService implements Service {
+
+ static final Logger log = Logger.getLogger(ControllerService.class);
+
+ String name;
+ String cluster;
+ String address;
+
+ HelixManager manager;
+
+ ScheduledExecutorService executor;
+
+ @Override
+ public void configure(Properties properties) throws Exception {
+ name = properties.getProperty("name", "controller");
+ cluster = properties.getProperty("cluster", "cluster");
+ address = properties.getProperty("address", "localhost:2199");
+ }
+
+ @Override
+ public void start() throws Exception {
+ log.info(String.format("starting controller '%s' at '%s/%s'", name, address, cluster));
+ manager = HelixControllerMain.startHelixController(address, cluster, name, HelixControllerMain.STANDALONE);
+ }
+
+ @Override
+ public void stop() throws Exception {
+ if (manager != null) {
+ log.info(String.format("stopping controller '%s' at '%s/%s'", name, address, cluster));
+ manager.disconnect();
+ manager = null;
+ }
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrapper/MetaClusterService.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrapper/MetaClusterService.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrapper/MetaClusterService.java
new file mode 100644
index 0000000..340c961
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrapper/MetaClusterService.java
@@ -0,0 +1,61 @@
+package org.apache.helix.metamanager.bootstrapper;
+
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Properties;
+
+import org.apache.helix.HelixAdmin;
+import org.apache.helix.manager.zk.ZKHelixAdmin;
+import org.apache.helix.metamanager.Service;
+import org.apache.helix.model.HelixConfigScope;
+import org.apache.helix.model.HelixConfigScope.ConfigScopeProperty;
+import org.apache.helix.model.StateModelDefinition;
+import org.apache.helix.model.builder.HelixConfigScopeBuilder;
+import org.apache.helix.tools.StateModelConfigGenerator;
+import org.apache.log4j.Logger;
+
+/**
+ * Meta cluster bootstrapping. Create Helix data structures in zookeeper for
+ * the meta cluster.
+ *
+ */
+public class MetaClusterService implements Service {
+
+    static final Logger log = Logger.getLogger(MetaClusterService.class);
+
+    String name;
+    String address;
+    String managedCluster;
+    String managedAddress;
+
+    @Override
+    public void configure(Properties properties) throws Exception {
+        name = properties.getProperty("name", "metacluster");
+        address = properties.getProperty("address", "localhost:2199");
+        managedCluster = properties.getProperty("managedcluster", "cluster");
+        managedAddress = properties.getProperty("managedaddress", "localhost:2199");
+    }
+
+    @Override
+    public void start() throws Exception {
+        log.info(String.format("setting up '%s/%s'", address, name));
+        HelixAdmin admin = new ZKHelixAdmin(address);
+        try {
+            admin.addCluster(name, false);
+            admin.addStateModelDef(name, "OnlineOffline", new StateModelDefinition(StateModelConfigGenerator.generateConfigForOnlineOffline()));
+
+            // record the managed cluster coordinates in the meta cluster config
+            HelixConfigScope scope = new HelixConfigScopeBuilder(ConfigScopeProperty.CLUSTER, name).build();
+            Map<String, String> clusterConfig = new HashMap<String, String>();
+            clusterConfig.put("cluster", managedCluster);
+            clusterConfig.put("address", managedAddress);
+            admin.setConfig(scope, clusterConfig);
+        } finally {
+            // release the zookeeper connection even if setup fails
+            admin.close();
+        }
+        log.info("setup complete");
+    }
+
+    @Override
+    public void stop() throws Exception {
+        // left blank -- meta cluster structures are left in place
+    }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrapper/MetaControllerService.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrapper/MetaControllerService.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrapper/MetaControllerService.java
new file mode 100644
index 0000000..a12753c
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrapper/MetaControllerService.java
@@ -0,0 +1,114 @@
+package org.apache.helix.metamanager.bootstrapper;
+
+import java.util.Properties;
+import java.util.concurrent.Executors;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.helix.HelixAdmin;
+import org.apache.helix.HelixManager;
+import org.apache.helix.controller.HelixControllerMain;
+import org.apache.helix.metamanager.Service;
+import org.apache.helix.metamanager.StatusProviderService;
+import org.apache.helix.metamanager.TargetProviderService;
+import org.apache.helix.metamanager.provider.ProviderRebalancerSingleton;
+import org.apache.helix.model.IdealState;
+import org.apache.log4j.Logger;
+
+/**
+ * Meta cluster controller bootstrapping and management. Create standalone
+ * controller for Helix meta cluster. Spawn StatusProvider and TargetProvider
+ * and trigger periodic status refresh in meta cluster.
+ *
+ */
+public class MetaControllerService implements Service {
+
+    static final Logger log = Logger.getLogger(MetaControllerService.class);
+
+    String name;
+    String metacluster;
+    String metaaddress;
+    long   autorefresh;    // refresh interval in ms; <= 0 disables auto refresh
+
+    HelixManager             manager;
+    StatusProviderService    statusService;
+    TargetProviderService    targetService;
+    ScheduledExecutorService executor;
+
+    @Override
+    public void configure(Properties properties) throws Exception {
+        name = properties.getProperty("name", "controller");
+        metacluster = properties.getProperty("metacluster", "metacluster");
+        metaaddress = properties.getProperty("metaaddress", "localhost:2199");
+        autorefresh = Long.valueOf(properties.getProperty("autorefresh", "0"));
+
+        Properties statusProperties = BootUtils.getNamespace(properties, "status");
+        statusService = BootUtils.createInstance(Class.forName(statusProperties.getProperty("class")));
+        statusService.configure(statusProperties);
+        ProviderRebalancerSingleton.setStatusProvider(statusService);
+
+        Properties targetProperties = BootUtils.getNamespace(properties, "target");
+        targetService = BootUtils.createInstance(Class.forName(targetProperties.getProperty("class")));
+        targetService.configure(targetProperties);
+        ProviderRebalancerSingleton.setTargetProvider(targetService);
+    }
+
+    @Override
+    public void start() throws Exception {
+        log.debug("Starting status service");
+        statusService.start();
+
+        log.debug("Starting target service");
+        targetService.start();
+
+        log.info(String.format("starting controller '%s' at '%s/%s'", name, metaaddress, metacluster));
+        manager = HelixControllerMain.startHelixController(metaaddress, metacluster, name, HelixControllerMain.STANDALONE);
+
+        if (autorefresh > 0) {
+            log.debug(String.format("installing autorefresh with interval %d ms", autorefresh));
+            executor = Executors.newSingleThreadScheduledExecutor();
+            executor.scheduleAtFixedRate(new RefreshRunnable(), autorefresh, autorefresh, TimeUnit.MILLISECONDS);
+        }
+    }
+
+    @Override
+    public void stop() throws Exception {
+        if (executor != null) {
+            executor.shutdownNow();
+            // block until the refresh task has actually terminated,
+            // without the previous sleep-poll busy loop
+            while (!executor.awaitTermination(100, TimeUnit.MILLISECONDS)) {
+                // keep waiting
+            }
+            executor = null;
+        }
+        if (manager != null) {
+            log.info(String.format("Stopping controller '%s' at '%s/%s'", name, metaaddress, metacluster));
+            manager.disconnect();
+            manager = null;
+        }
+        if (targetService != null) {
+            log.debug("Stopping target service");
+            targetService.stop();
+            targetService = null;
+        }
+        if (statusService != null) {
+            log.debug("Stopping status service");
+            statusService.stop();
+            statusService = null;
+        }
+    }
+
+    /**
+     * Periodically rewrites each meta resource's ideal state to trigger the
+     * user-defined rebalancer. Exceptions are caught and logged because an
+     * exception escaping run() would silently cancel the scheduled task.
+     */
+    private class RefreshRunnable implements Runnable {
+        @Override
+        public void run() {
+            try {
+                log.debug("running status refresh");
+                HelixAdmin admin = manager.getClusterManagmentTool();
+
+                for (String metaResource : admin.getResourcesInCluster(metacluster)) {
+                    log.debug(String.format("refreshing meta resource '%s'", metaResource));
+
+                    IdealState poke = admin.getResourceIdealState(metacluster, metaResource);
+                    admin.setResourceIdealState(metacluster, metaResource, poke);
+                }
+            } catch (Exception e) {
+                log.warn("status refresh failed", e);
+            }
+        }
+    }
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrapper/MetaProviderService.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrapper/MetaProviderService.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrapper/MetaProviderService.java
new file mode 100644
index 0000000..0b68580
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrapper/MetaProviderService.java
@@ -0,0 +1,81 @@
+package org.apache.helix.metamanager.bootstrapper;
+
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+
+import org.apache.helix.HelixAdmin;
+import org.apache.helix.manager.zk.ZKHelixAdmin;
+import org.apache.helix.metamanager.Service;
+import org.apache.helix.metamanager.provider.ProviderProperties;
+import org.apache.helix.model.HelixConfigScope;
+import org.apache.helix.model.HelixConfigScope.ConfigScopeProperty;
+import org.apache.helix.model.builder.HelixConfigScopeBuilder;
+import org.apache.log4j.Logger;
+
+import com.google.common.collect.Lists;
+
+/**
+ * ContainerProvider bootstrapping and management. Create container provider
+ * participant, configure with container properties from meta resources and
+ * connect to meta cluster.
+ *
+ */
+public class MetaProviderService implements Service {
+
+    static final Logger log = Logger.getLogger(MetaProviderService.class);
+
+    Service service;
+
+    String clazz;
+    String metaAddress;
+    String metaCluster;
+
+    ProviderProperties config;
+
+    @Override
+    public void configure(Properties properties) throws Exception {
+        clazz = properties.getProperty("class");
+        metaAddress = properties.getProperty("metaaddress", "localhost:2199");
+        metaCluster = properties.getProperty("metacluster", "metacluster");
+
+        config = new ProviderProperties();
+        config.putAll(properties);
+    }
+
+    @Override
+    public void start() throws Exception {
+        log.info(String.format("starting service '%s' (config=%s)", clazz, config));
+
+        HelixAdmin admin = new ZKHelixAdmin(metaAddress);
+        try {
+            // pick up the managed cluster coordinates from the meta cluster config
+            HelixConfigScope managedScope = new HelixConfigScopeBuilder(ConfigScopeProperty.CLUSTER, metaCluster).build();
+            Map<String, String> managedProps = admin.getConfig(managedScope, Lists.newArrayList("cluster", "address"));
+            config.putAll(managedProps);
+
+            // each meta resource carries the config of one container type
+            for (String resource : admin.getResourcesInCluster(metaCluster)) {
+                HelixConfigScope resScope = new HelixConfigScopeBuilder(ConfigScopeProperty.RESOURCE, metaCluster, resource).build();
+                List<String> resKeys = admin.getConfigKeys(resScope);
+                Map<String, String> resProps = admin.getConfig(resScope, resKeys);
+
+                Properties containerProps = new Properties();
+                containerProps.putAll(resProps);
+
+                config.addContainer(resource, containerProps);
+            }
+        } finally {
+            // the admin connection is only needed for config retrieval;
+            // previously it was never closed (connection leak)
+            admin.close();
+        }
+
+        service = BootUtils.createInstance(clazz);
+        service.configure(config);
+        service.start();
+    }
+
+    @Override
+    public void stop() throws Exception {
+        log.info(String.format("stopping service '%s' (config=%s)", clazz, config));
+        if (service != null) {
+            service.stop();
+            service = null;
+        }
+    }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrapper/MetaResourceService.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrapper/MetaResourceService.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrapper/MetaResourceService.java
new file mode 100644
index 0000000..c8f0664
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrapper/MetaResourceService.java
@@ -0,0 +1,87 @@
+package org.apache.helix.metamanager.bootstrapper;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+
+import org.apache.helix.HelixAdmin;
+import org.apache.helix.manager.zk.ZKHelixAdmin;
+import org.apache.helix.metamanager.Service;
+import org.apache.helix.metamanager.provider.ProviderRebalancer;
+import org.apache.helix.model.HelixConfigScope;
+import org.apache.helix.model.IdealState;
+import org.apache.helix.model.HelixConfigScope.ConfigScopeProperty;
+import org.apache.helix.model.IdealState.RebalanceMode;
+import org.apache.helix.model.builder.HelixConfigScopeBuilder;
+import org.apache.log4j.Logger;
+
+import com.google.common.collect.Maps;
+
+/**
+ * Bootstrapping meta resource. Create container type configuration in Helix
+ * zookeeper namespace.
+ *
+ */
+public class MetaResourceService implements Service {
+
+    static final Logger log = Logger.getLogger(MetaResourceService.class);
+
+    // upper bound on container count per meta resource (HELIX-226 workaround)
+    static final int MAX_PARTITIONS = 256;
+
+    String metaCluster;
+    String metaAddress;
+    String name;
+    Map<String, String> config;
+
+    @Override
+    public void configure(Properties properties) throws Exception {
+        metaCluster = properties.getProperty("metacluster", "metacluster");
+        metaAddress = properties.getProperty("metaaddress", "localhost:2199");
+        name = properties.getProperty("name", "container");
+
+        this.config = new HashMap<String, String>();
+        for (Map.Entry<Object, Object> entry : properties.entrySet()) {
+            this.config.put((String) entry.getKey(), (String) entry.getValue());
+        }
+    }
+
+    @Override
+    public void start() throws Exception {
+        log.info(String.format("setting up meta resource '%s' at '%s/%s'", name, metaAddress, metaCluster));
+        HelixAdmin admin = new ZKHelixAdmin(metaAddress);
+        try {
+            log.info(String.format("setting up container '%s' (config='%s')", name, config));
+
+            admin.addResource(metaCluster, name, 1, "OnlineOffline", RebalanceMode.USER_DEFINED.toString());
+            IdealState idealState = admin.getResourceIdealState(metaCluster, name);
+            idealState.setRebalancerClassName(ProviderRebalancer.class.getName());
+            idealState.setReplicas("1");
+
+            // BEGIN workaround
+            // FIXME workaround for HELIX-226
+            Map<String, List<String>> listFields = Maps.newHashMap();
+            Map<String, Map<String, String>> mapFields = Maps.newHashMap();
+            for (int i = 0; i < MAX_PARTITIONS; i++) {
+                String partitionName = name + "_" + i;
+                listFields.put(partitionName, new ArrayList<String>());
+                mapFields.put(partitionName, new HashMap<String, String>());
+            }
+            idealState.getRecord().setListFields(listFields);
+            idealState.getRecord().setMapFields(mapFields);
+            // END workaround
+
+            admin.setResourceIdealState(metaCluster, name, idealState);
+
+            HelixConfigScope scope = new HelixConfigScopeBuilder(ConfigScopeProperty.RESOURCE, metaCluster, name).build();
+            admin.setConfig(scope, this.config);
+        } finally {
+            // release the zookeeper connection even if setup fails
+            admin.close();
+        }
+        log.info("setup complete");
+    }
+
+    @Override
+    public void stop() throws Exception {
+        // left blank -- meta resource structures are left in place
+    }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrapper/MetaService.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrapper/MetaService.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrapper/MetaService.java
new file mode 100644
index 0000000..2e5e686
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrapper/MetaService.java
@@ -0,0 +1,80 @@
+package org.apache.helix.metamanager.bootstrapper;
+
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+
+import org.apache.commons.lang.StringUtils;
+import org.apache.helix.HelixAdmin;
+import org.apache.helix.manager.zk.ZKHelixAdmin;
+import org.apache.helix.metamanager.Service;
+import org.apache.helix.metamanager.bootstrap.BootUtils;
+import org.apache.helix.model.HelixConfigScope;
+import org.apache.helix.model.HelixConfigScope.ConfigScopeProperty;
+import org.apache.helix.model.builder.HelixConfigScopeBuilder;
+import org.apache.log4j.Logger;
+
+import com.google.common.collect.Lists;
+
+public class MetaService implements Service {
+
+ static final Logger log = Logger.getLogger(MetaService.class);
+
+ Service service;
+
+ String clazz;
+ String metaAddress;
+
+ String metaCluster;
+
+ Properties config;
+
+ @Override
+ public void configure(Properties properties) throws Exception {
+ clazz = properties.getProperty("class");
+ metaAddress = properties.getProperty("metaaddress", "localhost:2199");
+ metaCluster = properties.getProperty("metacluster", "metacluster");
+
+ this.config = new Properties();
+ this.config.putAll(properties);
+ }
+
+ @Override
+ public void start() throws Exception {
+ log.info(String.format("starting service '%s' (config=%s)", clazz, config));
+
+ HelixAdmin admin = new ZKHelixAdmin(metaAddress);
+
+ HelixConfigScope managedScope = new HelixConfigScopeBuilder(ConfigScopeProperty.CLUSTER, metaCluster).build();
+ Map<String, String> managedProps = admin.getConfig(managedScope, Lists.newArrayList("cluster", "address"));
+ config.putAll(managedProps);
+
+ Collection<String> resources = admin.getResourcesInCluster(metaCluster);
+ config.put("containers", StringUtils.join(resources, ","));
+
+ for(String resource : admin.getResourcesInCluster(metaCluster)) {
+ HelixConfigScope resScope = new HelixConfigScopeBuilder(ConfigScopeProperty.RESOURCE, metaCluster, resource).build();
+ List<String> resKeys = admin.getConfigKeys(resScope);
+ Map<String, String> resProps = admin.getConfig(resScope, resKeys);
+
+ for(Map.Entry<String, String> entry : resProps.entrySet()) {
+ config.put(resource + "." + entry.getKey(), entry.getValue());
+ }
+ }
+
+ service = BootUtils.createInstance(clazz);
+ service.configure(config);
+ service.start();
+ }
+
+ @Override
+ public void stop() throws Exception {
+ log.info(String.format("stopping service '%s' (config=%s)", clazz, config));
+ if (service != null) {
+ service.stop();
+ service = null;
+ }
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrapper/ResourceService.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrapper/ResourceService.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrapper/ResourceService.java
new file mode 100644
index 0000000..35bed91
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrapper/ResourceService.java
@@ -0,0 +1,61 @@
+package org.apache.helix.metamanager.bootstrapper;
+
+import java.util.Properties;
+
+import org.apache.helix.HelixAdmin;
+import org.apache.helix.manager.zk.ZKHelixAdmin;
+import org.apache.helix.metamanager.Service;
+import org.apache.helix.model.IdealState;
+import org.apache.helix.model.IdealState.RebalanceMode;
+import org.apache.log4j.Logger;
+
+/**
+ * Bootstrapping Helix resource. Create resource in Helix and configure
+ * properties.
+ *
+ */
+public class ResourceService implements Service {
+
+ static final Logger log = Logger.getLogger(ResourceService.class);
+
+ String cluster;
+ String address;
+ String container;
+ String name;
+ String model;
+ int partitions;
+ int replica;
+
+ @Override
+ public void configure(Properties properties) throws Exception {
+ cluster = properties.getProperty("cluster", "cluster");
+ address = properties.getProperty("address", "localhost:2199");
+ name = properties.getProperty("name", "resource");
+ container = properties.getProperty("container", "container");
+ model = properties.getProperty("model", "OnlineOffline");
+ partitions = Integer.parseInt(properties.getProperty("partitions", "1"));
+ replica = Integer.parseInt(properties.getProperty("replica", "1"));
+ }
+
+ @Override
+ public void start() throws Exception {
+ log.info(String.format("setting up resource '%s' at '%s/%s'", name, address, cluster));
+ HelixAdmin admin = new ZKHelixAdmin(address);
+
+ log.info(String.format("setting up resource '%s' (container='%s', model='%s', partitions=%d, replica=%d)", name, container, model, partitions, replica));
+
+ admin.addResource(cluster, name, partitions, model, RebalanceMode.FULL_AUTO.toString());
+ IdealState idealState = admin.getResourceIdealState(cluster, name);
+ idealState.setInstanceGroupTag(container);
+ idealState.setReplicas(String.valueOf(replica));
+ admin.setResourceIdealState(cluster, name, idealState);
+ admin.close();
+ log.info("setup complete");
+ }
+
+ @Override
+ public void stop() throws Exception {
+ // left blank
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrapper/ZookeeperService.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrapper/ZookeeperService.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrapper/ZookeeperService.java
new file mode 100644
index 0000000..b220dc8
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/bootstrapper/ZookeeperService.java
@@ -0,0 +1,64 @@
+package org.apache.helix.metamanager.bootstrapper;
+
+import java.io.File;
+import java.util.Properties;
+
+import org.I0Itec.zkclient.IDefaultNameSpace;
+import org.I0Itec.zkclient.ZkClient;
+import org.I0Itec.zkclient.ZkServer;
+import org.apache.commons.io.FileUtils;
+import org.apache.helix.metamanager.Service;
+import org.apache.log4j.Logger;
+
+/**
+ * Bootstrapping zookeeper. Convenience tool for creating standalone zookeeper
+ * instance for test deployments. For production use a separate zookeeper
+ * cluster is strongly recommended.
+ *
+ */
+public class ZookeeperService implements Service {
+
+ static final Logger log = Logger.getLogger(ZookeeperService.class);
+
+ String dataDir;
+ String logDir;
+ int port;
+
+ ZkServer server;
+
+ @Override
+ public void configure(Properties properties) throws Exception {
+ dataDir = properties.getProperty("datadir", "/tmp/zk/data");
+ logDir = properties.getProperty("logdir", "/tmp/zk/log");
+ port = Integer.parseInt(properties.getProperty("port", "2199"));
+ }
+
+ @Override
+ public void start() {
+ log.info(String.format("starting zookeeper service (dataDir='%s', logDir='%s', port=%d)", dataDir, logDir, port));
+
+ FileUtils.deleteQuietly(new File(dataDir));
+ FileUtils.deleteQuietly(new File(logDir));
+
+ IDefaultNameSpace defaultNameSpace = new IDefaultNameSpace() {
+ @Override
+ public void createDefaultNameSpace(ZkClient zkClient) {
+ // left blank
+ }
+ };
+
+ server = new ZkServer(dataDir, logDir, defaultNameSpace, port);
+ server.start();
+ }
+
+ @Override
+ public void stop() {
+ log.info("stopping zookeeper service");
+
+ if (server != null) {
+ server.shutdown();
+ server = null;
+ }
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/cluster/FileTargetProvider.java
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/cluster/FileTargetProvider.java b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/cluster/FileTargetProvider.java
new file mode 100644
index 0000000..b70d9ba
--- /dev/null
+++ b/recipes/meta-cluster-manager/src/main/java/org/apache/helix/metamanager/cluster/FileTargetProvider.java
@@ -0,0 +1,29 @@
+package org.apache.helix.metamanager.cluster;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.Properties;
+
+import org.apache.helix.metamanager.ClusterStatusProvider;
+
+
+public class FileTargetProvider implements ClusterStatusProvider {
+
+ final File file;
+
+ public FileTargetProvider(Properties properties) {
+ this.file = new File(properties.getProperty("path"));
+ }
+
+ @Override
+ public int getTargetContainerCount(String containerType) throws FileNotFoundException, IOException, IllegalArgumentException {
+ Properties properties = new Properties();
+ properties.load(new FileReader(file));
+ if(!properties.contains(containerType))
+ throw new IllegalArgumentException(String.format("container type '%s' not found in '%s'", containerType, file.getCanonicalPath()));
+ return Integer.parseInt((String)properties.get(containerType));
+ }
+
+}
[15/15] git commit: Adding Helix-task-framework and Yarn integration
modules
Posted by ki...@apache.org.
Adding Helix-task-framework and Yarn integration modules
Project: http://git-wip-us.apache.org/repos/asf/incubator-helix/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-helix/commit/e38aa54b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-helix/tree/e38aa54b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-helix/diff/e38aa54b
Branch: refs/heads/helix-yarn
Commit: e38aa54b07453d3dd1690317cb5e39efe5a4b79c
Parents: 84fb26b
Author: Kishore Gopalakrishna <g....@gmail.com>
Authored: Fri Sep 20 11:29:43 2013 -0700
Committer: Kishore Gopalakrishna <g....@gmail.com>
Committed: Fri Sep 20 11:29:43 2013 -0700
----------------------------------------------------------------------
.../main/java/org/apache/helix/ZNRecord.java | 21 +-
.../controller/GenericHelixController.java | 1 -
.../stages/CurrentStateComputationStage.java | 15 +-
.../controller/stages/CurrentStateOutput.java | 62 +-
.../handling/HelixStateTransitionHandler.java | 83 ++-
.../messaging/handling/HelixTaskResult.java | 9 +
.../org/apache/helix/model/CurrentState.java | 36 +
.../apache/helix/model/ResourceAssignment.java | 14 +
.../java/org/apache/helix/task/TargetState.java | 25 +
.../main/java/org/apache/helix/task/Task.java | 25 +
.../java/org/apache/helix/task/TaskConfig.java | 333 +++++++++
.../org/apache/helix/task/TaskConstants.java | 31 +
.../java/org/apache/helix/task/TaskContext.java | 147 ++++
.../java/org/apache/helix/task/TaskDag.java | 157 ++++
.../java/org/apache/helix/task/TaskDriver.java | 382 ++++++++++
.../java/org/apache/helix/task/TaskFactory.java | 23 +
.../apache/helix/task/TaskPartitionState.java | 31 +
.../org/apache/helix/task/TaskRebalancer.java | 736 +++++++++++++++++++
.../java/org/apache/helix/task/TaskResult.java | 63 ++
.../java/org/apache/helix/task/TaskRunner.java | 190 +++++
.../java/org/apache/helix/task/TaskState.java | 31 +
.../org/apache/helix/task/TaskStateModel.java | 266 +++++++
.../helix/task/TaskStateModelFactory.java | 34 +
.../java/org/apache/helix/task/TaskUtil.java | 161 ++++
.../java/org/apache/helix/task/Workflow.java | 261 +++++++
.../org/apache/helix/task/WorkflowConfig.java | 116 +++
.../org/apache/helix/task/WorkflowContext.java | 110 +++
.../org/apache/helix/task/beans/TaskBean.java | 30 +
.../apache/helix/task/beans/WorkflowBean.java | 21 +
.../org/apache/helix/tools/ClusterSetup.java | 2 +
.../helix/tools/StateModelConfigGenerator.java | 96 ++-
.../org/apache/helix/DummyProcessThread.java | 12 +-
.../integration/ZkIntegrationTestBase.java | 3 +-
.../integration/task/TestTaskRebalancer.java | 330 +++++++++
.../task/TestTaskRebalancerStopResume.java | 231 ++++++
.../apache/helix/integration/task/TestUtil.java | 128 ++++
.../integration/task/WorkflowGenerator.java | 76 ++
recipes/auto-scale/README.md | 82 +++
recipes/auto-scale/pom.xml | 210 ++++++
.../auto-scale/src/main/assembly/assembly.xml | 32 +
.../auto-scale/src/main/config/log4j.properties | 30 +
.../apache/helix/autoscale/ClusterAdmin.java | 30 +
.../helix/autoscale/ContainerProvider.java | 40 +
.../autoscale/ContainerProviderService.java | 9 +
.../helix/autoscale/HelixClusterAdmin.java | 43 ++
.../org/apache/helix/autoscale/Service.java | 38 +
.../apache/helix/autoscale/StatusProvider.java | 35 +
.../helix/autoscale/StatusProviderService.java | 9 +
.../apache/helix/autoscale/TargetProvider.java | 25 +
.../helix/autoscale/TargetProviderService.java | 9 +
.../apache/helix/autoscale/ZookeeperSetter.java | 30 +
.../helix/autoscale/bootstrapper/Boot.java | 132 ++++
.../helix/autoscale/bootstrapper/BootUtils.java | 104 +++
.../autoscale/bootstrapper/ClusterService.java | 46 ++
.../bootstrapper/ControllerService.java | 50 ++
.../bootstrapper/MetaClusterService.java | 61 ++
.../bootstrapper/MetaControllerService.java | 114 +++
.../bootstrapper/MetaProviderService.java | 81 ++
.../bootstrapper/MetaResourceService.java | 87 +++
.../autoscale/bootstrapper/ResourceService.java | 61 ++
.../bootstrapper/ZookeeperService.java | 64 ++
.../autoscale/container/ContainerProcess.java | 133 ++++
.../container/ContainerProcessProperties.java | 66 ++
.../autoscale/container/ContainerUtils.java | 46 ++
.../autoscale/impl/FileTargetProvider.java | 51 ++
.../autoscale/impl/RedisTargetProvider.java | 356 +++++++++
.../autoscale/impl/StaticTargetProvider.java | 62 ++
.../impl/container/DummyMasterSlaveProcess.java | 76 ++
.../container/DummyOnlineOfflineProcess.java | 66 ++
.../impl/container/RedisServerProcess.java | 140 ++++
.../container/ZookeeperMasterSlaveProcess.java | 108 +++
.../impl/local/LocalContainerProvider.java | 119 +++
.../local/LocalContainerProviderProcess.java | 45 ++
.../impl/local/LocalContainerSingleton.java | 56 ++
.../impl/local/LocalStatusProvider.java | 53 ++
.../impl/shell/ShellContainerProcess.java | 93 +++
.../impl/shell/ShellContainerProvider.java | 151 ++++
.../shell/ShellContainerProviderProcess.java | 45 ++
.../impl/shell/ShellContainerSingleton.java | 58 ++
.../impl/shell/ShellStatusProvider.java | 64 ++
.../helix/autoscale/impl/shell/ShellUtils.java | 54 ++
.../autoscale/impl/yarn/YarnContainerData.java | 86 +++
.../impl/yarn/YarnContainerProcess.java | 53 ++
.../yarn/YarnContainerProcessProperties.java | 40 +
.../impl/yarn/YarnContainerProvider.java | 143 ++++
.../impl/yarn/YarnContainerProviderProcess.java | 158 ++++
.../yarn/YarnContainerProviderProperties.java | 64 ++
.../impl/yarn/YarnContainerService.java | 156 ++++
.../autoscale/impl/yarn/YarnDataProvider.java | 73 ++
.../autoscale/impl/yarn/YarnMasterProcess.java | 144 ++++
.../impl/yarn/YarnMasterProperties.java | 13 +
.../autoscale/impl/yarn/YarnMasterService.java | 414 +++++++++++
.../autoscale/impl/yarn/YarnStatusProvider.java | 67 ++
.../helix/autoscale/impl/yarn/YarnUtils.java | 174 +++++
.../impl/yarn/ZookeeperYarnDataProvider.java | 100 +++
.../autoscale/provider/ProviderProcess.java | 82 +++
.../autoscale/provider/ProviderProperties.java | 97 +++
.../autoscale/provider/ProviderRebalancer.java | 352 +++++++++
.../provider/ProviderRebalancerSingleton.java | 38 +
.../autoscale/provider/ProviderStateModel.java | 114 +++
.../provider/ProviderStateModelFactory.java | 27 +
.../src/main/resources/Boot2By2Local.properties | 87 +++
.../src/main/resources/Boot2By2Shell.properties | 87 +++
.../src/main/resources/Boot2By2Yarn.properties | 98 +++
.../src/main/resources/BootLocal.properties | 68 ++
.../main/resources/RedisYarnSample.properties | 89 +++
.../src/main/resources/log4j.properties | 30 +
recipes/auto-scale/src/test/config/testng.xml | 27 +
.../apache/helix/autoscale/BootstrapperIT.java | 134 ++++
.../org/apache/helix/autoscale/FailoverIT.java | 195 +++++
.../autoscale/LocalContainerProviderIT.java | 80 ++
.../autoscale/ShellContainerProviderIT.java | 95 +++
.../org/apache/helix/autoscale/TestUtils.java | 443 +++++++++++
.../org/apache/helix/autoscale/TestUtilsUT.java | 63 ++
.../autoscale/YarnContainerProviderIT.java | 101 +++
.../src/test/resources/distributed.properties | 13 +
.../src/test/resources/log4j.properties | 30 +
.../src/test/resources/standalone.properties | 13 +
recipes/meta-cluster-manager/README.md | 82 +++
recipes/meta-cluster-manager/pom.xml | 210 ++++++
.../src/main/assembly/assembly.xml | 32 +
.../src/main/config/log4j.properties | 30 +
.../apache/helix/metamanager/ClusterAdmin.java | 30 +
.../metamanager/ClusterContainerProvider.java | 32 +
.../ClusterContainerStatusProvider.java | 7 +
.../metamanager/ClusterInstanceInjector.java | 6 +
.../metamanager/ClusterStatusProvider.java | 5 +
.../apache/helix/metamanager/ConfigTool.java | 47 ++
.../helix/metamanager/ContainerProvider.java | 40 +
.../metamanager/ContainerProviderService.java | 9 +
.../metamanager/ContainerStatusProvider.java | 7 +
.../helix/metamanager/FileStatusProvider.java | 27 +
.../helix/metamanager/HelixClusterAdmin.java | 43 ++
.../org/apache/helix/metamanager/Manager.java | 129 ++++
.../apache/helix/metamanager/ManagerDemo.java | 463 ++++++++++++
.../helix/metamanager/ManagerFactory.java | 39 +
.../helix/metamanager/ManagerProcess.java | 67 ++
.../helix/metamanager/ManagerRebalancer.java | 167 +++++
.../helix/metamanager/MetaManagerDemo.java | 457 ++++++++++++
.../org/apache/helix/metamanager/Service.java | 38 +
.../helix/metamanager/StaticStatusProvider.java | 28 +
.../helix/metamanager/StatusProvider.java | 35 +
.../metamanager/StatusProviderService.java | 9 +
.../helix/metamanager/TargetProvider.java | 25 +
.../metamanager/TargetProviderService.java | 9 +
.../helix/metamanager/ZookeeperSetter.java | 30 +
.../helix/metamanager/bootstrap/BootUtil.java | 58 ++
.../helix/metamanager/bootstrap/BootUtils.java | 127 ++++
.../metamanager/bootstrap/Bootstrapper.java | 93 +++
.../metamanager/bootstrap/ManagedCluster.java | 87 +++
.../metamanager/bootstrap/MetaCluster.java | 201 +++++
.../metamanager/bootstrap/ProviderWrapper.java | 162 ++++
.../metamanager/bootstrap/StatusWrapper.java | 122 +++
.../metamanager/bootstrap/TargetWrapper.java | 117 +++
.../metamanager/bootstrap/ZookeeperWrapper.java | 57 ++
.../helix/metamanager/bootstrapper/Boot.java | 132 ++++
.../metamanager/bootstrapper/BootUtils.java | 104 +++
.../bootstrapper/ClusterService.java | 46 ++
.../bootstrapper/ControllerService.java | 50 ++
.../bootstrapper/MetaClusterService.java | 61 ++
.../bootstrapper/MetaControllerService.java | 114 +++
.../bootstrapper/MetaProviderService.java | 81 ++
.../bootstrapper/MetaResourceService.java | 87 +++
.../metamanager/bootstrapper/MetaService.java | 80 ++
.../bootstrapper/ResourceService.java | 61 ++
.../bootstrapper/ZookeeperService.java | 64 ++
.../metamanager/cluster/FileTargetProvider.java | 29 +
.../cluster/RedisTargetProvider.java | 329 +++++++++
.../cluster/StaticTargetProvider.java | 41 ++
.../metamanager/container/ContainerProcess.java | 133 ++++
.../container/ContainerProcessProperties.java | 66 ++
.../container/ContainerStateModel.java | 64 ++
.../container/ContainerStateModelFactory.java | 30 +
.../metamanager/container/ContainerUtils.java | 46 ++
.../container/impl/DummyMasterSlaveProcess.java | 76 ++
.../impl/DummyOnlineOfflineProcess.java | 64 ++
.../container/impl/DummyProcess.java | 76 ++
.../container/impl/RedisServerProcess.java | 135 ++++
.../impl/ZookeeperMasterSlaveProcess.java | 104 +++
.../metamanager/impl/FileTargetProvider.java | 51 ++
.../metamanager/impl/RedisTargetProvider.java | 356 +++++++++
.../metamanager/impl/StaticTargetProvider.java | 62 ++
.../impl/container/DummyMasterSlaveProcess.java | 76 ++
.../container/DummyOnlineOfflineProcess.java | 66 ++
.../impl/container/RedisServerProcess.java | 140 ++++
.../container/ZookeeperMasterSlaveProcess.java | 108 +++
.../impl/local/LocalContainerProcess.java | 64 ++
.../impl/local/LocalContainerProvider.java | 119 +++
.../local/LocalContainerProviderProcess.java | 45 ++
.../impl/local/LocalContainerSingleton.java | 56 ++
.../local/LocalContainerStatusProvider.java | 37 +
.../impl/local/LocalStatusProvider.java | 53 ++
.../impl/shell/ShellContainerProcess.java | 93 +++
.../impl/shell/ShellContainerProvider.java | 151 ++++
.../shell/ShellContainerProviderProcess.java | 45 ++
.../impl/shell/ShellContainerSingleton.java | 58 ++
.../shell/ShellContainerStatusProvider.java | 52 ++
.../impl/shell/ShellStatusProvider.java | 64 ++
.../metamanager/impl/shell/ShellUtils.java | 54 ++
.../impl/yarn/ApplicationConfig.java | 32 +
.../impl/yarn/ContainerMetadata.java | 80 ++
.../metamanager/impl/yarn/MetadataProvider.java | 42 ++
.../metamanager/impl/yarn/MetadataService.java | 42 ++
.../helix/metamanager/impl/yarn/Utils.java | 94 +++
.../metamanager/impl/yarn/YarnApplication.java | 171 +++++
.../impl/yarn/YarnApplicationProperties.java | 91 +++
.../impl/yarn/YarnContainerData.java | 86 +++
.../impl/yarn/YarnContainerProcess.java | 53 ++
.../yarn/YarnContainerProcessProperties.java | 40 +
.../impl/yarn/YarnContainerProvider.java | 143 ++++
.../impl/yarn/YarnContainerProviderProcess.java | 158 ++++
.../yarn/YarnContainerProviderProperties.java | 64 ++
.../impl/yarn/YarnContainerService.java | 156 ++++
.../impl/yarn/YarnContainerStatusProvider.java | 52 ++
.../metamanager/impl/yarn/YarnDataProvider.java | 73 ++
.../impl/yarn/YarnMasterProcess.java | 144 ++++
.../impl/yarn/YarnMasterProperties.java | 13 +
.../impl/yarn/YarnMasterService.java | 414 +++++++++++
.../impl/yarn/YarnStatusProvider.java | 67 ++
.../helix/metamanager/impl/yarn/YarnUtils.java | 174 +++++
.../impl/yarn/ZookeeperMetadataProvider.java | 116 +++
.../impl/yarn/ZookeeperMetadataService.java | 102 +++
.../impl/yarn/ZookeeperYarnDataProvider.java | 100 +++
.../metamanager/managed/ContainerProcess.java | 85 +++
.../metamanager/managed/HelixClusterAdmin.java | 42 ++
.../managed/LocalClusterManager.java | 42 ++
.../managed/LocalContainerProvider.java | 87 +++
.../managed/LocalProcessProvider.java | 100 +++
.../managed/LocalStatusProvider.java | 22 +
.../helix/metamanager/managed/Managed.java | 64 ++
.../metamanager/managed/ManagedFactory.java | 30 +
.../metamanager/managed/ManagedProcess.java | 85 +++
.../managed/ShellContainerProvider.java | 85 +++
.../managed/ShellProcessProvider.java | 148 ++++
.../managed/YarnContainerProvider.java | 37 +
.../metamanager/provider/ProviderProcess.java | 82 +++
.../provider/ProviderProperties.java | 97 +++
.../provider/ProviderRebalancer.java | 352 +++++++++
.../provider/ProviderRebalancerSingleton.java | 38 +
.../provider/ProviderStateModel.java | 114 +++
.../provider/ProviderStateModelFactory.java | 27 +
.../provider/local/LocalContainerProvider.java | 75 ++
.../provider/local/LocalContainerSingleton.java | 40 +
.../local/LocalContainerStatusProvider.java | 37 +
.../provider/shell/ShellContainerProvider.java | 81 ++
.../provider/shell/ShellContainerSingleton.java | 38 +
.../shell/ShellContainerStatusProvider.java | 52 ++
.../provider/yarn/ApplicationConfig.java | 32 +
.../provider/yarn/ContainerMetadata.java | 50 ++
.../provider/yarn/MetadataService.java | 42 ++
.../helix/metamanager/provider/yarn/Utils.java | 94 +++
.../provider/yarn/YarnApplication.java | 125 ++++
.../provider/yarn/YarnContainerProcess.java | 60 ++
.../provider/yarn/YarnContainerProvider.java | 108 +++
.../provider/yarn/YarnContainerService.java | 129 ++++
.../yarn/YarnContainerStatusProvider.java | 52 ++
.../metamanager/provider/yarn/YarnMaster.java | 134 ++++
.../provider/yarn/YarnMasterProcess.java | 119 +++
.../provider/yarn/YarnMasterService.java | 361 +++++++++
.../metamanager/provider/yarn/YarnProcess.java | 171 +++++
.../provider/yarn/ZookeeperMetadataService.java | 102 +++
.../metamanager/yarn/ApplicationConfig.java | 32 +
.../metamanager/yarn/ContainerMetadata.java | 50 ++
.../helix/metamanager/yarn/ContainerNode.java | 61 ++
.../helix/metamanager/yarn/MessageNode.java | 20 +
.../helix/metamanager/yarn/MetadataService.java | 146 ++++
.../apache/helix/metamanager/yarn/Utils.java | 93 +++
.../helix/metamanager/yarn/YarnApplication.java | 126 ++++
.../helix/metamanager/yarn/YarnClient.java | 5 +
.../helix/metamanager/yarn/YarnContainer.java | 14 +
.../metamanager/yarn/YarnContainerProvider.java | 90 +++
.../metamanager/yarn/YarnContainerService.java | 370 ++++++++++
.../helix/metamanager/yarn/YarnHelper.java | 5 +
.../helix/metamanager/yarn/YarnMaster.java | 134 ++++
.../helix/metamanager/yarn/YarnProcess.java | 171 +++++
.../src/main/resources/2by2local.properties | 52 ++
.../resources/2by2localMixedModels.properties | 52 ++
.../src/main/resources/2by2shell.properties | 52 ++
.../src/main/resources/2by2yarn.properties | 58 ++
.../main/resources/2by2yarnZookeeper.properties | 58 ++
.../src/main/resources/2meta2managed.properties | 52 ++
.../src/main/resources/Boot2By2Local.properties | 87 +++
.../src/main/resources/Boot2By2Shell.properties | 87 +++
.../src/main/resources/Boot2By2Yarn.properties | 98 +++
.../src/main/resources/BootLocal.properties | 68 ++
.../src/main/resources/boot/cluster.properties | 2 +
.../main/resources/boot/controller.properties | 4 +
.../main/resources/boot/metacluster.properties | 4 +
.../resources/boot/metacontroller.properties | 4 +
.../src/main/resources/boot/resdb.properties | 4 +
.../src/main/resources/boot/resws.properties | 4 +
.../main/resources/boot/zookeeper.properties | 4 +
.../src/main/resources/container.properties | 1 +
.../src/main/resources/log4j.properties | 30 +
.../src/main/resources/redisLocal.properties | 50 ++
.../src/main/resources/redisYarn.properties | 52 ++
.../src/test/conf/testng-integration.xml | 27 +
.../src/test/conf/testng-unit.xml | 27 +
.../src/test/conf/testng.xml | 27 +
.../src/test/config/testng-integration.xml | 27 +
.../src/test/config/testng-unit.xml | 27 +
.../src/test/config/testng.xml | 27 +
.../helix/metamanager/BootstrapperIT.java | 134 ++++
.../apache/helix/metamanager/FailoverIT.java | 195 +++++
.../metamanager/LocalContainerProviderIT.java | 80 ++
.../metamanager/ShellContainerProviderIT.java | 95 +++
.../metamanager/TestContainerProvider.java | 17 +
.../helix/metamanager/TestStatusProvider.java | 20 +
.../org/apache/helix/metamanager/TestUtils.java | 438 +++++++++++
.../apache/helix/metamanager/TestUtilsTest.java | 30 +
.../apache/helix/metamanager/TestUtilsUT.java | 63 ++
.../metamanager/YarnContainerProviderIT.java | 101 +++
.../metamanager/integration/BootstrapperIT.java | 127 ++++
.../metamanager/integration/FailoverIT.java | 172 +++++
.../integration/LocalContainerProviderIT.java | 72 ++
.../integration/MultipleProviderFailoverIT.java | 148 ++++
.../integration/ShellContainerProviderIT.java | 87 +++
.../integration/YarnContainerProviderIT.java | 93 +++
.../helix/metamanager/unit/TestUtilsTestUT.java | 62 ++
.../helix/metamanager/unit/TestUtilsUT.java | 55 ++
.../src/test/resources/distributed.properties | 13 +
.../src/test/resources/log4j.properties | 30 +
.../src/test/resources/standalone.properties | 13 +
recipes/pom.xml | 1 +
324 files changed, 28806 insertions(+), 42 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/helix-core/src/main/java/org/apache/helix/ZNRecord.java
----------------------------------------------------------------------
diff --git a/helix-core/src/main/java/org/apache/helix/ZNRecord.java b/helix-core/src/main/java/org/apache/helix/ZNRecord.java
index 56a6cf2..3ac9485 100644
--- a/helix-core/src/main/java/org/apache/helix/ZNRecord.java
+++ b/helix-core/src/main/java/org/apache/helix/ZNRecord.java
@@ -570,20 +570,27 @@ public class ZNRecord {
*/
public void subtract(ZNRecord value) {
for (String key : value.getSimpleFields().keySet()) {
- if (simpleFields.containsKey(key)) {
- simpleFields.remove(key);
- }
+ simpleFields.remove(key);
}
for (String key : value.getListFields().keySet()) {
- if (listFields.containsKey(key)) {
- listFields.remove(key);
- }
+ listFields.remove(key);
}
for (String key : value.getMapFields().keySet()) {
- if (mapFields.containsKey(key)) {
+ Map<String, String> map = value.getMapField(key);
+ if (map == null) {
mapFields.remove(key);
+ } else {
+ Map<String, String> nestedMap = mapFields.get(key);
+ if (nestedMap != null) {
+ for (String mapKey : map.keySet()) {
+ nestedMap.remove(mapKey);
+ }
+ if (nestedMap.size() == 0) {
+ mapFields.remove(key);
+ }
+ }
}
}
}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/helix-core/src/main/java/org/apache/helix/controller/GenericHelixController.java
----------------------------------------------------------------------
diff --git a/helix-core/src/main/java/org/apache/helix/controller/GenericHelixController.java b/helix-core/src/main/java/org/apache/helix/controller/GenericHelixController.java
index 8e4e1ea..03e5489 100644
--- a/helix-core/src/main/java/org/apache/helix/controller/GenericHelixController.java
+++ b/helix-core/src/main/java/org/apache/helix/controller/GenericHelixController.java
@@ -181,7 +181,6 @@ public class GenericHelixController implements ConfigChangeListener, IdealStateC
Pipeline rebalancePipeline = new Pipeline();
rebalancePipeline.addStage(new ResourceComputationStage());
rebalancePipeline.addStage(new CurrentStateComputationStage());
- rebalancePipeline.addStage(new RebalanceIdealStateStage());
rebalancePipeline.addStage(new BestPossibleStateCalcStage());
rebalancePipeline.addStage(new MessageGenerationPhase());
rebalancePipeline.addStage(new MessageSelectionStage());
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/helix-core/src/main/java/org/apache/helix/controller/stages/CurrentStateComputationStage.java
----------------------------------------------------------------------
diff --git a/helix-core/src/main/java/org/apache/helix/controller/stages/CurrentStateComputationStage.java b/helix-core/src/main/java/org/apache/helix/controller/stages/CurrentStateComputationStage.java
index 6097432..6a30a9d 100644
--- a/helix-core/src/main/java/org/apache/helix/controller/stages/CurrentStateComputationStage.java
+++ b/helix-core/src/main/java/org/apache/helix/controller/stages/CurrentStateComputationStage.java
@@ -118,9 +118,18 @@ public class CurrentStateComputationStage extends AbstractBaseStage {
for (String partitionName : partitionStateMap.keySet()) {
Partition partition = resource.getPartition(partitionName);
if (partition != null) {
- currentStateOutput.setCurrentState(resourceName, partition, instanceName,
- currentState.getState(partitionName));
-
+ currentStateOutput.setCurrentState(resourceName,
+ partition,
+ instanceName,
+ currentState.getState(partitionName));
+ currentStateOutput.setRequestedState(resourceName,
+ partition,
+ instanceName,
+ currentState.getRequestedState(partitionName));
+ currentStateOutput.setInfo(resourceName,
+ partition,
+ instanceName,
+ currentState.getInfo(partitionName));
} else {
// log
}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/helix-core/src/main/java/org/apache/helix/controller/stages/CurrentStateOutput.java
----------------------------------------------------------------------
diff --git a/helix-core/src/main/java/org/apache/helix/controller/stages/CurrentStateOutput.java b/helix-core/src/main/java/org/apache/helix/controller/stages/CurrentStateOutput.java
index b41f14b..9537272 100644
--- a/helix-core/src/main/java/org/apache/helix/controller/stages/CurrentStateOutput.java
+++ b/helix-core/src/main/java/org/apache/helix/controller/stages/CurrentStateOutput.java
@@ -22,13 +22,19 @@ package org.apache.helix.controller.stages;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
-
import org.apache.helix.model.CurrentState;
import org.apache.helix.model.Partition;
public class CurrentStateOutput {
private final Map<String, Map<Partition, Map<String, String>>> _currentStateMap;
private final Map<String, Map<Partition, Map<String, String>>> _pendingStateMap;
+ // Contains per-resource maps of partition -> (instance, requested_state). This corresponds to the REQUESTED_STATE
+ // field in the CURRENTSTATES node.
+ private final Map<String, Map<Partition, Map<String, String>>> _requestedStateMap;
+ // Contains per-resource maps of partition -> (instance, info). This corresponds to the INFO field in the
+ // CURRENTSTATES node. This is information returned by state transition methods on the participants. It may be used
+ // by the rebalancer.
+ private final Map<String, Map<Partition, Map<String, String>>> _infoMap;
private final Map<String, String> _resourceStateModelMap;
private final Map<String, CurrentState> _curStateMetaMap;
@@ -37,7 +43,8 @@ public class CurrentStateOutput {
_pendingStateMap = new HashMap<String, Map<Partition, Map<String, String>>>();
_resourceStateModelMap = new HashMap<String, String>();
_curStateMetaMap = new HashMap<String, CurrentState>();
-
+ _requestedStateMap = new HashMap<String, Map<Partition, Map<String, String>>>();
+ _infoMap = new HashMap<String, Map<Partition, Map<String, String>>>();
}
public void setResourceStateModelDef(String resourceName, String stateModelDefName) {
@@ -78,6 +85,29 @@ public class CurrentStateOutput {
_currentStateMap.get(resourceName).get(partition).put(instanceName, state);
}
+ public void setRequestedState(String resourceName, Partition partition, String instanceName, String state) {
+ if (!_requestedStateMap.containsKey(resourceName)) {
+ _requestedStateMap.put(resourceName, new HashMap<Partition, Map<String, String>>());
+ }
+ if (!_requestedStateMap.get(resourceName).containsKey(partition)) {
+ _requestedStateMap.get(resourceName).put(partition, new HashMap<String, String>());
+ }
+ _requestedStateMap.get(resourceName).get(partition).put(instanceName, state);
+ }
+
+ public void setInfo(String resourceName, Partition partition, String instanceName, String state)
+ {
+ if (!_infoMap.containsKey(resourceName))
+ {
+ _infoMap.put(resourceName, new HashMap<Partition, Map<String, String>>());
+ }
+ if (!_infoMap.get(resourceName).containsKey(partition))
+ {
+ _infoMap.get(resourceName).put(partition, new HashMap<String, String>());
+ }
+ _infoMap.get(resourceName).get(partition).put(instanceName, state);
+ }
+
public void setPendingState(String resourceName, Partition partition, String instanceName,
String state) {
if (!_pendingStateMap.containsKey(resourceName)) {
@@ -107,6 +137,34 @@ public class CurrentStateOutput {
return null;
}
+ public String getRequestedState(String resourceName, Partition partition, String instanceName)
+ {
+ Map<Partition, Map<String, String>> map = _requestedStateMap.get(resourceName);
+ if (map != null)
+ {
+ Map<String, String> instanceStateMap = map.get(partition);
+ if (instanceStateMap != null)
+ {
+ return instanceStateMap.get(instanceName);
+ }
+ }
+ return null;
+ }
+
+ public String getInfo(String resourceName, Partition partition, String instanceName)
+ {
+ Map<Partition, Map<String, String>> map = _infoMap.get(resourceName);
+ if (map != null)
+ {
+ Map<String, String> instanceStateMap = map.get(partition);
+ if (instanceStateMap != null)
+ {
+ return instanceStateMap.get(instanceName);
+ }
+ }
+ return null;
+ }
+
/**
* given (resource, partition, instance), returns toState
* @param resourceName
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/helix-core/src/main/java/org/apache/helix/messaging/handling/HelixStateTransitionHandler.java
----------------------------------------------------------------------
diff --git a/helix-core/src/main/java/org/apache/helix/messaging/handling/HelixStateTransitionHandler.java b/helix-core/src/main/java/org/apache/helix/messaging/handling/HelixStateTransitionHandler.java
index 627babc..8da7ec9 100644
--- a/helix-core/src/main/java/org/apache/helix/messaging/handling/HelixStateTransitionHandler.java
+++ b/helix-core/src/main/java/org/apache/helix/messaging/handling/HelixStateTransitionHandler.java
@@ -25,10 +25,10 @@ import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.List;
+import java.util.Map;
+import java.util.TreeMap;
import java.util.concurrent.ConcurrentHashMap;
-
import org.apache.helix.HelixAdmin;
-import org.apache.helix.HelixConstants;
import org.apache.helix.HelixDataAccessor;
import org.apache.helix.HelixDefinedState;
import org.apache.helix.HelixException;
@@ -36,9 +36,10 @@ import org.apache.helix.HelixManager;
import org.apache.helix.NotificationContext;
import org.apache.helix.NotificationContext.MapKey;
import org.apache.helix.PropertyKey;
+import org.apache.helix.PropertyKey.Builder;
+import org.apache.helix.ZNRecord;
import org.apache.helix.ZNRecordBucketizer;
import org.apache.helix.ZNRecordDelta;
-import org.apache.helix.PropertyKey.Builder;
import org.apache.helix.ZNRecordDelta.MergeOperation;
import org.apache.helix.model.CurrentState;
import org.apache.helix.model.Message;
@@ -57,7 +58,7 @@ public class HelixStateTransitionHandler extends MessageHandler {
}
}
- private static Logger logger = Logger.getLogger(HelixStateTransitionHandler.class);
+ private static final Logger logger = Logger.getLogger(HelixStateTransitionHandler.class);
private final StateModel _stateModel;
StatusUpdateUtil _statusUpdateUtil;
private final StateModelParser _transitionMethodFinder;
@@ -110,6 +111,43 @@ public class HelixStateTransitionHandler extends MessageHandler {
logger.error(errorMessage);
throw new HelixStateMismatchException(errorMessage);
}
+
+ // Reset the REQUESTED_STATE property if it exists.
+ try
+ {
+ String instance = _manager.getInstanceName();
+ String sessionId = _message.getTgtSessionId();
+ String resource = _message.getResourceName();
+ ZNRecordBucketizer bucketizer = new ZNRecordBucketizer(_message.getBucketSize());
+ PropertyKey key = accessor.keyBuilder().currentState(instance,
+ sessionId,
+ resource,
+ bucketizer.getBucketName(partitionName));
+ ZNRecord rec = new ZNRecord(resource);
+ Map<String, String> map = new TreeMap<String, String>();
+ map.put(CurrentState.CurrentStateProperty.REQUESTED_STATE.name(), null);
+ rec.getMapFields().put(partitionName, map);
+ ZNRecordDelta delta = new ZNRecordDelta(rec, ZNRecordDelta.MergeOperation.SUBTRACT);
+ List<ZNRecordDelta> deltaList = new ArrayList<ZNRecordDelta>();
+ deltaList.add(delta);
+ CurrentState currStateUpdate = new CurrentState(resource);
+ currStateUpdate.setDeltaList(deltaList);
+
+ // Update the ZK current state of the node
+ accessor.updateProperty(key, currStateUpdate);
+ }
+ catch (Exception e)
+ {
+ logger.error("Error when removing " +
+ CurrentState.CurrentStateProperty.REQUESTED_STATE.name() + " from current state.", e);
+ StateTransitionError error = new StateTransitionError(ErrorType.FRAMEWORK, ErrorCode.ERROR, e);
+ _stateModel.rollbackOnError(_message, _notificationContext, error);
+ _statusUpdateUtil.logError(_message,
+ HelixStateTransitionHandler.class,
+ e,
+ "Error when removing " + CurrentState.CurrentStateProperty.REQUESTED_STATE.name() + " from current state.",
+ accessor);
+ }
}
void postHandleMessage() {
@@ -138,6 +176,9 @@ public class HelixStateTransitionHandler extends MessageHandler {
return;
}
+ // Set the INFO property.
+ _currentStateDelta.setInfo(partitionKey, taskResult.getInfo());
+
if (taskResult.isSuccess()) {
// String fromState = message.getFromState();
String toState = _message.getToState();
@@ -147,10 +188,9 @@ public class HelixStateTransitionHandler extends MessageHandler {
// for "OnOfflineToDROPPED" message, we need to remove the resource key record
// from the current state of the instance because the resource key is dropped.
// In the state model it will be stayed as "OFFLINE", which is OK.
- ZNRecordDelta delta =
- new ZNRecordDelta(_currentStateDelta.getRecord(), MergeOperation.SUBTRACT);
- // Don't subtract simple fields since they contain stateModelDefRef
- delta._record.getSimpleFields().clear();
+ ZNRecord rec = new ZNRecord(_currentStateDelta.getId());
+ rec.getMapFields().put(partitionKey, null);
+ ZNRecordDelta delta = new ZNRecordDelta(rec, MergeOperation.SUBTRACT);
List<ZNRecordDelta> deltaList = new ArrayList<ZNRecordDelta>();
deltaList.add(delta);
@@ -288,15 +328,28 @@ public class HelixStateTransitionHandler extends MessageHandler {
String fromState = message.getFromState();
String toState = message.getToState();
methodToInvoke =
- _transitionMethodFinder.getMethodForTransition(_stateModel.getClass(), fromState, toState,
- new Class[] {
- Message.class, NotificationContext.class
- });
+ _transitionMethodFinder.getMethodForTransition(_stateModel.getClass(),
+ fromState,
+ toState,
+ new Class[] { Message.class,
+ NotificationContext.class });
if (methodToInvoke != null) {
- methodToInvoke.invoke(_stateModel, new Object[] {
- message, context
- });
+ logger.info(String.format("Instance %s, partition %s received state transition from %s to %s on session %s.",
+ message.getTgtName(),
+ message.getPartitionName(),
+ message.getFromState(),
+ message.getToState(),
+ message.getTgtSessionId()));
+
+ Object result = methodToInvoke.invoke(_stateModel, new Object[] { message, context });
taskResult.setSuccess(true);
+ String resultStr;
+ if (result == null || result instanceof Void) {
+ resultStr = "";
+ } else {
+ resultStr = result.toString();
+ }
+ taskResult.setInfo(resultStr);
} else {
String errorMessage =
"Unable to find method for transition from " + fromState + " to " + toState + " in "
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/helix-core/src/main/java/org/apache/helix/messaging/handling/HelixTaskResult.java
----------------------------------------------------------------------
diff --git a/helix-core/src/main/java/org/apache/helix/messaging/handling/HelixTaskResult.java b/helix-core/src/main/java/org/apache/helix/messaging/handling/HelixTaskResult.java
index 22c4fcd..ced9c65 100644
--- a/helix-core/src/main/java/org/apache/helix/messaging/handling/HelixTaskResult.java
+++ b/helix-core/src/main/java/org/apache/helix/messaging/handling/HelixTaskResult.java
@@ -26,6 +26,7 @@ public class HelixTaskResult {
private boolean _success;
private String _message = "";
+ private String _info = "";
private Map<String, String> _taskResultMap = new HashMap<String, String>();
private boolean _interrupted = false;
Exception _exception = null;
@@ -54,6 +55,14 @@ public class HelixTaskResult {
this._message = message;
}
+ public String getInfo() {
+ return _info;
+ }
+
+ public void setInfo(String info) {
+ _info = info;
+ }
+
public Map<String, String> getTaskResultMap() {
return _taskResultMap;
}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/helix-core/src/main/java/org/apache/helix/model/CurrentState.java
----------------------------------------------------------------------
diff --git a/helix-core/src/main/java/org/apache/helix/model/CurrentState.java b/helix-core/src/main/java/org/apache/helix/model/CurrentState.java
index 32854ab..47bccb9 100644
--- a/helix-core/src/main/java/org/apache/helix/model/CurrentState.java
+++ b/helix-core/src/main/java/org/apache/helix/model/CurrentState.java
@@ -39,6 +39,8 @@ public class CurrentState extends HelixProperty {
public enum CurrentStateProperty {
SESSION_ID,
CURRENT_STATE,
+ REQUESTED_STATE,
+ INFO,
STATE_MODEL_DEF,
STATE_MODEL_FACTORY_NAME,
RESOURCE // ,
@@ -115,6 +117,24 @@ public class CurrentState extends HelixProperty {
return null;
}
+ public String getInfo(String partitionName) {
+ Map<String, Map<String, String>> mapFields = _record.getMapFields();
+ Map<String, String> mapField = mapFields.get(partitionName);
+ if (mapField != null) {
+ return mapField.get(CurrentStateProperty.INFO.name());
+ }
+ return null;
+ }
+
+ public String getRequestedState(String partitionName) {
+ Map<String, Map<String, String>> mapFields = _record.getMapFields();
+ Map<String, String> mapField = mapFields.get(partitionName);
+ if (mapField != null) {
+ return mapField.get(CurrentStateProperty.REQUESTED_STATE.name());
+ }
+ return null;
+ }
+
/**
* Set the state model that the resource follows
* @param stateModelName an identifier of the state model
@@ -144,6 +164,22 @@ public class CurrentState extends HelixProperty {
mapFields.get(partitionName).put(CurrentStateProperty.CURRENT_STATE.toString(), state);
}
+ public void setInfo(String partitionName, String info) {
+ Map<String, Map<String, String>> mapFields = _record.getMapFields();
+ if (mapFields.get(partitionName) == null) {
+ mapFields.put(partitionName, new TreeMap<String, String>());
+ }
+ mapFields.get(partitionName).put(CurrentStateProperty.INFO.name(), info);
+ }
+
+ public void setRequestedState(String partitionName, String state) {
+ Map<String, Map<String, String>> mapFields = _record.getMapFields();
+ if (mapFields.get(partitionName) == null) {
+ mapFields.put(partitionName, new TreeMap<String, String>());
+ }
+ mapFields.get(partitionName).put(CurrentStateProperty.REQUESTED_STATE.name(), state);
+ }
+
/**
* Set the state model factory
* @param factoryName the name of the factory
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/helix-core/src/main/java/org/apache/helix/model/ResourceAssignment.java
----------------------------------------------------------------------
diff --git a/helix-core/src/main/java/org/apache/helix/model/ResourceAssignment.java b/helix-core/src/main/java/org/apache/helix/model/ResourceAssignment.java
index 2b3d14d..7943ea2 100644
--- a/helix-core/src/main/java/org/apache/helix/model/ResourceAssignment.java
+++ b/helix-core/src/main/java/org/apache/helix/model/ResourceAssignment.java
@@ -25,6 +25,8 @@ import java.util.List;
import java.util.Map;
import org.apache.helix.HelixProperty;
+import org.apache.helix.ZNRecord;
+
/**
* Represents the assignments of replicas for an entire resource, keyed on partitions of the
@@ -48,6 +50,14 @@ public class ResourceAssignment extends HelixProperty {
}
/**
+ * Initialize a mapping from a {@link ZNRecord}.
+ * @param record The underlying ZNRecord.
+ */
+ public ResourceAssignment(ZNRecord record) {
+ super(record);
+ }
+
+ /**
* Initialize a mapping from an existing ResourceMapping
* @param existingMapping pre-populated ResourceMapping
*/
@@ -55,6 +65,10 @@ public class ResourceAssignment extends HelixProperty {
super(existingMapping);
}
+ public String getResourceName() {
+ return _record.getId();
+ }
+
/**
* Get the currently mapped partitions
* @return list of Partition objects
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/helix-core/src/main/java/org/apache/helix/task/TargetState.java
----------------------------------------------------------------------
diff --git a/helix-core/src/main/java/org/apache/helix/task/TargetState.java b/helix-core/src/main/java/org/apache/helix/task/TargetState.java
new file mode 100644
index 0000000..a84c7ea
--- /dev/null
+++ b/helix-core/src/main/java/org/apache/helix/task/TargetState.java
@@ -0,0 +1,25 @@
+package org.apache.helix.task;
+
+
+/**
+ * Enumeration of target states for a task.
+ *
+ * @author Abe <as...@linkedin.com>
+ * @version $Revision$
+ */
/**
 * Target states the task rebalancer can be asked to drive a task toward.
 */
public enum TargetState {
  /** Start, or resume, the task. */
  START,
  /** Stop any running task partitions and make no further task assignments. */
  STOP,
  /** Delete the task entirely. */
  DELETE
}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/helix-core/src/main/java/org/apache/helix/task/Task.java
----------------------------------------------------------------------
diff --git a/helix-core/src/main/java/org/apache/helix/task/Task.java b/helix-core/src/main/java/org/apache/helix/task/Task.java
new file mode 100644
index 0000000..2741f9e
--- /dev/null
+++ b/helix-core/src/main/java/org/apache/helix/task/Task.java
@@ -0,0 +1,25 @@
+package org.apache.helix.task;
+
+
+/**
+ * The interface that is to be implemented by a specific task implementation.
+ *
+ * @author Abe <as...@linkedin.com>
+ * @version $Revision$
+ */
+public interface Task
+{
+ /**
+ * Execute the task.
+ *
+ * @return A {@link TaskResult} object indicating the status of the task and any additional context information that
+ * can be interpreted by the specific {@link Task} implementation.
+ */
+ TaskResult run();
+
+ /**
+ * Signals the task to stop execution. The task implementation should carry out any clean up actions that may be
+ * required and return from the {@link #run()} method.
+ */
+ void cancel();
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/helix-core/src/main/java/org/apache/helix/task/TaskConfig.java
----------------------------------------------------------------------
diff --git a/helix-core/src/main/java/org/apache/helix/task/TaskConfig.java b/helix-core/src/main/java/org/apache/helix/task/TaskConfig.java
new file mode 100644
index 0000000..f85160a
--- /dev/null
+++ b/helix-core/src/main/java/org/apache/helix/task/TaskConfig.java
@@ -0,0 +1,333 @@
+/*
+ * $Id$
+ */
+package org.apache.helix.task;
+
+
+import com.google.common.base.Joiner;
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableSet;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+
+/**
+ * Provides a typed interface to task configurations.
+ *
+ * @author Abe <as...@linkedin.com>
+ * @version $Revision$
+ */
+public class TaskConfig
+{
+ //// Property names ////
+
+ /** The name of the workflow to which the task belongs. */
+ public static final String WORKFLOW_ID = "WorkflowID";
+ /** The name of the target resource. */
+ public static final String TARGET_RESOURCE = "TargetResource";
+ /** The set of the target partition states. The value must be a comma-separated list of partition states. */
+ public static final String TARGET_PARTITION_STATES = "TargetPartitionStates";
+ /** The set of the target partition ids. The value must be a comma-separated list of partition ids. */
+ public static final String TARGET_PARTITIONS = "TargetPartitions";
+ /** The command that is to be run by participants. */
+ public static final String COMMAND = "Command";
+ /** The command configuration to be used by the task partitions. */
+ public static final String COMMAND_CONFIG = "CommandConfig";
+ /** The timeout for a task partitions. */
+ public static final String TIMEOUT_PER_PARTITION = "TimeoutPerPartition";
+ /** The maximum number of times the task rebalancer may attempt to execute a task partitions. */
+ public static final String MAX_ATTEMPTS_PER_PARTITION = "MaxAttemptsPerPartition";
+ /** The number of concurrent tasks that are allowed to run on an instance. */
+ public static final String NUM_CONCURRENT_TASKS_PER_INSTANCE = "ConcurrentTasksPerInstance";
+
+ //// Default property values ////
+
+ public static final long DEFAULT_TIMEOUT_PER_PARTITION = 60 * 60 * 1000; // 1 hr.
+ public static final int DEFAULT_MAX_ATTEMPTS_PER_PARTITION = 10;
+ public static final int DEFAULT_NUM_CONCURRENT_TASKS_PER_INSTANCE = 1;
+
+ private final String _workflow;
+ private final String _targetResource;
+ private final List<Integer> _targetPartitions;
+ private final Set<String> _targetPartitionStates;
+ private final String _command;
+ private final String _commandConfig;
+ private final long _timeoutPerPartition;
+ private final int _numConcurrentTasksPerInstance;
+ private final int _maxAttemptsPerPartition;
+
+ private TaskConfig(String workflow,
+ String targetResource,
+ List<Integer> targetPartitions,
+ Set<String> targetPartitionStates,
+ String command,
+ String commandConfig,
+ long timeoutPerPartition,
+ int numConcurrentTasksPerInstance,
+ int maxAttemptsPerPartition)
+ {
+ _workflow = workflow;
+ _targetResource = targetResource;
+ _targetPartitions = targetPartitions;
+ _targetPartitionStates = targetPartitionStates;
+ _command = command;
+ _commandConfig = commandConfig;
+ _timeoutPerPartition = timeoutPerPartition;
+ _numConcurrentTasksPerInstance = numConcurrentTasksPerInstance;
+ _maxAttemptsPerPartition = maxAttemptsPerPartition;
+ }
+
+ public String getWorkflow()
+ {
+ return _workflow == null ? Workflow.UNSPECIFIED : _workflow;
+ }
+
+ public String getTargetResource()
+ {
+ return _targetResource;
+ }
+
+ public List<Integer> getTargetPartitions()
+ {
+ return _targetPartitions;
+ }
+
+ public Set<String> getTargetPartitionStates()
+ {
+ return _targetPartitionStates;
+ }
+
+ public String getCommand()
+ {
+ return _command;
+ }
+
+ public String getCommandConfig()
+ {
+ return _commandConfig;
+ }
+
+ public long getTimeoutPerPartition()
+ {
+ return _timeoutPerPartition;
+ }
+
+ public int getNumConcurrentTasksPerInstance()
+ {
+ return _numConcurrentTasksPerInstance;
+ }
+
+ public int getMaxAttemptsPerPartition()
+ {
+ return _maxAttemptsPerPartition;
+ }
+
+ public Map<String, String> getResourceConfigMap()
+ {
+ Map<String, String> cfgMap = new HashMap<String,String>();
+ cfgMap.put(TaskConfig.WORKFLOW_ID, _workflow);
+ cfgMap.put(TaskConfig.COMMAND, _command);
+ cfgMap.put(TaskConfig.COMMAND_CONFIG, _commandConfig);
+ cfgMap.put(TaskConfig.TARGET_RESOURCE, _targetResource);
+ cfgMap.put(TaskConfig.TARGET_PARTITION_STATES, Joiner.on(",").join(_targetPartitionStates));
+ if (_targetPartitions != null)
+ {
+ cfgMap.put(TaskConfig.TARGET_PARTITIONS, Joiner.on(",").join(_targetPartitions));
+ }
+ cfgMap.put(TaskConfig.TIMEOUT_PER_PARTITION, "" + _timeoutPerPartition);
+ cfgMap.put(TaskConfig.MAX_ATTEMPTS_PER_PARTITION, "" + _maxAttemptsPerPartition);
+
+ return cfgMap;
+ }
+
+ /**
+ * A builder for {@link TaskConfig}. Validates the configurations.
+ */
+ public static class Builder
+ {
+ private String _workflow;
+ private String _targetResource;
+ private List<Integer> _targetPartitions;
+ private Set<String> _targetPartitionStates;
+ private String _command;
+ private String _commandConfig;
+ private long _timeoutPerPartition = DEFAULT_TIMEOUT_PER_PARTITION;
+ private int _numConcurrentTasksPerInstance = DEFAULT_NUM_CONCURRENT_TASKS_PER_INSTANCE;
+ private int _maxAttemptsPerPartition = DEFAULT_MAX_ATTEMPTS_PER_PARTITION;
+
+ public TaskConfig build()
+ {
+ validate();
+
+ return new TaskConfig(_workflow,
+ _targetResource,
+ _targetPartitions,
+ _targetPartitionStates,
+ _command,
+ _commandConfig,
+ _timeoutPerPartition,
+ _numConcurrentTasksPerInstance,
+ _maxAttemptsPerPartition);
+ }
+
+ /**
+ * Convenience method to build a {@link TaskConfig} from a {@code Map<String, String>}.
+ *
+ * @param cfg A map of property names to their string representations.
+ *
+ * @return A {@link Builder}.
+ */
+ public static Builder fromMap(Map<String, String> cfg)
+ {
+ Builder b = new Builder();
+ if (cfg.containsKey(WORKFLOW_ID))
+ {
+ b.setWorkflow(cfg.get(WORKFLOW_ID));
+ }
+ if (cfg.containsKey(TARGET_RESOURCE))
+ {
+ b.setTargetResource(cfg.get(TARGET_RESOURCE));
+ }
+ if (cfg.containsKey(TARGET_PARTITIONS))
+ {
+ b.setTargetPartitions(csvToIntList(cfg.get(TARGET_PARTITIONS)));
+ }
+ if (cfg.containsKey(TARGET_PARTITION_STATES))
+ {
+ b.setTargetPartitionStates(new HashSet<String>(Arrays.asList(cfg.get(TARGET_PARTITION_STATES).split(","))));
+ }
+ if (cfg.containsKey(COMMAND))
+ {
+ b.setCommand(cfg.get(COMMAND));
+ }
+ if (cfg.containsKey(COMMAND_CONFIG))
+ {
+ b.setCommandConfig(cfg.get(COMMAND_CONFIG));
+ }
+ if (cfg.containsKey(TIMEOUT_PER_PARTITION))
+ {
+ b.setTimeoutPerPartition(Long.parseLong(cfg.get(TIMEOUT_PER_PARTITION)));
+ }
+ if (cfg.containsKey(NUM_CONCURRENT_TASKS_PER_INSTANCE))
+ {
+ b.setNumConcurrentTasksPerInstance(Integer.parseInt(cfg.get(NUM_CONCURRENT_TASKS_PER_INSTANCE)));
+ }
+ if (cfg.containsKey(MAX_ATTEMPTS_PER_PARTITION))
+ {
+ b.setMaxAttemptsPerPartition(Integer.parseInt(cfg.get(MAX_ATTEMPTS_PER_PARTITION)));
+ }
+
+ return b;
+ }
+
+ public Builder setWorkflow(String v)
+ {
+ _workflow = v;
+ return this;
+ }
+
+ public Builder setTargetResource(String v)
+ {
+ _targetResource = v;
+ return this;
+ }
+
+ public Builder setTargetPartitions(List<Integer> v)
+ {
+ _targetPartitions = ImmutableList.copyOf(v);
+ return this;
+ }
+
+ public Builder setTargetPartitionStates(Set<String> v)
+ {
+ _targetPartitionStates = ImmutableSet.copyOf(v);
+ return this;
+ }
+
+ public Builder setCommand(String v)
+ {
+ _command = v;
+ return this;
+ }
+
+ public Builder setCommandConfig(String v)
+ {
+ _commandConfig = v;
+ return this;
+ }
+
+ public Builder setTimeoutPerPartition(long v)
+ {
+ _timeoutPerPartition = v;
+ return this;
+ }
+
+ public Builder setNumConcurrentTasksPerInstance(int v)
+ {
+ _numConcurrentTasksPerInstance = v;
+ return this;
+ }
+
+ public Builder setMaxAttemptsPerPartition(int v)
+ {
+ _maxAttemptsPerPartition = v;
+ return this;
+ }
+
+ private void validate()
+ {
+ if (_targetResource == null)
+ {
+ throw new IllegalArgumentException(String.format("%s cannot be null", TARGET_RESOURCE));
+ }
+ if (_targetPartitionStates != null && _targetPartitionStates.isEmpty())
+ {
+ throw new IllegalArgumentException(String.format("%s cannot be an empty set",
+ TARGET_PARTITION_STATES));
+ }
+ if (_command == null)
+ {
+ throw new IllegalArgumentException(String.format("%s cannot be null", COMMAND));
+ }
+ if (_timeoutPerPartition < 0)
+ {
+ throw new IllegalArgumentException(String.format("%s has invalid value %s",
+ TIMEOUT_PER_PARTITION,
+ _timeoutPerPartition));
+ }
+ if (_numConcurrentTasksPerInstance < 1)
+ {
+ throw new IllegalArgumentException(String.format("%s has invalid value %s",
+ NUM_CONCURRENT_TASKS_PER_INSTANCE,
+ _numConcurrentTasksPerInstance));
+ }
+ if (_maxAttemptsPerPartition < 1)
+ {
+ throw new IllegalArgumentException(String.format("%s has invalid value %s",
+ MAX_ATTEMPTS_PER_PARTITION,
+ _maxAttemptsPerPartition));
+ }
+ if(_workflow == null)
+ {
+ throw new IllegalArgumentException(String.format("%s cannot be null", WORKFLOW_ID));
+ }
+ }
+
+ private static List<Integer> csvToIntList(String csv)
+ {
+ String[] vals = csv.split(",");
+ List<Integer> l = new ArrayList<Integer>();
+ for (String v : vals)
+ {
+ l.add(Integer.parseInt(v));
+ }
+
+ return l;
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/helix-core/src/main/java/org/apache/helix/task/TaskConstants.java
----------------------------------------------------------------------
diff --git a/helix-core/src/main/java/org/apache/helix/task/TaskConstants.java b/helix-core/src/main/java/org/apache/helix/task/TaskConstants.java
new file mode 100644
index 0000000..4ff8f0a
--- /dev/null
+++ b/helix-core/src/main/java/org/apache/helix/task/TaskConstants.java
@@ -0,0 +1,31 @@
+/*
+ * $Id$
+ */
+package org.apache.helix.task;
+
+
+/**
+ * Constants used in the task framework.
+ *
+ * @author Abe <as...@linkedin.com>
+ * @version $Revision$
+ */
/**
 * Constants used in the task framework.
 */
public final class TaskConstants {
  /**
   * The name of the {@link Task} state model.
   */
  public static final String STATE_MODEL_NAME = "Task";
  /**
   * Field in workflow resource config housing dag
   */
  public static final String WORKFLOW_DAG_FIELD = "dag";
  /**
   * Field in workflow resource config for flow name
   */
  public static final String WORKFLOW_NAME_FIELD = "name";
  /**
   * The root property store path at which the {@link TaskRebalancer} stores context information.
   */
  public static final String REBALANCER_CONTEXT_ROOT = "/TaskRebalancer";

  // Utility holder of constants; never instantiated.
  private TaskConstants() {
  }
}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/helix-core/src/main/java/org/apache/helix/task/TaskContext.java
----------------------------------------------------------------------
diff --git a/helix-core/src/main/java/org/apache/helix/task/TaskContext.java b/helix-core/src/main/java/org/apache/helix/task/TaskContext.java
new file mode 100644
index 0000000..59f15f0
--- /dev/null
+++ b/helix-core/src/main/java/org/apache/helix/task/TaskContext.java
@@ -0,0 +1,147 @@
+/*
+ * $id$
+ */
+package org.apache.helix.task;
+
+
+import java.util.Map;
+import java.util.TreeMap;
+import org.apache.helix.HelixProperty;
+import org.apache.helix.ZNRecord;
+
+
+/**
+ * Provides a typed interface to the context information stored by {@link TaskRebalancer} in the Helix property store.
+ *
+ * @author Abe <as...@linkedin.com>
+ * @version $Revision$
+ */
+public class TaskContext extends HelixProperty
+{
+ public static final String START_TIME = "START_TIME";
+ public static final String PARTITION_STATE = "STATE";
+ public static final String NUM_ATTEMPTS = "NUM_ATTEMPTS";
+ public static final String FINISH_TIME = "FINISH_TIME";
+
+ public TaskContext(ZNRecord record)
+ {
+ super(record);
+ }
+
+ public void setStartTime(long t)
+ {
+ _record.setSimpleField(START_TIME, String.valueOf(t));
+ }
+
+ public long getStartTime()
+ {
+ String tStr = _record.getSimpleField(START_TIME);
+ if (tStr == null)
+ {
+ return -1;
+ }
+
+ return Long.parseLong(tStr);
+ }
+
+ public void setPartitionState(int p, TaskPartitionState s)
+ {
+ String pStr = String.valueOf(p);
+ Map<String, String> map = _record.getMapField(pStr);
+ if (map == null)
+ {
+ map = new TreeMap<String, String>();
+ _record.setMapField(pStr, map);
+ }
+ map.put(PARTITION_STATE, s.name());
+ }
+
+ public TaskPartitionState getPartitionState(int p)
+ {
+ Map<String, String> map = _record.getMapField(String.valueOf(p));
+ if (map == null)
+ {
+ return null;
+ }
+
+ String str = map.get(PARTITION_STATE);
+ if (str != null)
+ {
+ return TaskPartitionState.valueOf(str);
+ }
+ else
+ {
+ return null;
+ }
+ }
+
+ public void setPartitionNumAttempts(int p, int n)
+ {
+ String pStr = String.valueOf(p);
+ Map<String, String> map = _record.getMapField(pStr);
+ if (map == null)
+ {
+ map = new TreeMap<String, String>();
+ _record.setMapField(pStr, map);
+ }
+ map.put(NUM_ATTEMPTS, String.valueOf(n));
+ }
+
+ public int incrementNumAttempts(int pId)
+ {
+ int n = this.getPartitionNumAttempts(pId);
+ if (n < 0)
+ {
+ n = 0;
+ }
+ n += 1;
+ this.setPartitionNumAttempts(pId, n);
+ return n;
+ }
+
+ public int getPartitionNumAttempts(int p)
+ {
+ Map<String, String> map = _record.getMapField(String.valueOf(p));
+ if (map == null)
+ {
+ return -1;
+ }
+
+ String nStr = map.get(NUM_ATTEMPTS);
+ if (nStr == null)
+ {
+ return -1;
+ }
+
+ return Integer.parseInt(nStr);
+ }
+
+ public void setPartitionFinishTime(int p, long t)
+ {
+ String pStr = String.valueOf(p);
+ Map<String, String> map = _record.getMapField(pStr);
+ if (map == null)
+ {
+ map = new TreeMap<String, String>();
+ _record.setMapField(pStr, map);
+ }
+ map.put(FINISH_TIME, String.valueOf(t));
+ }
+
+ public long getPartitionFinishTime(int p)
+ {
+ Map<String, String> map = _record.getMapField(String.valueOf(p));
+ if (map == null)
+ {
+ return -1;
+ }
+
+ String tStr = map.get(FINISH_TIME);
+ if (tStr == null)
+ {
+ return -1;
+ }
+
+ return Long.parseLong(tStr);
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/helix-core/src/main/java/org/apache/helix/task/TaskDag.java
----------------------------------------------------------------------
diff --git a/helix-core/src/main/java/org/apache/helix/task/TaskDag.java b/helix-core/src/main/java/org/apache/helix/task/TaskDag.java
new file mode 100644
index 0000000..009d73d
--- /dev/null
+++ b/helix-core/src/main/java/org/apache/helix/task/TaskDag.java
@@ -0,0 +1,157 @@
+package org.apache.helix.task;
+
+import org.codehaus.jackson.annotate.JsonProperty;
+import org.codehaus.jackson.map.ObjectMapper;
+
+import java.util.Map;
+import java.util.Set;
+import java.util.TreeMap;
+import java.util.TreeSet;
+
+/**
+ * Provides a convenient way to construct, traverse,
+ * and validate a task dependency graph
+ *
+ * @author Chris Beavers <cb...@linkedin.com>
+ */
+public class TaskDag
+{
+ @JsonProperty("parentsToChildren")
+ private Map<String, Set<String>> _parentsToChildren;
+
+ @JsonProperty("childrenToParents")
+ private Map<String, Set<String>> _childrenToParents;
+
+ @JsonProperty("allNodes")
+ private Set<String> _allNodes;
+
+ // NOTE(review): shared mutable instance; callers must treat EMPTY_DAG as read-only
+ public static final TaskDag EMPTY_DAG = new TaskDag();
+
+ public TaskDag()
+ {
+ _parentsToChildren = new TreeMap<String, Set<String>>();
+ _childrenToParents = new TreeMap<String, Set<String>>();
+ _allNodes = new TreeSet<String>();
+ }
+
+ /** Adds the directed edge parent -> child, registering both endpoints as nodes. */
+ public void addParentToChild(String parent, String child)
+ {
+ if(!_parentsToChildren.containsKey(parent))
+ {
+ _parentsToChildren.put(parent, new TreeSet<String>());
+ }
+ _parentsToChildren.get(parent).add(child);
+
+ if(!_childrenToParents.containsKey(child))
+ {
+ _childrenToParents.put(child, new TreeSet<String>());
+ }
+ _childrenToParents.get(child).add(parent);
+
+ _allNodes.add(parent);
+ _allNodes.add(child);
+ }
+
+ /** Registers a standalone node with no incident edges. */
+ public void addNode(String node)
+ {
+ _allNodes.add(node);
+ }
+
+ public Map<String, Set<String>> getParentsToChildren()
+ {
+ return _parentsToChildren;
+ }
+
+ public Map<String, Set<String>> getChildrenToParents()
+ {
+ return _childrenToParents;
+ }
+
+ public Set<String> getAllNodes()
+ {
+ return _allNodes;
+ }
+
+ /**
+ * Returns the direct children of the given node. An unknown node yields a fresh
+ * empty set; a known node yields the live internal adjacency set.
+ */
+ public Set<String> getDirectChildren(String node)
+ {
+ if(!_parentsToChildren.containsKey(node))
+ {
+ return new TreeSet<String>();
+ }
+ return _parentsToChildren.get(node);
+ }
+
+ /**
+ * Returns the direct parents of the given node. An unknown node yields a fresh
+ * empty set; a known node yields the live internal adjacency set.
+ */
+ public Set<String> getDirectParents(String node)
+ {
+ if(!_childrenToParents.containsKey(node))
+ {
+ return new TreeSet<String>();
+ }
+ return _childrenToParents.get(node);
+ }
+
+ /** Serializes this dag to its JSON representation. */
+ public String toJson() throws Exception
+ {
+ return new ObjectMapper().writeValueAsString(this);
+ }
+
+ /**
+ * Deserializes a TaskDag from its JSON representation.
+ *
+ * @throws IllegalArgumentException if the JSON cannot be parsed; the underlying
+ *         parser exception is retained as the cause
+ */
+ public static TaskDag fromJson(String json)
+ {
+ try
+ {
+ return new ObjectMapper().readValue(json, TaskDag.class);
+ }
+ catch(Exception e)
+ {
+ // BUGFIX: chain the original exception as the cause instead of discarding it
+ throw new IllegalArgumentException("Unable to parse json " + json + " into task dag", e);
+ }
+ }
+
+ /**
+ * Checks that dag contains no cycles and all nodes are reachable.
+ */
+ public void validate()
+ {
+ Set<String> prevIteration = new TreeSet<String>();
+
+ // get all unparented nodes
+ for(String node : _allNodes)
+ {
+ if(getDirectParents(node).isEmpty())
+ {
+ prevIteration.add(node);
+ }
+ }
+
+ // visit children nodes up to max iteration count, by which point we should have exited naturally
+ Set<String> allNodesReached = new TreeSet<String>();
+ int iterationCount = 0;
+ int maxIterations = _allNodes.size() + 1;
+
+ while(!prevIteration.isEmpty() && iterationCount < maxIterations)
+ {
+ // construct set of all children reachable from prev iteration
+ Set<String> thisIteration = new TreeSet<String>();
+ for(String node : prevIteration)
+ {
+ thisIteration.addAll(getDirectChildren(node));
+ }
+
+ allNodesReached.addAll(prevIteration);
+ prevIteration = thisIteration;
+ iterationCount++;
+ }
+
+ allNodesReached.addAll(prevIteration);
+
+ // hitting the iteration cap means the BFS never drained, i.e. a cycle exists
+ if(iterationCount >= maxIterations)
+ {
+ throw new IllegalArgumentException("DAG invalid: cycles detected");
+ }
+
+ if(!allNodesReached.containsAll(_allNodes))
+ {
+ throw new IllegalArgumentException("DAG invalid: unreachable nodes found. Reachable set is " + allNodesReached);
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/helix-core/src/main/java/org/apache/helix/task/TaskDriver.java
----------------------------------------------------------------------
diff --git a/helix-core/src/main/java/org/apache/helix/task/TaskDriver.java b/helix-core/src/main/java/org/apache/helix/task/TaskDriver.java
new file mode 100644
index 0000000..5ce1c31
--- /dev/null
+++ b/helix-core/src/main/java/org/apache/helix/task/TaskDriver.java
@@ -0,0 +1,382 @@
+package org.apache.helix.task;
+
+
+import java.io.File;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+import java.util.TreeMap;
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.CommandLineParser;
+import org.apache.commons.cli.GnuParser;
+import org.apache.commons.cli.HelpFormatter;
+import org.apache.commons.cli.Option;
+import org.apache.commons.cli.OptionBuilder;
+import org.apache.commons.cli.OptionGroup;
+import org.apache.commons.cli.Options;
+import org.apache.commons.cli.ParseException;
+import org.apache.helix.HelixAdmin;
+import org.apache.helix.HelixDataAccessor;
+import org.apache.helix.HelixManager;
+import org.apache.helix.HelixManagerFactory;
+import org.apache.helix.HelixProperty;
+import org.apache.helix.InstanceType;
+import org.apache.helix.model.IdealState;
+import org.apache.helix.model.builder.CustomModeISBuilder;
+import org.apache.log4j.Logger;
+
+
+/**
+ * CLI for scheduling/canceling workflows
+ *
+ * @author Chris Beavers <cb...@linkedin.com>
+ */
+public class TaskDriver
+{
+ /** For logging */
+ private static final Logger LOG = Logger.getLogger(TaskDriver.class);
+
+ /** Required option name for Helix endpoint */
+ private static final String ZK_ADDRESS = "zk";
+
+ /** Required option name for cluster against which to run task */
+ private static final String CLUSTER_NAME_OPTION = "cluster";
+
+ /** Required option name for task resource within target cluster */
+ private static final String RESOURCE_OPTION = "resource";
+
+ /** Field for specifying a workflow file when starting a job */
+ private static final String WORKFLOW_FILE_OPTION = "file";
+
+ private final HelixManager _manager;
+ private final HelixAdmin _admin;
+ private final String _clusterName;
+
+ /** Commands which may be parsed from the first argument to main */
+ private enum DriverCommand {
+ start, stop, delete, resume, list
+ }
+
+ public TaskDriver(HelixManager manager)
+ {
+ _manager = manager;
+ _clusterName = manager.getClusterName();
+ _admin = manager.getClusterManagmentTool();
+ }
+
+ /**
+ * Parses the first argument as a driver command and the rest of the
+ * arguments are parsed based on that command. Constructs a Helix
+ * message and posts it to the controller
+ */
+ public static void main(String[] args) throws Exception
+ {
+ // BUGFIX: guard against an empty argument list before dereferencing args[0]
+ if (args.length < 1)
+ {
+ printUsage(constructOptions(), "[cmd]");
+ throw new IllegalArgumentException("A driver command is required as the first argument");
+ }
+
+ String[] cmdArgs = Arrays.copyOfRange(args, 1, args.length);
+ CommandLine cl = parseOptions(cmdArgs, constructOptions(), args[0]);
+ String zkAddr = cl.getOptionValue(ZK_ADDRESS);
+ String clusterName = cl.getOptionValue(CLUSTER_NAME_OPTION);
+ String resource = cl.getOptionValue(RESOURCE_OPTION);
+
+ if(zkAddr == null || clusterName == null || resource == null)
+ {
+ printUsage(constructOptions(), "[cmd]");
+ throw new IllegalArgumentException("zk, cluster, and resource must all be non-null for all commands");
+ }
+
+ HelixManager helixMgr = HelixManagerFactory.getZKHelixManager(clusterName,
+ "Admin",
+ InstanceType.ADMINISTRATOR,
+ zkAddr);
+ helixMgr.connect();
+ TaskDriver driver = new TaskDriver(helixMgr);
+ try
+ {
+ DriverCommand cmd = DriverCommand.valueOf(args[0]);
+ switch(cmd)
+ {
+ case start:
+ if(cl.hasOption(WORKFLOW_FILE_OPTION))
+ {
+ driver.start(Workflow.parse(new File(cl.getOptionValue(WORKFLOW_FILE_OPTION))));
+ }
+ else
+ {
+ throw new IllegalArgumentException("Workflow file is required to start flow.");
+ }
+ break;
+ case stop:
+ driver.setTaskTargetState(resource, TargetState.STOP);
+ break;
+ case resume:
+ driver.setTaskTargetState(resource, TargetState.START);
+ break;
+ case delete:
+ driver.setTaskTargetState(resource, TargetState.DELETE);
+ break;
+ case list:
+ driver.list(resource);
+ // BUGFIX: break was missing, so a successful list fell through to the
+ // default case and threw "Unknown command"
+ break;
+ default:
+ throw new IllegalArgumentException("Unknown command " + args[0]);
+ }
+ }
+ catch(IllegalArgumentException e)
+ {
+ // BUGFIX: this catch also sees per-command argument failures (e.g. missing
+ // workflow file), so do not unconditionally report an unknown command
+ LOG.error("Failed to execute command " + args[0], e);
+ throw e;
+ }
+
+ helixMgr.disconnect();
+ }
+
+ /** Schedules a new workflow */
+ public void start(Workflow flow) throws Exception
+ {
+ // TODO: check that namespace for workflow is available
+ LOG.info("Starting workflow " + flow.getName());
+ flow.validate();
+
+ String flowName = flow.getName();
+
+ // first, add workflow config to ZK
+ _admin.setConfig(TaskUtil.getResourceConfigScope(_clusterName, flowName),
+ flow.getResourceConfigMap());
+
+ // then schedule tasks
+ for(String task : flow.getTaskConfigs().keySet())
+ {
+ scheduleTask(task, TaskConfig.Builder.fromMap(flow.getTaskConfigs().get(task)).build());
+ }
+ }
+
+ /**
+ * Posts new task to cluster: creates the task resource sized to the target
+ * resource's partition count and pushes a USER_DEFINED-rebalanced ideal state.
+ */
+ private void scheduleTask(String taskResource, TaskConfig taskConfig) throws Exception
+ {
+ // Set up task resource based on partitions from target resource
+ int numPartitions = _admin.getResourceIdealState(_clusterName, taskConfig.getTargetResource()).getPartitionSet().size();
+ _admin.addResource(_clusterName, taskResource, numPartitions, TaskConstants.STATE_MODEL_NAME);
+ _admin.setConfig(TaskUtil.getResourceConfigScope(_clusterName, taskResource), taskConfig.getResourceConfigMap());
+
+ // Push out new ideal state based on number of target partitions
+ CustomModeISBuilder builder = new CustomModeISBuilder(taskResource);
+ builder.setRebalancerMode(IdealState.RebalanceMode.USER_DEFINED);
+ builder.setNumReplica(1);
+ builder.setNumPartitions(numPartitions);
+ builder.setStateModel(TaskConstants.STATE_MODEL_NAME);
+ for (int i = 0; i < numPartitions; i++)
+ {
+ builder.add(taskResource + "_" + i);
+ }
+ IdealState is = builder.build();
+ is.setRebalancerClassName(TaskRebalancer.class.getName());
+ _admin.setResourceIdealState(_clusterName, taskResource, is);
+ }
+
+ /** Public method to resume a task/workflow */
+ public void resume(String resource)
+ {
+ setTaskTargetState(resource, TargetState.START);
+ }
+
+ /** Public method to stop a task/workflow */
+ public void stop(String resource)
+ {
+ setTaskTargetState(resource, TargetState.STOP);
+ }
+
+ /** Public method to delete a task/workflow */
+ public void delete(String resource)
+ {
+ setTaskTargetState(resource, TargetState.DELETE);
+ }
+
+ /** Helper function to change target state for a given task */
+ private void setTaskTargetState(String taskResource, TargetState state)
+ {
+ HelixDataAccessor accessor = _manager.getHelixDataAccessor();
+ HelixProperty p = new HelixProperty(taskResource);
+ p.getRecord().setSimpleField(WorkflowConfig.TARGET_STATE, state.name());
+ accessor.updateProperty(accessor.keyBuilder().resourceConfig(taskResource), p);
+
+ invokeRebalance();
+ }
+
+ /** Logs the tasks of the given workflow and a per-state partition count for each. */
+ public void list(String resource)
+ {
+ // NOTE(review): assumes workflow config/context exist for the resource -- confirm
+ // TaskUtil behavior for unknown resources
+ WorkflowConfig wCfg = TaskUtil.getWorkflowCfg(_manager, resource);
+ WorkflowContext wCtx = TaskUtil.getWorkflowContext(_manager, resource);
+
+ LOG.info("Workflow " + resource + " consists of the following tasks: " + wCfg.getTaskDag().getAllNodes());
+ LOG.info("Current state of workflow is " + wCtx.getWorkflowState().name());
+ LOG.info("Task states are: ");
+ LOG.info("-------");
+ for(String task : wCfg.getTaskDag().getAllNodes())
+ {
+ LOG.info("Task " + task + " is " + wCtx.getTaskState(task));
+
+ // fetch task information
+ TaskContext tCtx = TaskUtil.getTaskContext(_manager, task);
+ TaskConfig tCfg = TaskUtil.getTaskCfg(_manager, task);
+
+ // calculate taskPartitions: explicit target partitions win, otherwise derive
+ // them from the partition names of the target resource's ideal state
+ List<Integer> partitions;
+ if(tCfg.getTargetPartitions() != null)
+ {
+ partitions = tCfg.getTargetPartitions();
+ }
+ else
+ {
+ partitions = new ArrayList<Integer>();
+ for(String pStr : _admin.getResourceIdealState(_clusterName, tCfg.getTargetResource()).getPartitionSet())
+ {
+ partitions.add(Integer.parseInt(pStr.substring(pStr.lastIndexOf("_") + 1, pStr.length())));
+ }
+ }
+
+ // group partitions by status
+ Map<TaskPartitionState, Integer> statusCount = new TreeMap<TaskPartitionState, Integer>();
+ for(Integer i : partitions)
+ {
+ TaskPartitionState s = tCtx.getPartitionState(i);
+ if(!statusCount.containsKey(s))
+ {
+ statusCount.put(s, 0);
+ }
+ statusCount.put(s, statusCount.get(s) + 1);
+ }
+
+ for(TaskPartitionState s : statusCount.keySet())
+ {
+ LOG.info(statusCount.get(s) + "/" + partitions.size() + " in state " + s.name());
+ }
+
+ LOG.info("-------");
+ }
+ }
+
+ /** Hack to invoke rebalance until bug concerning resource config changes not driving rebalance is fixed */
+ public void invokeRebalance()
+ {
+ // find a task and rewrite its ideal state unchanged, which triggers the rebalancer
+ for(String resource : _admin.getResourcesInCluster(_clusterName))
+ {
+ IdealState is = _admin.getResourceIdealState(_clusterName, resource);
+ if(is.getStateModelDefRef().equals(TaskConstants.STATE_MODEL_NAME))
+ {
+ HelixDataAccessor accessor = _manager.getHelixDataAccessor();
+ accessor.updateProperty(accessor.keyBuilder().idealStates(resource), is);
+ break;
+ }
+ }
+ }
+
+ /** Constructs options set for all basic control messages */
+ private static Options constructOptions()
+ {
+ Options options = new Options();
+ options.addOptionGroup(constructGenericRequiredOptionGroup());
+ options.addOptionGroup(constructStartOptionGroup());
+ return options;
+ }
+
+ /** Constructs option group containing options required by all drivable tasks */
+ private static OptionGroup constructGenericRequiredOptionGroup()
+ {
+ Option zkAddressOption = OptionBuilder.isRequired().withLongOpt(ZK_ADDRESS)
+ .withDescription("ZK address managing target cluster").create();
+ zkAddressOption.setArgs(1);
+ zkAddressOption.setArgName("zkAddress");
+
+ Option clusterNameOption = OptionBuilder.isRequired().withLongOpt(CLUSTER_NAME_OPTION)
+ .withDescription("Target cluster name").create();
+ clusterNameOption.setArgs(1);
+ clusterNameOption.setArgName("clusterName");
+
+ Option taskResourceOption = OptionBuilder.isRequired().withLongOpt(RESOURCE_OPTION)
+ .withDescription("Target workflow or task").create();
+ taskResourceOption.setArgs(1);
+ taskResourceOption.setArgName("resourceName");
+
+ OptionGroup group = new OptionGroup();
+ group.addOption(zkAddressOption);
+ group.addOption(clusterNameOption);
+ group.addOption(taskResourceOption);
+ return group;
+ }
+
+ /** Constructs option group containing the optional workflow-file option for the start command */
+ private static OptionGroup constructStartOptionGroup()
+ {
+ Option workflowFileOption = OptionBuilder.withLongOpt(WORKFLOW_FILE_OPTION)
+ .withDescription("Local file describing workflow").create();
+ workflowFileOption.setArgs(1);
+ workflowFileOption.setArgName("workflowFile");
+
+ OptionGroup group = new OptionGroup();
+ group.addOption(workflowFileOption);
+ return group;
+ }
+
+ /** Attempts to parse options for given command, printing usage under failure */
+ private static CommandLine parseOptions(String[] args, Options options, String cmdStr)
+ {
+ CommandLineParser cliParser = new GnuParser();
+ CommandLine cmd = null;
+
+ try
+ {
+ cmd = cliParser.parse(options, args);
+ }
+ catch (ParseException pe)
+ {
+ LOG.error("CommandLineClient: failed to parse command-line options: "
+ + pe.toString());
+ printUsage(options, cmdStr);
+ System.exit(1);
+ }
+ boolean ret = checkOptionArgsNumber(cmd.getOptions());
+ if (!ret)
+ {
+ printUsage(options, cmdStr);
+ System.exit(1);
+ }
+
+ return cmd;
+ }
+
+ /** Ensures options argument counts are correct */
+ private static boolean checkOptionArgsNumber(Option[] options)
+ {
+ for (Option option : options)
+ {
+ int argNb = option.getArgs();
+ String[] args = option.getValues();
+ if (argNb == 0)
+ {
+ if (args != null && args.length > 0)
+ {
+ System.err.println(option.getArgName() + " shall have " + argNb + " arguments (was "
+ + Arrays.toString(args) + ")");
+ return false;
+ }
+ } else
+ {
+ if (args == null || args.length != argNb)
+ {
+ System.err.println(option.getArgName() + " shall have " + argNb + " arguments (was "
+ + Arrays.toString(args) + ")");
+ return false;
+ }
+ }
+ }
+ return true;
+ }
+
+ /** Displays CLI usage for given option set and command name */
+ private static void printUsage(Options cliOptions, String cmd)
+ {
+ HelpFormatter helpFormatter = new HelpFormatter();
+ helpFormatter.setWidth(1000);
+ helpFormatter.printHelp("java " + TaskDriver.class.getName() + " " + cmd, cliOptions);
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/helix-core/src/main/java/org/apache/helix/task/TaskFactory.java
----------------------------------------------------------------------
diff --git a/helix-core/src/main/java/org/apache/helix/task/TaskFactory.java b/helix-core/src/main/java/org/apache/helix/task/TaskFactory.java
new file mode 100644
index 0000000..02d5cf2
--- /dev/null
+++ b/helix-core/src/main/java/org/apache/helix/task/TaskFactory.java
@@ -0,0 +1,23 @@
+/*
+ * $Id$
+ */
+package org.apache.helix.task;
+
+
+/**
+ * A factory for {@link Task} objects.
+ *
+ * @author Abe <as...@linkedin.com>
+ * @version $Revision$
+ */
+public interface TaskFactory
+{
+ /**
+ * Returns a new {@link Task} instance for each invocation.
+ *
+ * @param config Configuration information for the task.
+ * NOTE(review): appears to be an opaque serialized configuration
+ * string whose format is defined by the implementation -- confirm
+ * against callers.
+ *
+ * @return A {@link Task} instance.
+ */
+ Task createNewTask(String config);
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/helix-core/src/main/java/org/apache/helix/task/TaskPartitionState.java
----------------------------------------------------------------------
diff --git a/helix-core/src/main/java/org/apache/helix/task/TaskPartitionState.java b/helix-core/src/main/java/org/apache/helix/task/TaskPartitionState.java
new file mode 100644
index 0000000..245bb7a
--- /dev/null
+++ b/helix-core/src/main/java/org/apache/helix/task/TaskPartitionState.java
@@ -0,0 +1,31 @@
+/*
+ * $Id$
+ */
+package org.apache.helix.task;
+
+
+/**
+ * Enumeration of the states in the "Task" state model.
+ *
+ * @author Abe <as...@linkedin.com>
+ * @version $Revision$
+ */
+public enum TaskPartitionState
+{
+ // NOTE(review): INIT..TASK_ERROR are task-specific states; ERROR and DROPPED are
+ // the generic states every Helix state model carries -- confirm against the
+ // Task state model definition.
+ /** The initial state of the state model. */
+ INIT,
+ /** Indicates that the task is currently running. */
+ RUNNING,
+ /** Indicates that the task was stopped by the controller. */
+ STOPPED,
+ /** Indicates that the task completed normally. */
+ COMPLETED,
+ /** Indicates that the task timed out. */
+ TIMED_OUT,
+ /** Indicates an error occurred during task execution. */
+ TASK_ERROR,
+ /** Helix's own internal error state. */
+ ERROR,
+ /** A Helix internal state. */
+ DROPPED
+}
[10/15] Adding Helix-task-framework and Yarn integration modules
Posted by ki...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/provider/ProviderStateModel.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/provider/ProviderStateModel.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/provider/ProviderStateModel.java
new file mode 100644
index 0000000..814387f
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/provider/ProviderStateModel.java
@@ -0,0 +1,114 @@
+package org.apache.helix.autoscale.provider;
+
+import org.apache.helix.NotificationContext;
+import org.apache.helix.autoscale.ClusterAdmin;
+import org.apache.helix.autoscale.ContainerProvider;
+import org.apache.helix.model.Message;
+import org.apache.helix.participant.statemachine.StateModel;
+import org.apache.helix.participant.statemachine.StateModelInfo;
+import org.apache.helix.participant.statemachine.Transition;
+import org.apache.log4j.Logger;
+
+/**
+ * Helix state model implementation for {@link ContainerProvider}s. Updates
+ * configuration of managed Helix cluster and spawns and destroys container
+ * instances.
+ *
+ */
+@StateModelInfo(initialState = "OFFLINE", states = { "OFFLINE", "ONLINE" })
+public class ProviderStateModel extends StateModel {
+
+ static final Logger log = Logger.getLogger(ProviderStateModel.class);
+
+ ContainerProvider provider;
+ ClusterAdmin admin;
+
+ public ProviderStateModel(ContainerProvider provider, ClusterAdmin admin) {
+ this.provider = provider;
+ this.admin = admin;
+ }
+
+ /**
+ * OFFLINE -> ONLINE: clears any stale container/instance with the same id,
+ * registers the instance in the managed cluster, creates the container, and
+ * triggers a best-effort rebalance.
+ *
+ * The message's resource name carries the container type and the partition
+ * name carries the container id.
+ */
+ @Transition(from = "OFFLINE", to = "ONLINE")
+ public void acquire(Message m, NotificationContext context) throws Exception {
+ String containerType = m.getResourceName();
+ String containerId = m.getPartitionName();
+ String instanceId = context.getManager().getInstanceName();
+
+ log.trace(String.format("%s:%s transitioning from OFFLINE to ONLINE", containerId, instanceId));
+
+ // clean up leftovers from a previous incarnation before re-creating
+ bestEffortRemove(containerId);
+
+ // add instance to cluster
+ admin.addInstance(containerId, containerType);
+
+ // create container
+ provider.create(containerId, containerType);
+
+ try {
+ admin.rebalance();
+ } catch (Exception e) {
+ // ignore: rebalance is best-effort; the container itself was created above
+ log.warn(String.format("rebalancing cluster failed (error='%s')", e.getMessage()));
+ }
+
+ log.info(String.format("%s acquired container '%s' (type='%s')", instanceId, containerId, containerType));
+ }
+
+ /**
+ * ONLINE -> OFFLINE: best-effort destroys the container, removes its instance
+ * from the managed cluster, and triggers a best-effort rebalance.
+ */
+ @Transition(from = "ONLINE", to = "OFFLINE")
+ public void release(Message m, NotificationContext context) {
+ String containerId = m.getPartitionName();
+ String instanceId = context.getManager().getInstanceName();
+
+ log.trace(String.format("%s:%s transitioning from ONLINE to OFFLINE", containerId, instanceId));
+
+ bestEffortRemove(containerId);
+
+ try {
+ admin.rebalance();
+ } catch (Exception e) {
+ // ignore: rebalance is best-effort; removal already happened above
+ log.warn(String.format("rebalancing cluster failed (error='%s')", e.getMessage()));
+ }
+
+ log.info(String.format("%s destroyed container '%s'", instanceId, containerId));
+
+ }
+
+ /**
+ * ERROR -> OFFLINE: recovery is identical to a normal release (tear the
+ * container down and deregister its instance).
+ */
+ @Transition(from = "ERROR", to = "OFFLINE")
+ public void recover(Message m, NotificationContext context) {
+ String containerId = m.getPartitionName();
+ String instanceId = context.getManager().getInstanceName();
+
+ log.trace(String.format("%s:%s transitioning from ERROR to OFFLINE", containerId, instanceId));
+
+ release(m, context);
+ }
+
+ /** OFFLINE -> DROPPED: no cleanup required beyond logging the transition. */
+ @Transition(from = "OFFLINE", to = "DROPPED")
+ public void drop(Message m, NotificationContext context) {
+ String containerId = m.getPartitionName();
+ String instanceId = context.getManager().getInstanceName();
+
+ log.trace(String.format("%s:%s transitioning from OFFLINE to DROPPED", containerId, instanceId));
+ }
+
+ /**
+ * Destroys the container and removes its cluster instance, logging but
+ * otherwise ignoring failures (e.g. when neither exists yet).
+ */
+ private void bestEffortRemove(String containerId) {
+ log.debug(String.format("Best effort removal of container '%s'", containerId));
+
+ try {
+ provider.destroy(containerId);
+ log.debug(String.format("Container '%s' destroyed", containerId));
+ } catch (Exception e) {
+ // deliberate best-effort: absence of the container is not an error here
+ log.debug(String.format("Container '%s' does not exist", containerId));
+ }
+
+ try {
+ admin.removeInstance(containerId);
+ log.debug(String.format("Instance '%s' removed", containerId));
+ } catch (Exception e) {
+ // deliberate best-effort: absence of the instance is not an error here
+ log.debug(String.format("Instance '%s' does not exist", containerId));
+ }
+
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/provider/ProviderStateModelFactory.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/provider/ProviderStateModelFactory.java b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/provider/ProviderStateModelFactory.java
new file mode 100644
index 0000000..2613336
--- /dev/null
+++ b/recipes/auto-scale/src/main/java/org/apache/helix/autoscale/provider/ProviderStateModelFactory.java
@@ -0,0 +1,27 @@
+package org.apache.helix.autoscale.provider;
+
+import org.apache.helix.autoscale.ClusterAdmin;
+import org.apache.helix.autoscale.ContainerProvider;
+import org.apache.helix.participant.statemachine.StateModelFactory;
+
+/**
+ * Factory for {@link ProviderStateModel}. Injects {@link ClusterAdmin} for
+ * managed cluster and {@link ContainerProvider}.
+ *
+ */
+class ProviderStateModelFactory extends StateModelFactory<ProviderStateModel> {
+
+ // injected collaborators shared by every state model this factory creates
+ final ContainerProvider provider;
+ final ClusterAdmin admin;
+
+ public ProviderStateModelFactory(ContainerProvider provider, ClusterAdmin admin) {
+ super();
+ this.provider = provider;
+ this.admin = admin;
+ }
+
+ /**
+ * Creates a new {@link ProviderStateModel}; the partition name is unused, so
+ * all instances share the same provider and admin.
+ */
+ @Override
+ public ProviderStateModel createNewStateModel(String partitionName) {
+ return new ProviderStateModel(provider, admin);
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/resources/Boot2By2Local.properties
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/resources/Boot2By2Local.properties b/recipes/auto-scale/src/main/resources/Boot2By2Local.properties
new file mode 100644
index 0000000..13fb4ff
--- /dev/null
+++ b/recipes/auto-scale/src/main/resources/Boot2By2Local.properties
@@ -0,0 +1,87 @@
+#
+# Zookeeper (optional)
+#
+zookeeper.datadir=/tmp/autoscale/zk/data
+zookeeper.logdir=/tmp/autoscale/zk/log
+zookeeper.port=2199
+
+#
+# Cluster
+#
+cluster.name=cluster
+cluster.address=localhost:2199
+
+#
+# Resource
+#
+resource.0.name=wsprod
+resource.0.cluster=cluster
+resource.0.address=localhost:2199
+resource.0.container=webserver
+resource.0.model=MasterSlave
+resource.0.partitions=15
+resource.0.replica=1
+
+resource.1.name=dbprod
+resource.1.cluster=cluster
+resource.1.address=localhost:2199
+resource.1.container=database
+resource.1.model=MasterSlave
+resource.1.partitions=8
+resource.1.replica=3
+
+#
+# Controller
+#
+controller.name=controller
+controller.cluster=cluster
+controller.address=localhost:2199
+controller.autorefresh=5000
+
+#
+# Metacluster
+#
+metacluster.name=meta
+metacluster.address=localhost:2199
+metacluster.managedcluster=cluster
+metacluster.managedaddress=localhost:2199
+
+#
+# Metaresource
+#
+metaresource.0.name=webserver
+metaresource.0.metacluster=meta
+metaresource.0.metaaddress=localhost:2199
+metaresource.0.class=org.apache.helix.autoscale.impl.container.DummyMasterSlaveProcess
+
+metaresource.1.name=database
+metaresource.1.metacluster=meta
+metaresource.1.metaaddress=localhost:2199
+metaresource.1.class=org.apache.helix.autoscale.impl.container.DummyMasterSlaveProcess
+
+#
+# Metaprovider
+#
+metaprovider.0.name=provider0
+metaprovider.0.metacluster=meta
+metaprovider.0.metaaddress=localhost:2199
+metaprovider.0.class=org.apache.helix.autoscale.impl.local.LocalContainerProviderProcess
+
+metaprovider.1.name=provider1
+metaprovider.1.metacluster=meta
+metaprovider.1.metaaddress=localhost:2199
+metaprovider.1.class=org.apache.helix.autoscale.impl.local.LocalContainerProviderProcess
+
+#
+# Metacontroller
+#
+metacontroller.name=metacontroller
+metacontroller.metacluster=meta
+metacontroller.metaaddress=localhost:2199
+metacontroller.autorefresh=5000
+
+metacontroller.status.class=org.apache.helix.autoscale.impl.local.LocalStatusProvider
+
+metacontroller.target.class=org.apache.helix.autoscale.impl.StaticTargetProvider
+metacontroller.target.webserver=5
+metacontroller.target.database=3
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/resources/Boot2By2Shell.properties
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/resources/Boot2By2Shell.properties b/recipes/auto-scale/src/main/resources/Boot2By2Shell.properties
new file mode 100644
index 0000000..079771b
--- /dev/null
+++ b/recipes/auto-scale/src/main/resources/Boot2By2Shell.properties
@@ -0,0 +1,87 @@
+#
+# Zookeeper (optional)
+#
+zookeeper.datadir=/tmp/autoscale/zk/data
+zookeeper.logdir=/tmp/autoscale/zk/log
+zookeeper.port=2199
+
+#
+# Cluster
+#
+cluster.name=cluster
+cluster.address=localhost:2199
+
+#
+# Resource
+#
+resource.0.name=wsprod
+resource.0.cluster=cluster
+resource.0.address=localhost:2199
+resource.0.container=webserver
+resource.0.model=MasterSlave
+resource.0.partitions=15
+resource.0.replica=1
+
+resource.1.name=dbprod
+resource.1.cluster=cluster
+resource.1.address=localhost:2199
+resource.1.container=database
+resource.1.model=MasterSlave
+resource.1.partitions=8
+resource.1.replica=3
+
+#
+# Controller
+#
+controller.name=controller
+controller.cluster=cluster
+controller.address=localhost:2199
+controller.autorefresh=5000
+
+#
+# Metacluster
+#
+metacluster.name=meta
+metacluster.address=localhost:2199
+metacluster.managedcluster=cluster
+metacluster.managedaddress=localhost:2199
+
+#
+# Metaresource
+#
+metaresource.0.name=webserver
+metaresource.0.metacluster=meta
+metaresource.0.metaaddress=localhost:2199
+metaresource.0.class=org.apache.helix.autoscale.impl.container.DummyMasterSlaveProcess
+
+metaresource.1.name=database
+metaresource.1.metacluster=meta
+metaresource.1.metaaddress=localhost:2199
+metaresource.1.class=org.apache.helix.autoscale.impl.container.DummyMasterSlaveProcess
+
+#
+# Metaprovider
+#
+metaprovider.0.name=provider0
+metaprovider.0.metacluster=meta
+metaprovider.0.metaaddress=localhost:2199
+metaprovider.0.class=org.apache.helix.autoscale.impl.shell.ShellContainerProviderProcess
+
+metaprovider.1.name=provider1
+metaprovider.1.metacluster=meta
+metaprovider.1.metaaddress=localhost:2199
+metaprovider.1.class=org.apache.helix.autoscale.impl.shell.ShellContainerProviderProcess
+
+#
+# Metacontroller
+#
+metacontroller.name=metacontroller
+metacontroller.metacluster=meta
+metacontroller.metaaddress=localhost:2199
+metacontroller.autorefresh=5000
+
+metacontroller.status.class=org.apache.helix.autoscale.impl.shell.ShellStatusProvider
+
+metacontroller.target.class=org.apache.helix.autoscale.impl.StaticTargetProvider
+metacontroller.target.webserver=5
+metacontroller.target.database=3
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/resources/Boot2By2Yarn.properties
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/resources/Boot2By2Yarn.properties b/recipes/auto-scale/src/main/resources/Boot2By2Yarn.properties
new file mode 100644
index 0000000..e447711
--- /dev/null
+++ b/recipes/auto-scale/src/main/resources/Boot2By2Yarn.properties
@@ -0,0 +1,98 @@
+#
+# Zookeeper (optional)
+#
+zookeeper.datadir=/tmp/autoscale/zk/data
+zookeeper.logdir=/tmp/autoscale/zk/log
+zookeeper.port=2199
+
+#
+# Cluster
+#
+cluster.name=cluster
+cluster.address=rm:2199
+
+#
+# Resource
+#
+resource.0.name=wsprod
+resource.0.cluster=cluster
+resource.0.address=rm:2199
+resource.0.container=webserver
+resource.0.model=MasterSlave
+resource.0.partitions=15
+resource.0.replica=1
+
+resource.1.name=dbprod
+resource.1.cluster=cluster
+resource.1.address=rm:2199
+resource.1.container=database
+resource.1.model=MasterSlave
+resource.1.partitions=8
+resource.1.replica=3
+
+#
+# Controller
+#
+controller.name=controller
+controller.cluster=cluster
+controller.address=rm:2199
+controller.autorefresh=5000
+
+#
+# Metacluster
+#
+metacluster.name=meta
+metacluster.address=rm:2199
+metacluster.managedcluster=cluster
+metacluster.managedaddress=rm:2199
+
+#
+# Metaresource
+#
+metaresource.0.name=webserver
+metaresource.0.metacluster=meta
+metaresource.0.metaaddress=rm:2199
+metaresource.0.class=org.apache.helix.autoscale.impl.container.DummyMasterSlaveProcess
+
+metaresource.1.name=database
+metaresource.1.metacluster=meta
+metaresource.1.metaaddress=rm:2199
+metaresource.1.class=org.apache.helix.autoscale.impl.container.DummyMasterSlaveProcess
+
+#
+# Metaprovider
+#
+metaprovider.0.name=provider0
+metaprovider.0.metacluster=meta
+metaprovider.0.metaaddress=rm:2199
+metaprovider.0.class=org.apache.helix.autoscale.impl.yarn.YarnContainerProviderProcess
+metaprovider.0.yarndata=rm:2199
+metaprovider.0.resourcemananger=rm:8032
+metaprovider.0.scheduler=rm:8030
+metaprovider.0.user=yarn
+metaprovider.0.hdfs=hdfs://rm:9000/
+
+metaprovider.1.name=provider1
+metaprovider.1.metacluster=meta
+metaprovider.1.metaaddress=rm:2199
+metaprovider.1.class=org.apache.helix.autoscale.impl.yarn.YarnContainerProviderProcess
+metaprovider.1.yarndata=rm:2199
+metaprovider.1.resourcemananger=rm:8032
+metaprovider.1.scheduler=rm:8030
+metaprovider.1.user=yarn
+metaprovider.1.hdfs=hdfs://rm:9000/
+
+#
+# Metacontroller
+#
+metacontroller.name=metacontroller
+metacontroller.metacluster=meta
+metacontroller.metaaddress=rm:2199
+metacontroller.autorefresh=5000
+
+metacontroller.status.class=org.apache.helix.autoscale.impl.yarn.YarnStatusProvider
+metacontroller.status.yarndata=rm:2199
+
+metacontroller.target.class=org.apache.helix.autoscale.impl.StaticTargetProvider
+metacontroller.target.webserver=5
+metacontroller.target.database=3
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/resources/BootLocal.properties
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/resources/BootLocal.properties b/recipes/auto-scale/src/main/resources/BootLocal.properties
new file mode 100644
index 0000000..15905fc
--- /dev/null
+++ b/recipes/auto-scale/src/main/resources/BootLocal.properties
@@ -0,0 +1,68 @@
+#
+# Zookeeper (optional)
+#
+zookeeper.datadir=/tmp/autoscale/zk/data
+zookeeper.logdir=/tmp/autoscale/zk/log
+zookeeper.port=2199
+
+#
+# Cluster
+#
+cluster.name=cluster
+cluster.address=localhost:2199
+
+#
+# Resource
+#
+resource.name=resource
+resource.cluster=cluster
+resource.address=localhost:2199
+resource.container=container
+resource.model=MasterSlave
+resource.partitions=10
+resource.replica=3
+
+#
+# Controller
+#
+controller.name=controller
+controller.cluster=cluster
+controller.address=localhost:2199
+controller.autorefresh=5000
+
+#
+# Metacluster
+#
+metacluster.name=meta
+metacluster.address=localhost:2199
+metacluster.managedcluster=cluster
+metacluster.managedaddress=localhost:2199
+
+#
+# Metaresource
+#
+metaresource.name=container
+metaresource.metacluster=meta
+metaresource.metaaddress=localhost:2199
+metaresource.class=org.apache.helix.autoscale.impl.container.DummyMasterSlaveProcess
+
+#
+# Metaprovider
+#
+metaprovider.name=provider
+metaprovider.metacluster=meta
+metaprovider.metaaddress=localhost:2199
+metaprovider.class=org.apache.helix.autoscale.impl.local.LocalContainerProviderProcess
+
+#
+# Metacontroller
+#
+metacontroller.name=metacontroller
+metacontroller.metacluster=meta
+metacontroller.metaaddress=localhost:2199
+metacontroller.autorefresh=5000
+
+metacontroller.status.class=org.apache.helix.autoscale.impl.local.LocalStatusProvider
+
+metacontroller.target.class=org.apache.helix.autoscale.impl.StaticTargetProvider
+metacontroller.target.container=7
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/resources/RedisYarnSample.properties
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/resources/RedisYarnSample.properties b/recipes/auto-scale/src/main/resources/RedisYarnSample.properties
new file mode 100644
index 0000000..eb58fd2
--- /dev/null
+++ b/recipes/auto-scale/src/main/resources/RedisYarnSample.properties
@@ -0,0 +1,89 @@
+###############################################################################
+# Managed cluster configuration
+###############################################################################
+
+cluster.name = rediscluster
+cluster.address = helix.zookeeper.intra
+
+#
+# Helix cluster resources
+# (as usual)
+#
+resource.0.name = redis
+resource.0.cluster = rediscluster
+resource.0.address = helix.zookeeper.intra
+# container type, see meta cluster
+resource.0.container = rediscontainer
+resource.0.model = OnlineOffline
+# partitions are auto-assigned
+resource.0.partitions = 1024
+# no replication needed
+resource.0.replica = 1
+
+#
+# Helix cluster controllers
+# (as usual)
+#
+controller.name = controller
+controller.cluster = rediscluster
+controller.address = helix.zookeeper.intra
+
+###############################################################################
+# Meta cluster configuration
+###############################################################################
+
+metacluster.name = meta
+metacluster.address = helix.zookeeper.intra
+metacluster.managedcluster = rediscluster
+metacluster.managedaddress = helix.zookeeper.intra
+
+#
+# Container Types
+# (Base configuration for instances spawned by providers)
+#
+metaresource.0.name = rediscontainer
+metaresource.0.class = org.apache.helix.autoscale.impl.container.RedisServerProcess
+metaresource.0.metacluster = meta
+metaresource.0.metaaddress = helix.zookeeper.intra
+metaresource.0.address = apps.zookeeper.intra
+# instance id is added to the base port
+metaresource.0.baseport = 17000
+
+#
+# Container Instance Providers
+# (Endpoints of container deployment frameworks, e.g. Apache YARN)
+#
+metaprovider.0.name = provider
+metaprovider.0.class = org.apache.helix.autoscale.impl.yarn.YarnContainerProviderProcess
+metaprovider.0.metacluster = meta
+metaprovider.0.metaaddress = helix.zookeeper.intra
+metaprovider.0.yarndata = yarn.zookeeper.intra
+metaprovider.0.resourcemananger = yarn-rm.intra:8032
+metaprovider.0.scheduler = yarn-rm.intra:8030
+metaprovider.0.hdfs = hdfs://yarn-hdfs.intra:9000/
+metaprovider.0.user = yarnuser
+
+#
+# Helix meta cluster controller
+# (Monitors system state and adapts config of Container Instance Providers)
+#
+metacontroller.name = metacontroller
+metacontroller.metacluster = meta
+metacontroller.metaaddress = helix.zookeeper.intra
+# status refresh interval
+metacontroller.autorefresh = 10000
+
+#
+# Container Status Provider
+# (Provides low-level data on container instance health)
+#
+metacontroller.status.class = org.apache.helix.autoscale.impl.yarn.YarnStatusProvider
+metacontroller.status.yarndata = yarn.zookeeper.intra
+
+#
+# Performance Target Model
+# (Provides target number of container instances)
+#
+metacontroller.target.class = org.apache.helix.autoscale.impl.RedisTargetProvider
+metacontroller.target.address = apps.zookeeper.intra
+# interval for Tps probes
+metacontroller.target.interval = 10000
+# timeout of probe
+metacontroller.target.timeout = 9000
+# target "GET" Tps
+metacontroller.target.get = 1000000
+# min container count
+metacontroller.target.min = 1
+# max container count
+metacontroller.target.max = 23
+# exponential average smoothing factor
+metacontroller.target.alpha = 0.1
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/main/resources/log4j.properties
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/main/resources/log4j.properties b/recipes/auto-scale/src/main/resources/log4j.properties
new file mode 100644
index 0000000..7f29be2
--- /dev/null
+++ b/recipes/auto-scale/src/main/resources/log4j.properties
@@ -0,0 +1,30 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# Set root logger level to ERROR and its only appender to C.
+log4j.rootLogger=ERROR, C
+
+# C is set to be a ConsoleAppender.
+log4j.appender.C=org.apache.log4j.ConsoleAppender
+log4j.appender.C.layout=org.apache.log4j.PatternLayout
+log4j.appender.C.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n
+
+log4j.logger.org.I0Itec=ERROR
+log4j.logger.org.apache=ERROR
+
+log4j.logger.org.apache.helix.autoscale=INFO
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/test/config/testng.xml
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/test/config/testng.xml b/recipes/auto-scale/src/test/config/testng.xml
new file mode 100644
index 0000000..f710791
--- /dev/null
+++ b/recipes/auto-scale/src/test/config/testng.xml
@@ -0,0 +1,27 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied. See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+<!DOCTYPE suite SYSTEM "http://testng.org/testng-1.0.dtd">
+<suite name="Suite" parallel="none">
+ <test name="Test" preserve-order="true">
+ <packages>
+ <package name="org.apache.helix.autoscale.*"/>
+ </packages>
+ </test>
+</suite>
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/BootstrapperIT.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/BootstrapperIT.java b/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/BootstrapperIT.java
new file mode 100644
index 0000000..5dd7820
--- /dev/null
+++ b/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/BootstrapperIT.java
@@ -0,0 +1,134 @@
+package org.apache.helix.autoscale;
+
+import java.io.IOException;
+import java.util.Collection;
+import java.util.Properties;
+
+import org.apache.helix.HelixAdmin;
+import org.apache.helix.autoscale.Service;
+import org.apache.helix.autoscale.bootstrapper.Boot;
+import org.apache.helix.autoscale.bootstrapper.ClusterService;
+import org.apache.helix.autoscale.bootstrapper.ControllerService;
+import org.apache.helix.autoscale.bootstrapper.MetaClusterService;
+import org.apache.helix.autoscale.bootstrapper.MetaControllerService;
+import org.apache.helix.autoscale.bootstrapper.MetaProviderService;
+import org.apache.helix.autoscale.bootstrapper.MetaResourceService;
+import org.apache.helix.autoscale.bootstrapper.ResourceService;
+import org.apache.helix.autoscale.bootstrapper.ZookeeperService;
+import org.apache.helix.manager.zk.ZKHelixAdmin;
+import org.apache.log4j.Logger;
+import org.testng.Assert;
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.Test;
+
+/**
+ * Bootstrapping tool test. Reads cluster configuration from *.properties files,
+ * spawns services and verifies number of active partitions and containers
+ *
+ * @see Boot
+ */
+@Test(groups = { "integration", "boot" })
+public class BootstrapperIT {
+
+    static final Logger log = Logger.getLogger(BootstrapperIT.class);
+
+    /** Service types every bootstrapped deployment is expected to contain. */
+    static final Class<?>[] EXPECTED_SERVICES = { ZookeeperService.class, ClusterService.class, ResourceService.class,
+            ControllerService.class, MetaClusterService.class, MetaResourceService.class, MetaProviderService.class,
+            MetaControllerService.class };
+
+    Boot       boot;
+    HelixAdmin admin;
+
+    /** Closes the admin connection and stops all bootstrapped services. */
+    @AfterMethod(alwaysRun = true)
+    public void teardown() throws Exception {
+        log.debug("tearing down bootstrap test");
+        if (admin != null) {
+            admin.close();
+            admin = null;
+        }
+        if (boot != null) {
+            boot.stop();
+            boot = null;
+        }
+    }
+
+    @Test(timeOut = TestUtils.TEST_TIMEOUT, groups = { "local" })
+    public void bootstrapLocalTest() throws Exception {
+        boot = new Boot();
+        boot.configure(getProperties("BootLocal.properties"));
+        boot.start();
+
+        // all expected service types must have been spawned
+        // (the "getServcies" spelling originates in Boot's public API)
+        for (Class<?> serviceClass : EXPECTED_SERVICES) {
+            Assert.assertTrue(containsInstanceOf(boot.getServcies(), serviceClass),
+                    "missing service " + serviceClass.getSimpleName());
+        }
+
+        final long limit = System.currentTimeMillis() + TestUtils.REBALANCE_TIMEOUT;
+
+        admin = new ZKHelixAdmin("localhost:2199");
+        waitUntil(admin, "meta", "container", 1, 7, (limit - System.currentTimeMillis()));
+        waitUntil(admin, "cluster", "resource", 7, 10, (limit - System.currentTimeMillis()));
+
+    }
+
+    @Test(timeOut = TestUtils.TEST_TIMEOUT, groups = { "local" })
+    public void bootstrap2By2LocalTest() throws Exception {
+        boot = new Boot();
+        boot.configure(getProperties("Boot2By2Local.properties"));
+        boot.start();
+
+        verify2By2Setup();
+    }
+
+    @Test(timeOut = TestUtils.TEST_TIMEOUT, groups = { "shell" })
+    public void bootstrap2By2ShellTest() throws Exception {
+        boot = new Boot();
+        boot.configure(getProperties("Boot2By2Shell.properties"));
+        boot.start();
+
+        verify2By2Setup();
+    }
+
+    @Test(timeOut = TestUtils.TEST_TIMEOUT, groups = { "yarn" })
+    public void bootstrap2By2YarnTest() throws Exception {
+        boot = new Boot();
+        boot.configure(getProperties("Boot2By2Yarn.properties"));
+        boot.start();
+
+        verify2By2Setup();
+    }
+
+    /** Verifies instance and partition counts of the 2-resource, 2-provider sample setup. */
+    void verify2By2Setup() throws Exception {
+        final long limit = System.currentTimeMillis() + TestUtils.REBALANCE_TIMEOUT;
+        final String address = "localhost:2199";
+
+        log.debug(String.format("connecting to zookeeper at '%s'", address));
+
+        admin = new ZKHelixAdmin(address);
+        waitUntil(admin, "meta", "database", 2, 3, (limit - System.currentTimeMillis()));
+        waitUntil(admin, "meta", "webserver", 2, 5, (limit - System.currentTimeMillis()));
+        waitUntil(admin, "cluster", "dbprod", 3, 8, (limit - System.currentTimeMillis()));
+        waitUntil(admin, "cluster", "wsprod", 5, 15, (limit - System.currentTimeMillis()));
+    }
+
+    /**
+     * Blocks until the given resource reaches the expected instance and
+     * partition counts, sharing a single deadline across both waits.
+     */
+    static void waitUntil(HelixAdmin admin, String cluster, String resource, int instanceCount, int partitionCount, long timeout) throws Exception {
+        final long limit = System.currentTimeMillis() + timeout;
+        TestUtils.waitUntilInstanceCount(admin, cluster, resource, instanceCount, (limit - System.currentTimeMillis()));
+        TestUtils.waitUntilPartitionCount(admin, cluster, resource, partitionCount, (limit - System.currentTimeMillis()));
+    }
+
+    /**
+     * Loads a properties file from the system classpath.
+     *
+     * @throws IOException if the resource is missing or cannot be read
+     */
+    static Properties getProperties(String resourcePath) throws IOException {
+        // fail with a clear message instead of an NPE when the resource is absent
+        java.io.InputStream input = ClassLoader.getSystemResourceAsStream(resourcePath);
+        if (input == null)
+            throw new IOException("classpath resource not found: " + resourcePath);
+        try {
+            Properties properties = new Properties();
+            properties.load(input);
+            return properties;
+        } finally {
+            input.close();
+        }
+    }
+
+    /** Returns true if the collection contains an instance assignable to clazz. */
+    static boolean containsInstanceOf(Collection<Service> services, Class<?> clazz) {
+        for (Service service : services) {
+            if (clazz.isAssignableFrom(service.getClass()))
+                return true;
+        }
+        return false;
+    }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/FailoverIT.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/FailoverIT.java b/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/FailoverIT.java
new file mode 100644
index 0000000..429146a
--- /dev/null
+++ b/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/FailoverIT.java
@@ -0,0 +1,195 @@
+package org.apache.helix.autoscale;
+
+import java.util.Collections;
+import java.util.Iterator;
+
+import org.apache.helix.autoscale.Service;
+import org.apache.helix.autoscale.impl.StaticTargetProvider;
+import org.apache.helix.autoscale.impl.local.LocalContainerProviderProcess;
+import org.apache.helix.autoscale.impl.local.LocalContainerSingleton;
+import org.apache.helix.autoscale.impl.local.LocalStatusProvider;
+import org.apache.helix.autoscale.impl.shell.ShellContainerProviderProcess;
+import org.apache.helix.autoscale.impl.shell.ShellContainerSingleton;
+import org.apache.helix.autoscale.impl.shell.ShellStatusProvider;
+import org.apache.helix.autoscale.impl.yarn.YarnContainerProviderProcess;
+import org.apache.helix.autoscale.impl.yarn.YarnStatusProvider;
+import org.apache.helix.autoscale.impl.yarn.ZookeeperYarnDataProvider;
+import org.apache.helix.autoscale.provider.ProviderRebalancer;
+import org.apache.log4j.Logger;
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.Test;
+
+/**
+ * Fault-recovery test for individual containers and whole providers. Missing
+ * containers should be replaced by the meta cluster Rebalancer using remaining
+ * active providers.
+ *
+ * @see ProviderRebalancer
+ */
+@Test(groups = { "integration", "failure" })
+public class FailoverIT {
+
+    static final Logger log = Logger.getLogger(FailoverIT.class);
+
+    // target container count the rebalancer must restore after each failure
+    static final int  CONTAINER_COUNT         = 7;
+
+    // grace period (ms) for failure detection before forcing a rebalance
+    static final long FAILURE_DETECTION_DELAY = 3000;
+
+    StaticTargetProvider targetProvider;
+    YarnStatusProvider   yarnStatusProvider;
+
+    /** Registers a shutdown hook so cluster state is cleaned up even on abort. */
+    @BeforeClass(alwaysRun = true)
+    public void setupClass() {
+        log.info("installing shutdown hook");
+        Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() {
+            @Override
+            public void run() {
+                try {
+                    teardownTest();
+                } catch (Exception e) {
+                    // best-effort cleanup during JVM shutdown; log instead of swallowing silently
+                    log.warn("teardown from shutdown hook failed", e);
+                }
+            }
+        }));
+    }
+
+    /** Clears leftover state, then creates a fresh target provider per test. */
+    @BeforeMethod(alwaysRun = true)
+    public void setupTest() throws Exception {
+        teardownTest();
+        targetProvider = new StaticTargetProvider(Collections.singletonMap(TestUtils.metaResourceName, CONTAINER_COUNT));
+    }
+
+    @AfterMethod(alwaysRun = true)
+    public void teardownTest() throws Exception {
+        TestUtils.stopTestCluster();
+
+        if (yarnStatusProvider != null) {
+            yarnStatusProvider.stop();
+            yarnStatusProvider = null;
+        }
+
+        TestUtils.stopZookeeper();
+    }
+
+    @Test(timeOut = TestUtils.TEST_TIMEOUT, groups = { "local" })
+    public void testLocalContainerFailover() throws Exception {
+        log.info("testing local container failover");
+        TestUtils.configure();
+        TestUtils.startZookeeper();
+        TestUtils.startTestCluster(targetProvider, new LocalStatusProvider(), makeLocalProviders(3));
+        killLocalContainers();
+    }
+
+    @Test(timeOut = TestUtils.TEST_TIMEOUT, groups = { "local" })
+    public void testLocalProviderFailover() throws Exception {
+        log.info("testing local provider failover");
+        TestUtils.configure();
+        TestUtils.startZookeeper();
+        TestUtils.startTestCluster(targetProvider, new LocalStatusProvider(), makeLocalProviders(3));
+        killProvider();
+    }
+
+    @Test(timeOut = TestUtils.TEST_TIMEOUT, groups = { "shell" })
+    public void testShellContainerFailover() throws Exception {
+        log.info("testing shell container failover");
+        TestUtils.configure();
+        TestUtils.startZookeeper();
+        TestUtils.startTestCluster(targetProvider, new ShellStatusProvider(), makeShellProviders(3));
+        killShellContainers();
+    }
+
+    @Test(timeOut = TestUtils.TEST_TIMEOUT, groups = { "shell" })
+    public void testShellProviderFailover() throws Exception {
+        log.info("testing shell provider failover");
+        TestUtils.configure();
+        TestUtils.startZookeeper();
+        TestUtils.startTestCluster(targetProvider, new ShellStatusProvider(), makeShellProviders(3));
+        killProvider();
+    }
+
+    @Test(timeOut = TestUtils.TEST_TIMEOUT, groups = { "yarn" })
+    public void testYarnContainerFailover() throws Exception {
+        log.info("testing yarn container failover");
+        TestUtils.configure("distributed.properties");
+        TestUtils.startZookeeper();
+        yarnStatusProvider = new YarnStatusProvider(TestUtils.zkAddress);
+        yarnStatusProvider.start();
+        TestUtils.startTestCluster(targetProvider, yarnStatusProvider, makeYarnProviders(3));
+        killYarnContainers();
+    }
+
+    @Test(timeOut = TestUtils.TEST_TIMEOUT, groups = { "yarn" })
+    public void testYarnProviderFailover() throws Exception {
+        log.info("testing yarn provider failover");
+        TestUtils.configure("distributed.properties");
+        TestUtils.startZookeeper();
+        yarnStatusProvider = new YarnStatusProvider(TestUtils.zkAddress);
+        yarnStatusProvider.start();
+        TestUtils.startTestCluster(targetProvider, yarnStatusProvider, makeYarnProviders(3));
+        killProvider();
+    }
+
+    /** Kills a subset of local containers and verifies they are restored. */
+    void killLocalContainers() throws Exception {
+        LocalContainerSingleton.killProcess("container_2");
+        LocalContainerSingleton.killProcess("container_4");
+        LocalContainerSingleton.killProcess("container_6");
+        awaitRecovery();
+    }
+
+    /** Kills a subset of shell containers and verifies they are restored. */
+    void killShellContainers() throws Exception {
+        ShellContainerSingleton.killProcess("container_2");
+        ShellContainerSingleton.killProcess("container_4");
+        ShellContainerSingleton.killProcess("container_6");
+        awaitRecovery();
+    }
+
+    /** Deletes yarn container metadata (simulating failure) and verifies recovery. */
+    void killYarnContainers() throws Exception {
+        ZookeeperYarnDataProvider yarnDataService = new ZookeeperYarnDataProvider(TestUtils.zkAddress);
+        yarnDataService.start();
+        try {
+            yarnDataService.delete("container_2");
+            yarnDataService.delete("container_4");
+            yarnDataService.delete("container_6");
+        } finally {
+            // always release the zookeeper connection, even if a delete fails
+            yarnDataService.stop();
+        }
+        awaitRecovery();
+    }
+
+    /** Stops one provider process and verifies the remaining ones take over. */
+    static void killProvider() throws Exception {
+        Iterator<Service> itService = TestUtils.providerServices.iterator();
+        itService.next().stop();
+        itService.remove();
+
+        TestUtils.rebalanceTestCluster();
+        TestUtils.waitUntilRebalancedCount(CONTAINER_COUNT);
+    }
+
+    /**
+     * Waits out the failure-detection grace period, then triggers a rebalance
+     * and blocks until the target container count is restored.
+     */
+    static void awaitRecovery() throws Exception {
+        Thread.sleep(FAILURE_DETECTION_DELAY);
+        TestUtils.rebalanceTestCluster();
+        TestUtils.waitUntilRebalancedCount(CONTAINER_COUNT);
+    }
+
+    LocalContainerProviderProcess[] makeLocalProviders(int count) throws Exception {
+        LocalContainerProviderProcess[] localProviders = new LocalContainerProviderProcess[count];
+        for (int i = 0; i < count; i++) {
+            localProviders[i] = TestUtils.makeLocalProvider("provider_" + i);
+        }
+        return localProviders;
+    }
+
+    ShellContainerProviderProcess[] makeShellProviders(int count) throws Exception {
+        ShellContainerProviderProcess[] shellProviders = new ShellContainerProviderProcess[count];
+        for (int i = 0; i < count; i++) {
+            shellProviders[i] = TestUtils.makeShellProvider("provider_" + i);
+        }
+        return shellProviders;
+    }
+
+    YarnContainerProviderProcess[] makeYarnProviders(int count) throws Exception {
+        YarnContainerProviderProcess[] yarnProviders = new YarnContainerProviderProcess[count];
+        for (int i = 0; i < count; i++) {
+            yarnProviders[i] = TestUtils.makeYarnProvider("provider_" + i);
+        }
+        return yarnProviders;
+    }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/LocalContainerProviderIT.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/LocalContainerProviderIT.java b/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/LocalContainerProviderIT.java
new file mode 100644
index 0000000..94ea5ac
--- /dev/null
+++ b/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/LocalContainerProviderIT.java
@@ -0,0 +1,80 @@
+package org.apache.helix.autoscale;
+
+import java.util.Collections;
+
+import org.apache.helix.autoscale.impl.StaticTargetProvider;
+import org.apache.helix.autoscale.impl.local.LocalContainerProviderProcess;
+import org.apache.helix.autoscale.impl.local.LocalStatusProvider;
+import org.apache.log4j.Logger;
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.Test;
+
+/**
+ * Local container provider and local status provider test. Scale-up and -down
+ * only, no failures.
+ *
+ * @see LocalContainerProvider
+ * @see LocalStatusProvider
+ */
+@Test(groups = { "integration", "local" })
+public class LocalContainerProviderIT {
+
+    static final Logger log = Logger.getLogger(LocalContainerProviderIT.class);
+
+    // baseline number of containers requested from the static target provider
+    static final int CONTAINER_COUNT = 4;
+
+    StaticTargetProvider clusterStatusProvider;
+    LocalContainerProviderProcess containerProvider;
+    LocalStatusProvider containerStatusProvider;
+
+    // Fresh zookeeper and test cluster per test method; teardownTest() runs
+    // first to clear any state left behind by a previously aborted run.
+    @BeforeMethod(alwaysRun = true)
+    public void setupTest() throws Exception {
+        teardownTest();
+        TestUtils.configure();
+        TestUtils.startZookeeper();
+        containerProvider = TestUtils.makeLocalProvider("provider_0");
+        clusterStatusProvider = new StaticTargetProvider(Collections.singletonMap(TestUtils.metaResourceName, CONTAINER_COUNT));
+        containerStatusProvider = new LocalStatusProvider();
+        TestUtils.startTestCluster(clusterStatusProvider, containerStatusProvider, containerProvider);
+    }
+
+    @AfterMethod(alwaysRun = true)
+    public void teardownTest() throws Exception {
+        TestUtils.stopTestCluster();
+        TestUtils.stopZookeeper();
+    }
+
+    // Target count unchanged: cluster should settle at the baseline.
+    @Test(timeOut = TestUtils.TEST_TIMEOUT)
+    public void testStatic() throws Exception {
+        log.info("testing static");
+        setContainerCount(CONTAINER_COUNT);
+    }
+
+    @Test(timeOut = TestUtils.TEST_TIMEOUT)
+    public void testScaleUp() throws Exception {
+        log.info("testing scale up");
+        setContainerCount(CONTAINER_COUNT + 2);
+    }
+
+    @Test(timeOut = TestUtils.TEST_TIMEOUT)
+    public void testScaleDown() throws Exception {
+        log.info("testing scale down");
+        setContainerCount(CONTAINER_COUNT - 2);
+    }
+
+    // Exercises repeated up- and down-scaling within one cluster lifetime.
+    @Test(timeOut = TestUtils.TEST_TIMEOUT)
+    public void testScaleCycle() throws Exception {
+        log.info("testing scale cycle");
+        setContainerCount(CONTAINER_COUNT + 2);
+        setContainerCount(CONTAINER_COUNT);
+        setContainerCount(CONTAINER_COUNT - 2);
+        setContainerCount(CONTAINER_COUNT);
+    }
+
+    // Updates the target provider and triggers a blocking rebalance.
+    void setContainerCount(int newContainerCount) throws Exception {
+        log.debug(String.format("Setting container count to %d", newContainerCount));
+        clusterStatusProvider.setTargetContainerCount(TestUtils.metaResourceName, newContainerCount);
+        TestUtils.rebalanceTestCluster();
+    }
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/ShellContainerProviderIT.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/ShellContainerProviderIT.java b/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/ShellContainerProviderIT.java
new file mode 100644
index 0000000..dce4429
--- /dev/null
+++ b/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/ShellContainerProviderIT.java
@@ -0,0 +1,95 @@
+package org.apache.helix.autoscale;
+
+import java.util.Collections;
+
+import org.apache.helix.autoscale.impl.StaticTargetProvider;
+import org.apache.helix.autoscale.impl.shell.ShellContainerProviderProcess;
+import org.apache.helix.autoscale.impl.shell.ShellStatusProvider;
+import org.apache.log4j.Logger;
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.Test;
+
+/**
+ * Shell container provider and shell status provider test. Scale-up and -down
+ * only, no failures.
+ *
+ * @see ShellContainerProvider
+ * @see ShellStatusProvider
+ */
+@Test(groups = { "integration", "shell" })
+public class ShellContainerProviderIT {
+
+    static final Logger log = Logger.getLogger(ShellContainerProviderIT.class);
+
+    // baseline container count; test and rebalance timeouts come from TestUtils
+    static final int CONTAINER_COUNT = 4;
+
+    StaticTargetProvider clusterStatusProvider;
+    ShellContainerProviderProcess containerProvider;
+    ShellStatusProvider containerStatusProvider;
+
+    /** Registers a shutdown hook so shell processes are reaped even on abort. */
+    @BeforeClass(alwaysRun = true)
+    public void setupClass() {
+        log.info("installing shutdown hook");
+        Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() {
+            @Override
+            public void run() {
+                try {
+                    teardownTest();
+                } catch (Exception e) {
+                    // best-effort cleanup during JVM shutdown; log instead of swallowing silently
+                    log.warn("teardown from shutdown hook failed", e);
+                }
+            }
+        }));
+    }
+
+    /** Clears leftover state, then builds a fresh zookeeper and test cluster. */
+    @BeforeMethod(alwaysRun = true)
+    public void setupTest() throws Exception {
+        teardownTest();
+        TestUtils.configure();
+        TestUtils.startZookeeper();
+        containerProvider = TestUtils.makeShellProvider("provider_0");
+        clusterStatusProvider = new StaticTargetProvider(Collections.singletonMap(TestUtils.metaResourceName, CONTAINER_COUNT));
+        containerStatusProvider = new ShellStatusProvider();
+        TestUtils.startTestCluster(clusterStatusProvider, containerStatusProvider, containerProvider);
+    }
+
+    @AfterMethod(alwaysRun = true)
+    public void teardownTest() throws Exception {
+        TestUtils.stopTestCluster();
+        TestUtils.stopZookeeper();
+    }
+
+    @Test(timeOut = TestUtils.TEST_TIMEOUT)
+    public void testStatic() throws Exception {
+        log.info("testing static");
+        setContainerCount(CONTAINER_COUNT);
+    }
+
+    @Test(timeOut = TestUtils.TEST_TIMEOUT)
+    public void testScaleUp() throws Exception {
+        log.info("testing scale up");
+        setContainerCount(CONTAINER_COUNT + 2);
+    }
+
+    @Test(timeOut = TestUtils.TEST_TIMEOUT)
+    public void testScaleDown() throws Exception {
+        log.info("testing scale down");
+        setContainerCount(CONTAINER_COUNT - 2);
+    }
+
+    @Test(timeOut = TestUtils.TEST_TIMEOUT)
+    public void testScaleCycle() throws Exception {
+        log.info("testing scale cycle");
+        setContainerCount(CONTAINER_COUNT + 2);
+        setContainerCount(CONTAINER_COUNT);
+        setContainerCount(CONTAINER_COUNT - 2);
+        setContainerCount(CONTAINER_COUNT);
+    }
+
+    /** Updates the target provider and triggers a blocking rebalance. */
+    void setContainerCount(int newContainerCount) throws Exception {
+        log.debug(String.format("Setting container count to %d", newContainerCount));
+        clusterStatusProvider.setTargetContainerCount(TestUtils.metaResourceName, newContainerCount);
+        TestUtils.rebalanceTestCluster();
+    }
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/TestUtils.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/TestUtils.java b/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/TestUtils.java
new file mode 100644
index 0000000..c68b2ca
--- /dev/null
+++ b/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/TestUtils.java
@@ -0,0 +1,443 @@
+package org.apache.helix.autoscale;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+import java.util.Set;
+import java.util.concurrent.TimeoutException;
+
+import org.I0Itec.zkclient.IDefaultNameSpace;
+import org.I0Itec.zkclient.ZkClient;
+import org.I0Itec.zkclient.ZkServer;
+import org.apache.commons.io.FileUtils;
+import org.apache.helix.HelixAdmin;
+import org.apache.helix.HelixManager;
+import org.apache.helix.autoscale.Service;
+import org.apache.helix.autoscale.StatusProvider;
+import org.apache.helix.autoscale.StatusProviderService;
+import org.apache.helix.autoscale.TargetProvider;
+import org.apache.helix.autoscale.TargetProviderService;
+import org.apache.helix.autoscale.impl.local.LocalContainerProviderProcess;
+import org.apache.helix.autoscale.impl.shell.ShellContainerProviderProcess;
+import org.apache.helix.autoscale.impl.yarn.YarnContainerProviderProcess;
+import org.apache.helix.autoscale.impl.yarn.YarnContainerProviderProperties;
+import org.apache.helix.autoscale.provider.ProviderProperties;
+import org.apache.helix.autoscale.provider.ProviderRebalancer;
+import org.apache.helix.autoscale.provider.ProviderRebalancerSingleton;
+import org.apache.helix.controller.HelixControllerMain;
+import org.apache.helix.manager.zk.ZKHelixAdmin;
+import org.apache.helix.model.ExternalView;
+import org.apache.helix.model.IdealState;
+import org.apache.helix.model.IdealState.RebalanceMode;
+import org.apache.helix.model.StateModelDefinition;
+import org.apache.helix.tools.StateModelConfigGenerator;
+import org.apache.log4j.Logger;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.Maps;
+
+/**
+ * Utility for creating a test cluster without the bootstrapping tool. Methods
+ * for verifying the number of active instances and partitions in a cluster.
+ *
+ */
+public class TestUtils {
+
+ static final Logger log = Logger.getLogger(TestUtils.class);
+
+ // connection settings, populated by configure() from a properties resource
+ public static int zkPort;
+ public static String zkAddress;
+ public static String resmanAddress;
+ public static String schedulerAddress;
+ public static String hdfsAddress;
+ public static String yarnUser;
+
+ public static final String metaClusterName = "meta-cluster";
+ public static final String managedClusterName = "managed-cluster";
+ public static final String metaResourceName = "container";
+ public static final String managedResourceName = "database";
+
+ public static final int numManagedPartitions = 10;
+ public static final int numManagedReplica = 2;
+
+ // timeouts and polling interval in milliseconds
+ public static final long TEST_TIMEOUT = 120000;
+ public static final long REBALANCE_TIMEOUT = 60000;
+ public static final long POLL_INTERVAL = 1000;
+
+ public static final ProviderProperties providerProperties = new ProviderProperties();
+
+ // mutable cluster state shared by all tests; guarded by start/stop ordering,
+ // not by locks -- tests are expected to run single-threaded
+ public static ZkServer server = null;
+ public static HelixAdmin admin = null;
+ public static HelixManager metaControllerManager = null;
+ public static HelixManager managedControllerManager = null;
+
+ public static Collection<Service> providerServices = new ArrayList<Service>();
+ public static Collection<Service> auxServices = new ArrayList<Service>();
+
+ public static TargetProvider targetProvider = null;
+ public static StatusProvider statusProvider = null;
+
+ static {
+ // eagerly load the default configuration so constants are usable from
+ // annotation attributes; fail class loading loudly if it is missing
+ try {
+ configure();
+ } catch(Exception e) {
+ log.error("Could not setup TestUtils", e);
+ throw new RuntimeException(e);
+ }
+ }
+
+ private TestUtils() {
+ // utility class, not instantiable
+ }
+
+ /**
+ * Configures the test cluster from the default standalone properties resource.
+ */
+ public static void configure() throws IOException {
+ configure("standalone.properties");
+ }
+
+ /**
+ * Configures the test cluster from the given classpath properties resource.
+ */
+ public static void configure(String resourcePath) throws IOException {
+ log.info(String.format("Configuring Test cluster from %s", resourcePath));
+ Properties properties = new Properties();
+ properties.load(ClassLoader.getSystemResourceAsStream(resourcePath));
+ configure(properties);
+ }
+
+ /**
+ * Configures zookeeper and yarn connection settings from the given properties.
+ * All keys are required; missing keys fail the Preconditions checks below.
+ */
+ public static void configure(Properties properties) {
+ log.info(String.format("Configuring from properties '%s'", properties));
+
+ // read the raw port string first: checking the primitive int after
+ // Integer.valueOf() could never fail (autoboxing), while a missing key
+ // would surface as an unhelpful NumberFormatException instead
+ String zkPortValue = properties.getProperty("zookeeper.port");
+ zkAddress = properties.getProperty("zookeeper.address");
+ resmanAddress = properties.getProperty("yarn.resourcemanager");
+ schedulerAddress = properties.getProperty("yarn.scheduler");
+ hdfsAddress = properties.getProperty("yarn.hdfs");
+ yarnUser = properties.getProperty("yarn.user");
+
+ Preconditions.checkNotNull(zkPortValue);
+ Preconditions.checkNotNull(zkAddress);
+ Preconditions.checkNotNull(resmanAddress);
+ Preconditions.checkNotNull(schedulerAddress);
+ Preconditions.checkNotNull(hdfsAddress);
+ Preconditions.checkNotNull(yarnUser);
+
+ zkPort = Integer.valueOf(zkPortValue);
+
+ configureInternal();
+ }
+
+ static void configureInternal() {
+ // rebuild the shared provider properties from the connection settings
+ providerProperties.clear();
+ providerProperties.setProperty(ProviderProperties.ADDRESS, zkAddress);
+ providerProperties.setProperty(ProviderProperties.CLUSTER, managedClusterName);
+ providerProperties.setProperty(ProviderProperties.METAADDRESS, zkAddress);
+ providerProperties.setProperty(ProviderProperties.METACLUSTER, metaClusterName);
+ providerProperties.setProperty(ProviderProperties.NAME, "<unknown>");
+
+ Properties containerProperties = new Properties();
+ containerProperties.setProperty("class", "org.apache.helix.autoscale.impl.container.DummyMasterSlaveProcess");
+
+ providerProperties.addContainer("container", containerProperties);
+
+ log.info(String.format("Using provider properties '%s'", providerProperties));
+ }
+
+ /**
+ * Starts a local zookeeper instance on the configured port.
+ *
+ * @throws IllegalStateException if zookeeper is already running
+ */
+ public static void startZookeeper() throws Exception {
+ log.info("Starting ZooKeeper");
+
+ if (server != null)
+ throw new IllegalStateException("Zookeeper already running");
+
+ server = createLocalZookeeper();
+ server.start();
+ }
+
+ /**
+ * Stops the local zookeeper instance, if running. Idempotent.
+ */
+ public static void stopZookeeper() throws Exception {
+ log.info("Stopping ZooKeeper");
+
+ if (server != null) {
+ server.shutdown();
+ server = null;
+ }
+ }
+
+ /**
+ * Creates the meta and managed clusters, starts the given providers and both
+ * controllers, and blocks until the clusters reach a stable state.
+ *
+ * @throws IllegalStateException if zookeeper is not running or a cluster is already up
+ */
+ public static void startTestCluster(TargetProviderService targetProvider, StatusProviderService statusProvider, Service... containerProviderProcesses)
+ throws Exception {
+ log.debug(String.format("Starting test cluster"));
+
+ if (server == null)
+ throw new IllegalStateException("Zookeeper not running yet");
+
+ if (!auxServices.isEmpty() || !providerServices.isEmpty() || admin != null || metaControllerManager != null || managedControllerManager != null)
+ throw new IllegalStateException("TestCluster already running");
+
+ log.debug("Create admin");
+ admin = new ZKHelixAdmin(zkAddress);
+
+ log.debug("Create clusters");
+ admin.addCluster(metaClusterName, true);
+ admin.addCluster(managedClusterName, true);
+
+ log.debug("Setup config tool");
+ // the rebalancer is instantiated by Helix, so dependencies are injected
+ // through this singleton rather than via constructor arguments
+ ProviderRebalancerSingleton.setTargetProvider(targetProvider);
+ ProviderRebalancerSingleton.setStatusProvider(statusProvider);
+
+ log.debug("Starting target and status provider");
+ TestUtils.targetProvider = startAuxService(targetProvider);
+ TestUtils.statusProvider = startAuxService(statusProvider);
+
+ // Managed Cluster
+ log.debug("Setup managed cluster");
+ admin.addStateModelDef(managedClusterName, "MasterSlave", new StateModelDefinition(StateModelConfigGenerator.generateConfigForMasterSlave()));
+ admin.addResource(managedClusterName, managedResourceName, numManagedPartitions, "MasterSlave", RebalanceMode.FULL_AUTO.toString());
+ IdealState managedIdealState = admin.getResourceIdealState(managedClusterName, managedResourceName);
+ managedIdealState.setInstanceGroupTag(metaResourceName);
+ managedIdealState.setReplicas(String.valueOf(numManagedReplica));
+ admin.setResourceIdealState(managedClusterName, managedResourceName, managedIdealState);
+
+ // Meta Cluster
+ log.debug("Setup meta cluster");
+ admin.addStateModelDef(metaClusterName, "OnlineOffline", new StateModelDefinition(StateModelConfigGenerator.generateConfigForOnlineOffline()));
+ admin.addResource(metaClusterName, metaResourceName, targetProvider.getTargetContainerCount(metaResourceName), "OnlineOffline",
+ RebalanceMode.USER_DEFINED.toString());
+
+ IdealState idealState = admin.getResourceIdealState(metaClusterName, metaResourceName);
+ idealState.setRebalancerClassName(ProviderRebalancer.class.getName());
+ idealState.setReplicas("1");
+
+ // BEGIN workaround
+ // FIXME workaround for HELIX-226
+ // pre-populate empty list/map fields for the maximum partition count so the
+ // user-defined rebalancer sees all partitions
+ Map<String, List<String>> listFields = Maps.newHashMap();
+ Map<String, Map<String, String>> mapFields = Maps.newHashMap();
+ for (int i = 0; i < 256; i++) {
+ String partitionName = metaResourceName + "_" + i;
+ listFields.put(partitionName, new ArrayList<String>());
+ mapFields.put(partitionName, new HashMap<String, String>());
+ }
+ idealState.getRecord().setListFields(listFields);
+ idealState.getRecord().setMapFields(mapFields);
+ // END workaround
+
+ admin.setResourceIdealState(metaClusterName, metaResourceName, idealState);
+
+ log.debug("Starting container providers");
+ for (Service service : containerProviderProcesses) {
+ startProviderService(service);
+ }
+
+ log.debug("Starting managed cluster controller");
+ managedControllerManager = HelixControllerMain.startHelixController(zkAddress, managedClusterName, "managedController", HelixControllerMain.STANDALONE);
+
+ log.debug("Starting meta cluster controller");
+ metaControllerManager = HelixControllerMain.startHelixController(zkAddress, metaClusterName, "metaController", HelixControllerMain.STANDALONE);
+
+ log.debug("Waiting for stable state");
+ waitUntilRebalancedCount(targetProvider.getTargetContainerCount(metaResourceName));
+ }
+
+ /**
+ * Stops controllers, provider services and auxiliary services, and releases
+ * the admin connection. Idempotent.
+ */
+ public static void stopTestCluster() throws Exception {
+ log.debug(String.format("Stopping test cluster"));
+ if (managedControllerManager != null) {
+ log.info("Disconnecting managed cluster controller");
+ managedControllerManager.disconnect();
+ }
+ if (metaControllerManager != null) {
+ log.info("Disconnecting meta cluster controller");
+ metaControllerManager.disconnect();
+ }
+ log.info("Stopping provider services");
+ if (providerServices != null) {
+ for (Service service : providerServices) {
+ service.stop();
+ }
+ providerServices.clear();
+ }
+ log.debug("Stopping auxillary services");
+ if (auxServices != null) {
+ for (Service service : auxServices) {
+ service.stop();
+ }
+ auxServices.clear();
+ }
+
+ // release the zookeeper connection held by the admin client
+ if (admin != null) {
+ admin.close();
+ }
+
+ admin = null;
+ metaControllerManager = null;
+ managedControllerManager = null;
+ }
+
+ /**
+ * Registers and starts an auxiliary service (stopped by stopTestCluster).
+ */
+ public static <T extends Service> T startAuxService(T service) throws Exception {
+ auxServices.add(service);
+ service.start();
+ return service;
+ }
+
+ /**
+ * Registers and starts a container provider service (stopped by stopTestCluster).
+ */
+ public static <T extends Service> T startProviderService(T service) throws Exception {
+ providerServices.add(service);
+ service.start();
+ return service;
+ }
+
+ /**
+ * Triggers a rebalance by re-writing the ideal state and blocks until the
+ * cluster reaches the current target container count.
+ */
+ public static void rebalanceTestCluster() throws Exception {
+ log.debug(String.format("Triggering rebalance"));
+ // writing back the unchanged ideal state forces the rebalancer to run
+ IdealState poke = admin.getResourceIdealState(metaClusterName, metaResourceName);
+ admin.setResourceIdealState(metaClusterName, metaResourceName, poke);
+
+ int current = targetProvider.getTargetContainerCount(TestUtils.metaResourceName);
+ waitUntilRebalancedCount(current);
+ }
+
+ /**
+ * Blocks until both clusters reach the expected partition and instance counts
+ * or REBALANCE_TIMEOUT expires.
+ */
+ public static void waitUntilRebalancedCount(int containerCount) throws Exception {
+ log.debug(String.format("Waiting for rebalance with %d containers at '%s'", containerCount, zkAddress));
+
+ HelixAdmin admin = new ZKHelixAdmin(zkAddress);
+
+ // the timeout budget is shared across all three waits
+ try {
+ long limit = System.currentTimeMillis() + REBALANCE_TIMEOUT;
+ waitUntilPartitionCount(admin, metaClusterName, metaResourceName, containerCount, (limit - System.currentTimeMillis()));
+ waitUntilInstanceCount(admin, metaClusterName, metaResourceName, providerServices.size(), (limit - System.currentTimeMillis()));
+ waitUntilPartitionCount(admin, managedClusterName, managedResourceName, numManagedPartitions, (limit - System.currentTimeMillis()));
+
+ // FIXME workaround for Helix FULL_AUTO rebalancer not providing guarantees for cluster expansion
+ //waitUntilInstanceCount(admin, managedClusterName, managedResourceName, containerCount, (limit - System.currentTimeMillis()));
+ } finally {
+ admin.close();
+ }
+ }
+
+ /**
+ * Polls until the given resource has the expected number of active instances.
+ *
+ * @throws TimeoutException if the count is not reached within the timeout
+ */
+ public static void waitUntilInstanceCount(HelixAdmin admin, String cluster, String resource, int targetCount, long timeout) throws Exception {
+ log.debug(String.format("Waiting for instance count (cluster='%s', resource='%s', instanceCount=%d, timeout=%d)", cluster, resource, targetCount,
+ timeout));
+
+ long limit = System.currentTimeMillis() + timeout;
+ while (limit > System.currentTimeMillis()) {
+ int assignedCount = getAssingedInstances(admin, cluster, resource).size();
+ log.debug(String.format("checking instance count for '%s:%s': target=%d, current=%d", cluster, resource, targetCount, assignedCount));
+
+ if (targetCount == assignedCount) {
+ return;
+ }
+ Thread.sleep(POLL_INTERVAL);
+ }
+ throw new TimeoutException();
+ }
+
+ /**
+ * Polls until the given resource has the expected number of assigned partitions.
+ *
+ * @throws TimeoutException if the count is not reached within the timeout
+ */
+ public static void waitUntilPartitionCount(HelixAdmin admin, String cluster, String resource, int targetCount, long timeout) throws Exception {
+ log.debug(String.format("Waiting for partition count (cluster='%s', resource='%s', partitionCount=%d, timeout=%d)", cluster, resource, targetCount,
+ timeout));
+
+ long limit = System.currentTimeMillis() + timeout;
+ while (limit > System.currentTimeMillis()) {
+ int assignedCount = getAssingedPartitions(admin, cluster, resource).size();
+ log.debug(String.format("checking partition count for '%s:%s': target=%d, current=%d", cluster, resource, targetCount, assignedCount));
+
+ if (targetCount == assignedCount) {
+ return;
+ }
+ Thread.sleep(POLL_INTERVAL);
+ }
+ throw new TimeoutException();
+ }
+
+ /**
+ * Returns the instances that hold at least one partition in MASTER, SLAVE or
+ * ONLINE state according to the external view.
+ * NOTE: method name misspelling ("Assinged") kept for API compatibility.
+ */
+ public static Set<String> getAssingedInstances(HelixAdmin admin, String clusterName, String resourceName) {
+ Set<String> assignedInstances = new HashSet<String>();
+
+ ExternalView externalView = admin.getResourceExternalView(clusterName, resourceName);
+
+ if (externalView == null)
+ return assignedInstances;
+
+ for (String partitionName : externalView.getPartitionSet()) {
+ Map<String, String> stateMap = externalView.getStateMap(partitionName);
+ if (stateMap == null)
+ continue;
+
+ for (String instanceName : stateMap.keySet()) {
+ String state = stateMap.get(instanceName);
+ if ("MASTER".equals(state) || "SLAVE".equals(state) || "ONLINE".equals(state)) {
+ assignedInstances.add(instanceName);
+ }
+ }
+ }
+
+ return assignedInstances;
+ }
+
+ /**
+ * Returns the partitions that have at least one replica in MASTER or ONLINE
+ * state according to the external view.
+ * NOTE: method name misspelling ("Assinged") kept for API compatibility.
+ */
+ public static Set<String> getAssingedPartitions(HelixAdmin admin, String clusterName, String resourceName) {
+ Set<String> assignedPartitions = new HashSet<String>();
+
+ ExternalView externalView = admin.getResourceExternalView(clusterName, resourceName);
+
+ if (externalView == null)
+ return assignedPartitions;
+
+ for (String partitionName : externalView.getPartitionSet()) {
+ Map<String, String> stateMap = externalView.getStateMap(partitionName);
+ if (stateMap == null)
+ continue;
+
+ for (String instanceName : stateMap.keySet()) {
+ String state = stateMap.get(instanceName);
+ if ("MASTER".equals(state) || "ONLINE".equals(state)) {
+ assignedPartitions.add(partitionName);
+ }
+ }
+ }
+
+ return assignedPartitions;
+ }
+
+ /**
+ * Creates a local zookeeper server with fresh data and log directories under /tmp.
+ */
+ public static ZkServer createLocalZookeeper() throws Exception {
+ String baseDir = "/tmp/autoscale/";
+ final String dataDir = baseDir + "zk/dataDir";
+ final String logDir = baseDir + "zk/logDir";
+ // wipe any state left over from a previous run
+ FileUtils.deleteDirectory(new File(dataDir));
+ FileUtils.deleteDirectory(new File(logDir));
+
+ IDefaultNameSpace defaultNameSpace = new IDefaultNameSpace() {
+ @Override
+ public void createDefaultNameSpace(ZkClient zkClient) {
+ // no default namespace required
+ }
+ };
+ return new ZkServer(dataDir, logDir, defaultNameSpace, zkPort);
+ }
+
+ /**
+ * Creates a VM-local container provider with the given name (not started).
+ */
+ public static LocalContainerProviderProcess makeLocalProvider(String name) throws Exception {
+ LocalContainerProviderProcess process = new LocalContainerProviderProcess();
+ process.configure(makeProviderProperties(name));
+ return process;
+ }
+
+ /**
+ * Creates a shell-process container provider with the given name (not started).
+ */
+ public static ShellContainerProviderProcess makeShellProvider(String name) throws Exception {
+ ShellContainerProviderProcess process = new ShellContainerProviderProcess();
+ process.configure(makeProviderProperties(name));
+ return process;
+ }
+
+ /**
+ * Creates a YARN container provider with the given name, configured from the
+ * yarn.* connection settings (not started).
+ */
+ public static YarnContainerProviderProcess makeYarnProvider(String name) throws Exception {
+ YarnContainerProviderProperties properties = new YarnContainerProviderProperties();
+
+ properties.putAll(makeProviderProperties(name));
+ properties.put(YarnContainerProviderProperties.YARNDATA, zkAddress);
+ properties.put(YarnContainerProviderProperties.RESOURCEMANAGER, resmanAddress);
+ properties.put(YarnContainerProviderProperties.SCHEDULER, schedulerAddress);
+ properties.put(YarnContainerProviderProperties.USER, yarnUser);
+ properties.put(YarnContainerProviderProperties.HDFS, hdfsAddress);
+
+ YarnContainerProviderProcess process = new YarnContainerProviderProcess();
+ process.configure(properties);
+
+ return process;
+ }
+
+ // copy of the shared provider properties with the provider name set
+ static ProviderProperties makeProviderProperties(String name) {
+ ProviderProperties properties = new ProviderProperties();
+ properties.putAll(providerProperties);
+ properties.setProperty(ProviderProperties.NAME, name);
+ return properties;
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/TestUtilsUT.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/TestUtilsUT.java b/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/TestUtilsUT.java
new file mode 100644
index 0000000..d55d7a4
--- /dev/null
+++ b/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/TestUtilsUT.java
@@ -0,0 +1,63 @@
+package org.apache.helix.autoscale;
+
+import java.util.Collections;
+
+import org.apache.helix.autoscale.Service;
+import org.apache.helix.autoscale.StatusProviderService;
+import org.apache.helix.autoscale.TargetProviderService;
+import org.apache.helix.autoscale.impl.StaticTargetProvider;
+import org.apache.helix.autoscale.impl.local.LocalStatusProvider;
+import org.apache.log4j.Logger;
+import org.testng.annotations.Test;
+
+/**
+ * Self-test of test cluster. Spawning zookeeper and cluster with single provider and single instance.
+ *
+ * @see TestUtils
+ */
+@Test(groups={"unit"})
+public class TestUtilsUT {
+
+ static final Logger log = Logger.getLogger(TestUtilsUT.class);
+
+ /** Zookeeper can be started and stopped on its own. */
+ @Test(timeOut = TestUtils.TEST_TIMEOUT)
+ public void testZookeeper() throws Exception {
+ log.info("testing zookeeper");
+ TestUtils.configure();
+ TestUtils.startZookeeper();
+ TestUtils.stopZookeeper();
+ }
+
+ /** A full cluster comes up and shuts down cleanly once. */
+ @Test(timeOut = TestUtils.TEST_TIMEOUT)
+ public void testCluster() throws Exception {
+ log.info("testing cluster");
+ TestUtils.configure();
+ TestUtils.startZookeeper();
+
+ // single static target, local status provider, one local container provider
+ TargetProviderService targetProvider = new StaticTargetProvider(Collections.singletonMap(TestUtils.metaResourceName, 1));
+ StatusProviderService containerStatusProvider = new LocalStatusProvider();
+ Service containerProvider = TestUtils.makeLocalProvider("test");
+
+ TestUtils.startTestCluster(targetProvider, containerStatusProvider, containerProvider);
+ TestUtils.stopTestCluster();
+
+ TestUtils.stopZookeeper();
+ }
+
+ /** The same provider instances survive a full stop/start cycle. */
+ @Test(timeOut = TestUtils.TEST_TIMEOUT)
+ public void testClusterRepeated() throws Exception {
+ log.info("testing cluster restart");
+ TestUtils.configure();
+ TestUtils.startZookeeper();
+
+ TargetProviderService targetProvider = new StaticTargetProvider(Collections.singletonMap(TestUtils.metaResourceName, 1));
+ Service containerProvider = TestUtils.makeLocalProvider("test");
+ StatusProviderService containerStatusProvider = new LocalStatusProvider();
+
+ for (int cycle = 0; cycle < 2; cycle++) {
+ TestUtils.startTestCluster(targetProvider, containerStatusProvider, containerProvider);
+ TestUtils.stopTestCluster();
+ }
+
+ TestUtils.stopZookeeper();
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/YarnContainerProviderIT.java
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/YarnContainerProviderIT.java b/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/YarnContainerProviderIT.java
new file mode 100644
index 0000000..78a0bf8
--- /dev/null
+++ b/recipes/auto-scale/src/test/java/org/apache/helix/autoscale/YarnContainerProviderIT.java
@@ -0,0 +1,101 @@
+package org.apache.helix.autoscale;
+
+import java.util.Collections;
+
+import org.apache.helix.autoscale.impl.StaticTargetProvider;
+import org.apache.helix.autoscale.impl.yarn.YarnContainerProviderProcess;
+import org.apache.helix.autoscale.impl.yarn.YarnContainerProviderProperties;
+import org.apache.helix.autoscale.impl.yarn.YarnStatusProvider;
+import org.apache.log4j.Logger;
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.Test;
+
+/**
+ * Yarn container provider and yarn status provider test. Scale-up and -down
+ * only, no failures.
+ *
+ * @see YarnContainerProvider
+ * @see YarnStatusProvider
+ */
+@Test(groups={"integration", "yarn"})
+public class YarnContainerProviderIT {
+
+ static final Logger log = Logger.getLogger(YarnContainerProviderIT.class);
+
+ static final int CONTAINER_COUNT = 4;
+
+ StaticTargetProvider clusterStatusProvider;
+ YarnContainerProviderProcess containerProvider;
+ YarnStatusProvider containerStatusProvider;
+
+ YarnContainerProviderProperties properties;
+
+ @BeforeClass(alwaysRun = true)
+ public void setupClass() throws Exception {
+ // best-effort cleanup of external YARN containers if the VM is killed
+ log.info("installing shutdown hook");
+ Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() {
+ @Override
+ public void run() {
+ try {
+ teardownTest();
+ } catch (Exception e) {
+ // a shutdown hook must not throw; log instead of swallowing silently
+ log.warn("ignoring teardown failure during shutdown", e);
+ }
+ }
+ }));
+ }
+
+ @BeforeMethod(alwaysRun = true)
+ public void setupTest() throws Exception {
+ log.debug("setting up yarn test case");
+
+ // defensive teardown in case a previous test left state behind
+ teardownTest();
+ TestUtils.configure("distributed.properties");
+ TestUtils.startZookeeper();
+
+ containerProvider = TestUtils.makeYarnProvider("provider_0");
+ containerStatusProvider = new YarnStatusProvider(TestUtils.zkAddress);
+ clusterStatusProvider = new StaticTargetProvider(Collections.singletonMap(TestUtils.metaResourceName, CONTAINER_COUNT));
+ TestUtils.startTestCluster(clusterStatusProvider, containerStatusProvider, containerProvider);
+
+ log.debug("running yarn test case");
+ }
+
+ @AfterMethod(alwaysRun = true)
+ public void teardownTest() throws Exception {
+ log.debug("cleaning up yarn test case");
+ TestUtils.stopTestCluster();
+ TestUtils.stopZookeeper();
+ }
+
+ @Test(timeOut = TestUtils.TEST_TIMEOUT)
+ public void testStatic() throws Exception {
+ // baseline: cluster stays stable at the initial container count
+ log.info("testing static");
+ setContainerCount(CONTAINER_COUNT);
+ }
+
+ @Test(timeOut = TestUtils.TEST_TIMEOUT)
+ public void testScaleUp() throws Exception {
+ log.info("testing scale up");
+ setContainerCount(CONTAINER_COUNT + 2);
+ }
+
+ @Test(timeOut = TestUtils.TEST_TIMEOUT)
+ public void testScaleDown() throws Exception {
+ log.info("testing scale down");
+ setContainerCount(CONTAINER_COUNT - 2);
+ }
+
+ @Test(timeOut = TestUtils.TEST_TIMEOUT)
+ public void testScaleCycle() throws Exception {
+ // grow, shrink and return to baseline, rebalancing at each step
+ log.info("testing scale cycle");
+ setContainerCount(CONTAINER_COUNT + 2);
+ setContainerCount(CONTAINER_COUNT);
+ setContainerCount(CONTAINER_COUNT - 2);
+ setContainerCount(CONTAINER_COUNT);
+ }
+
+ /**
+ * Sets the target container count and blocks until the cluster converges
+ * or the rebalance timeout expires.
+ */
+ void setContainerCount(int newContainerCount) throws Exception {
+ log.debug(String.format("Setting container count to %d", newContainerCount));
+ clusterStatusProvider.setTargetContainerCount(TestUtils.metaResourceName, newContainerCount);
+ TestUtils.rebalanceTestCluster();
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/test/resources/distributed.properties
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/test/resources/distributed.properties b/recipes/auto-scale/src/test/resources/distributed.properties
new file mode 100644
index 0000000..47fd8e0
--- /dev/null
+++ b/recipes/auto-scale/src/test/resources/distributed.properties
@@ -0,0 +1,13 @@
+#
+# zookeeper will be spawned locally by TestUtils
+#
+zookeeper.port=2199
+zookeeper.address=rm:2199
+
+#
+# YARN and HDFS must be set up manually
+#
+yarn.resourcemanager=rm:8032
+yarn.scheduler=rm:8030
+yarn.hdfs=hdfs://rm:9000/
+yarn.user=yarn
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/test/resources/log4j.properties
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/test/resources/log4j.properties b/recipes/auto-scale/src/test/resources/log4j.properties
new file mode 100644
index 0000000..65800cc
--- /dev/null
+++ b/recipes/auto-scale/src/test/resources/log4j.properties
@@ -0,0 +1,30 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# Set root logger level to ERROR and its only appender to C.
+log4j.rootLogger=ERROR, C
+
+# C is set to be a ConsoleAppender.
+log4j.appender.C=org.apache.log4j.ConsoleAppender
+log4j.appender.C.layout=org.apache.log4j.PatternLayout
+log4j.appender.C.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n
+
+log4j.logger.org.I0Itec=ERROR
+log4j.logger.org.apache=ERROR
+
+log4j.logger.org.apache.helix.autoscale=DEBUG
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/auto-scale/src/test/resources/standalone.properties
----------------------------------------------------------------------
diff --git a/recipes/auto-scale/src/test/resources/standalone.properties b/recipes/auto-scale/src/test/resources/standalone.properties
new file mode 100644
index 0000000..d4b4e86
--- /dev/null
+++ b/recipes/auto-scale/src/test/resources/standalone.properties
@@ -0,0 +1,13 @@
+#
+# zookeeper will be spawned locally by TestUtils
+#
+zookeeper.port=2199
+zookeeper.address=localhost:2199
+
+#
+# YARN and HDFS must be set up manually
+#
+yarn.resourcemanager=localhost:8032
+yarn.scheduler=localhost:8030
+yarn.hdfs=hdfs://localhost:9000/
+yarn.user=yarn
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/README.md
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/README.md b/recipes/meta-cluster-manager/README.md
new file mode 100644
index 0000000..9a8acf4
--- /dev/null
+++ b/recipes/meta-cluster-manager/README.md
@@ -0,0 +1,82 @@
+<!---
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied. See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+Meta cluster manager
+------------------------
+Auto-scaling for helix clusters using a meta cluster. The managed cluster operates as usual, managing resources and instances via AUTO_REBALANCE. The meta cluster monitors the managed cluster and injects or removes instances based on demand.
+
+The meta cluster makes decisions about scaling up or down based on information obtained from a "ClusterStatusProvider". A custom "ProviderRebalancer" is invoked testing the health of existing participants in the managed cluster with the "ContainerStatusProvider". If participants need to be (re-)deployed the "ContainerProvider" is invoked to instantiate and inject participants in the managed cluster.
+
+ContainerProviders are the participants of the meta cluster and there are multiple different implementations of the "ContainerProvider". First, the "LocalContainerProvider" spawns VM-local participants, i.e. participants of the managed cluster are spawned in the same VM the container provider exists. This is mainly useful for testing. Second, the "ShellContainerProvider" spawns a separate VM process for each participant using shell commands. Third, the "YarnContainerProvider" creates processes as container on a YARN cluster and manages their status using an external meta-data service (Zookeeper in this implementation). This implementation is fairly complex and has a number of external dependencies on a working YARN cluster and running services.
+
+Even though there are different types of providers the notion of a "ContainerProcess" abstracts implementation specifics. A process implementation inherits from "ContainerProcess" and can be instantiated by all three types of container providers. CAUTION: since separate VM process might be used a VM external method for coordination is required (e.g. Zookeeper)
+
+Configuration settings are passed throughout the application using traditional Properties objects. The "ConfigTool" contains default paths and helps to inject dependencies in the ProviderRebalancer.
+
+The application can be run and tested in three ways. First, a comprehensive suite of unit and integration tests can be run using "mvn verify". Second, the "Bootstrapper" can deploy a live managed and meta cluster based on a specification (e.g. "2by2shell.properties"). Third, the "MetaManagerDemo" deploys a test cluster and allows the user to step through a cycle of scale-up and scale-down as well as simulated container and container provider failures.
+
+
+The IdealState of the meta cluster uses the ONLINE-OFFLINE model and maps as follows in the example below:
+
+Resource: type of container, e.g. database, webserver
+Partition: container id
+Instance: responsible container provider
+
+META:
+
+database
+ database_0
+ provider_0 : ONLINE
+ database_1
+ provider_1 : ONLINE
+webserver
+ webserver_0
+ provider_0 : ONLINE
+ webserver_1
+ provider_1 : ONLINE
+ webserver_2
+ provider_0 : ONLINE
+
+
+MANAGED:
+
+dbprod (tag=database)
+ dbprod_0
+ database_0 : MASTER
+ database_1 : SLAVE
+ dbprod_1
+ database_0 : SLAVE
+ database_1 : MASTER
+ dbprod_2
+ database_0 : MASTER
+ database_1 : SLAVE
+wsprod (tag=webserver)
+ wsprod_0
+ webserver_0 : ONLINE
+ wsprod_1
+ webserver_1 : ONLINE
+ wsprod_2
+ webserver_2 : ONLINE
+ wsprod_3
+ webserver_0 : ONLINE
+ wsprod_4
+ webserver_1 : ONLINE
+ wsprod_5
+ webserver_2 : ONLINE
+
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/e38aa54b/recipes/meta-cluster-manager/pom.xml
----------------------------------------------------------------------
diff --git a/recipes/meta-cluster-manager/pom.xml b/recipes/meta-cluster-manager/pom.xml
new file mode 100644
index 0000000..ba5eb69
--- /dev/null
+++ b/recipes/meta-cluster-manager/pom.xml
@@ -0,0 +1,210 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+
+ <parent>
+ <groupId>org.apache.helix.recipes</groupId>
+ <artifactId>recipes</artifactId>
+ <version>0.6.2-incubating-SNAPSHOT</version>
+ </parent>
+
+ <artifactId>meta-cluster-manager</artifactId>
+ <packaging>jar</packaging>
+ <name>Apache Helix :: Recipes :: meta cluster manager</name>
+
+ <properties>
+ <hadoop.version>0.23.9</hadoop.version>
+
+ <ut.groups>unit</ut.groups>
+ <it.groups>local, shell</it.groups>
+ </properties>
+
+ <dependencies>
+ <dependency>
+ <groupId>org.testng</groupId>
+ <artifactId>testng</artifactId>
+ <version>6.0.1</version>
+ <scope>test</scope>
+ <exclusions>
+ <exclusion>
+ <groupId>junit</groupId>
+ <artifactId>junit</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
+ <groupId>com.google.code.gson</groupId>
+ <artifactId>gson</artifactId>
+ <version>2.2.4</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.helix</groupId>
+ <artifactId>helix-core</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>log4j</groupId>
+ <artifactId>log4j</artifactId>
+ <exclusions>
+ <exclusion>
+ <groupId>javax.mail</groupId>
+ <artifactId>mail</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>javax.jms</groupId>
+ <artifactId>jms</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>com.sun.jdmk</groupId>
+ <artifactId>jmxtools</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>com.sun.jmx</groupId>
+ <artifactId>jmxri</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
+ <groupId>com.google.guava</groupId>
+ <artifactId>guava</artifactId>
+ <version>14.0.1</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-client</artifactId>
+ <version>${hadoop.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-yarn-api</artifactId>
+ <version>${hadoop.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-yarn-common</artifactId>
+ <version>${hadoop.version}</version>
+ </dependency>
+ </dependencies>
+
+ <build>
+ <pluginManagement>
+ <plugins>
+ <plugin>
+ <groupId>org.codehaus.mojo</groupId>
+ <artifactId>appassembler-maven-plugin</artifactId>
+ <configuration>
+ <configurationDirectory>conf</configurationDirectory>
+ <copyConfigurationDirectory>true</copyConfigurationDirectory>
+ <includeConfigurationDirectoryInClasspath>true</includeConfigurationDirectoryInClasspath>
+ <assembleDirectory>${project.build.directory}/metamanager-pkg</assembleDirectory>
+ <extraJvmArguments>-Xms512m -Xmx512m</extraJvmArguments>
+ <platforms>
+ <platform>unix</platform>
+ </platforms>
+ </configuration>
+ <executions>
+ <execution>
+ <phase>package</phase>
+ <goals>
+ <goal>assemble</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.rat</groupId>
+ <artifactId>apache-rat-plugin</artifactId>
+ <configuration>
+ <excludes combine.children="append">
+ </excludes>
+ </configuration>
+ </plugin>
+ </plugins>
+ </pluginManagement>
+
+ <plugins>
+ <plugin>
+ <groupId>org.codehaus.mojo</groupId>
+ <artifactId>appassembler-maven-plugin</artifactId>
+ <configuration>
+ <programs>
+ <program>
+ <mainClass>org.apache.helix.metamanager.bootstrapper.Boot</mainClass>
+ <name>boot</name>
+ </program>
+ <program>
+ <mainClass>org.apache.helix.metamanager.impl.shell.ShellContainerProcess</mainClass>
+ <name>shell-container-process</name>
+ </program>
+ <program>
+ <mainClass>org.apache.helix.metamanager.impl.yarn.YarnMasterProcess</mainClass>
+ <name>yarn-master-process</name>
+ </program>
+ <program>
+ <mainClass>org.apache.helix.metamanager.impl.yarn.YarnContainerProcess</mainClass>
+ <name>yarn-container-process</name>
+ </program>
+ <program>
+ <mainClass>org.apache.helix.metamanager.ZookeeperSetter</mainClass>
+ <name>zookeeper-setter</name>
+ </program>
+ </programs>
+ </configuration>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-assembly-plugin</artifactId>
+ <executions>
+ <execution>
+ <phase>package</phase>
+ <goals>
+ <goal>single</goal>
+ </goals>
+ </execution>
+ </executions>
+ <configuration>
+ <finalName>metamanager</finalName>
+ <descriptor>src/main/assembly/assembly.xml</descriptor>
+ </configuration>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-surefire-plugin</artifactId>
+ <configuration>
+ <groups>${ut.groups}</groups>
+ <excludedGroups>integration</excludedGroups>
+ <suiteXmlFiles>
+ <suiteXmlFile>src/test/config/testng.xml</suiteXmlFile>
+ </suiteXmlFiles>
+ </configuration>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-failsafe-plugin</artifactId>
+ <configuration>
+ <groups>${it.groups}</groups>
+ <excludedGroups>unit</excludedGroups>
+ <suiteXmlFiles>
+ <suiteXmlFile>src/test/config/testng.xml</suiteXmlFile>
+ </suiteXmlFiles>
+ </configuration>
+ <executions>
+ <execution>
+ <goals>
+ <goal>integration-test</goal>
+ <goal>verify</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
+ </plugins>
+ </build>
+
+ <profiles>
+ <profile>
+ <id>yarn</id>
+ <properties>
+ <it.groups>yarn</it.groups>
+ </properties>
+ </profile>
+ </profiles>
+</project>