You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by zt...@apache.org on 2019/04/23 07:52:47 UTC
[hadoop] branch trunk updated: SUBMARINE-40. Add TonY runtime to
Submarine. Contributed by Keqiu Hu.
This is an automated email from the ASF dual-hosted git repository.
ztang pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/hadoop.git
The following commit(s) were added to refs/heads/trunk by this push:
new e79a9c1 SUBMARINE-40. Add TonY runtime to Submarine. Contributed by Keqiu Hu.
e79a9c1 is described below
commit e79a9c12c14dcecc674048ed182e74d8690e663a
Author: Zhankun Tang <zt...@apache.org>
AuthorDate: Tue Apr 23 15:45:42 2019 +0800
SUBMARINE-40. Add TonY runtime to Submarine. Contributed by Keqiu Hu.
---
.../yarn/submarine/client/cli/CliConstants.java | 2 +
.../yarn/submarine/client/cli/RunJobCli.java | 3 +
.../client/cli/param/RunJobParameters.java | 29 ++-
.../src/site/markdown/QuickStart.md | 4 +-
.../hadoop-submarine-tony-runtime/README.md | 25 +++
.../hadoop-submarine-tony-runtime/pom.xml | 66 +++++++
.../submarine/runtimes/tony/TonyJobMonitor.java | 52 ++++++
.../submarine/runtimes/tony/TonyJobSubmitter.java | 97 ++++++++++
.../runtimes/tony/TonyRuntimeFactory.java | 55 ++++++
.../yarn/submarine/runtimes/tony/TonyUtils.java | 164 +++++++++++++++++
.../runtimes/tony/buider/JobStatusBuilder.java | 61 +++++++
.../runtimes/tony/buider/package-info.java | 14 ++
.../yarn/submarine/runtimes/tony/package-info.java | 14 ++
.../src/site/markdown/QuickStart.md | 198 +++++++++++++++++++++
.../src/site/resources/css/site.css | 29 +++
.../src/site/site.xml | 28 +++
.../src/test/java/TestTonyUtils.java | 113 ++++++++++++
.../yarnservice/YarnServiceJobSubmitter.java | 6 +-
hadoop-submarine/pom.xml | 1 +
19 files changed, 955 insertions(+), 6 deletions(-)
diff --git a/hadoop-submarine/hadoop-submarine-core/src/main/java/org/apache/hadoop/yarn/submarine/client/cli/CliConstants.java b/hadoop-submarine/hadoop-submarine-core/src/main/java/org/apache/hadoop/yarn/submarine/client/cli/CliConstants.java
index f952aff..00190f0 100644
--- a/hadoop-submarine/hadoop-submarine-core/src/main/java/org/apache/hadoop/yarn/submarine/client/cli/CliConstants.java
+++ b/hadoop-submarine/hadoop-submarine-core/src/main/java/org/apache/hadoop/yarn/submarine/client/cli/CliConstants.java
@@ -38,6 +38,7 @@ public class CliConstants {
public static final String TENSORBOARD_RESOURCES = "tensorboard_resources";
public static final String TENSORBOARD_DEFAULT_RESOURCES =
"memory=4G,vcores=1";
+ public static final String ARG_CONF = "conf";
public static final String WORKER_LAUNCH_CMD = "worker_launch_cmd";
public static final String SERVING_LAUNCH_CMD = "serving_launch_cmd";
@@ -57,4 +58,5 @@ public class CliConstants {
public static final String PRINCIPAL = "principal";
public static final String DISTRIBUTE_KEYTAB = "distribute_keytab";
public static final String YAML_CONFIG = "f";
+ public static final String INSECURE_CLUSTER = "insecure";
}
diff --git a/hadoop-submarine/hadoop-submarine-core/src/main/java/org/apache/hadoop/yarn/submarine/client/cli/RunJobCli.java b/hadoop-submarine/hadoop-submarine-core/src/main/java/org/apache/hadoop/yarn/submarine/client/cli/RunJobCli.java
index f9583c6..b38bddf 100644
--- a/hadoop-submarine/hadoop-submarine-core/src/main/java/org/apache/hadoop/yarn/submarine/client/cli/RunJobCli.java
+++ b/hadoop-submarine/hadoop-submarine-core/src/main/java/org/apache/hadoop/yarn/submarine/client/cli/RunJobCli.java
@@ -166,6 +166,9 @@ public class RunJobCli extends AbstractCli {
" parameter" + CliConstants.KEYTAB + " on cluster machines will be " +
"used");
options.addOption("h", "help", false, "Print help");
+ options.addOption("insecure", false, "Cluster is not Kerberos enabled.");
+ options.addOption("conf", true,
+ "User specified configuration, as key=val pairs.");
return options;
}
diff --git a/hadoop-submarine/hadoop-submarine-core/src/main/java/org/apache/hadoop/yarn/submarine/client/cli/param/RunJobParameters.java b/hadoop-submarine/hadoop-submarine-core/src/main/java/org/apache/hadoop/yarn/submarine/client/cli/param/RunJobParameters.java
index 4792144..e7b1e2f 100644
--- a/hadoop-submarine/hadoop-submarine-core/src/main/java/org/apache/hadoop/yarn/submarine/client/cli/param/RunJobParameters.java
+++ b/hadoop-submarine/hadoop-submarine-core/src/main/java/org/apache/hadoop/yarn/submarine/client/cli/param/RunJobParameters.java
@@ -56,9 +56,11 @@ public class RunJobParameters extends RunParameters {
private boolean waitJobFinish = false;
private boolean distributed = false;
+ private boolean securityDisabled = false;
private String keytab;
private String principal;
private boolean distributeKeytab = false;
+ private List<String> confPairs = new ArrayList<>();
@Override
public void updateParameters(ParametersHolder parametersHolder,
@@ -97,6 +99,10 @@ public class RunJobParameters extends RunParameters {
+ "please double check.");
}
+ if (parametersHolder.hasOption(CliConstants.INSECURE_CLUSTER)) {
+ setSecurityDisabled(true);
+ }
+
String kerberosKeytab = parametersHolder.getOptionValue(
CliConstants.KEYTAB);
String kerberosPrincipal = parametersHolder.getOptionValue(
@@ -181,6 +187,9 @@ public class RunJobParameters extends RunParameters {
boolean distributeKerberosKeytab = parametersHolder.hasOption(CliConstants
.DISTRIBUTE_KEYTAB);
+ List<String> configPairs = parametersHolder
+ .getOptionValues(CliConstants.ARG_CONF);
+
this.setInputPath(input).setCheckpointPath(jobDir)
.setNumPS(nPS).setNumWorkers(nWorkers)
.setPSLaunchCmd(psLaunchCommand).setWorkerLaunchCmd(workerLaunchCmd)
@@ -188,7 +197,8 @@ public class RunJobParameters extends RunParameters {
.setTensorboardEnabled(tensorboard)
.setKeytab(kerberosKeytab)
.setPrincipal(kerberosPrincipal)
- .setDistributeKeytab(distributeKerberosKeytab);
+ .setDistributeKeytab(distributeKerberosKeytab)
+ .setConfPairs(configPairs);
super.updateParameters(parametersHolder, clientContext);
}
@@ -329,6 +339,14 @@ public class RunJobParameters extends RunParameters {
return this;
}
+ public boolean isSecurityDisabled() {
+ return securityDisabled;
+ }
+
+ public void setSecurityDisabled(boolean securityDisabled) {
+ this.securityDisabled = securityDisabled;
+ }
+
public boolean isDistributeKeytab() {
return distributeKeytab;
}
@@ -339,6 +357,15 @@ public class RunJobParameters extends RunParameters {
return this;
}
+ public List<String> getConfPairs() {
+ return confPairs;
+ }
+
+ public RunJobParameters setConfPairs(List<String> confPairs) {
+ this.confPairs = confPairs;
+ return this;
+ }
+
@VisibleForTesting
public static class UnderscoreConverterPropertyUtils extends PropertyUtils {
@Override
diff --git a/hadoop-submarine/hadoop-submarine-core/src/site/markdown/QuickStart.md b/hadoop-submarine/hadoop-submarine-core/src/site/markdown/QuickStart.md
index 21db6bb..071e1a8 100644
--- a/hadoop-submarine/hadoop-submarine-core/src/site/markdown/QuickStart.md
+++ b/hadoop-submarine/hadoop-submarine-core/src/site/markdown/QuickStart.md
@@ -89,9 +89,9 @@ usage: job run
```
#### Notes:
-When using `localization` option to make a collection of dependency Python
+When using `localization` option to make a collection of dependency Python
scripts available to entry python script in the container, you may also need to
- set `PYTHONPATH` environment variable as below to avoid module import error
+set `PYTHONPATH` environment variable as below to avoid module import error
reported from `entry_script.py`.
```
diff --git a/hadoop-submarine/hadoop-submarine-tony-runtime/README.md b/hadoop-submarine/hadoop-submarine-tony-runtime/README.md
new file mode 100644
index 0000000..988565b
--- /dev/null
+++ b/hadoop-submarine/hadoop-submarine-tony-runtime/README.md
@@ -0,0 +1,25 @@
+<!---
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. See accompanying LICENSE file.
+-->
+
+# Overview
+[TonY](https://github.com/linkedin/TonY/) is an open source project that TonY
+is a framework to natively run deep learning frameworks on Apache Hadoop.
+As Submarine supports multiple runtimes, this module serves as an adaptor so
+ that Submarine could leverage TonY as a Runtime implementation to run
+TensorFlow with 2.x version of Hadoop and Hadoop installations w/o docker
+support or native service.
+
+Please jump to [QuickStart](src/site/markdown/QuickStart.md) to understand how
+to run Submarine with TonY runtime.
+
\ No newline at end of file
diff --git a/hadoop-submarine/hadoop-submarine-tony-runtime/pom.xml b/hadoop-submarine/hadoop-submarine-tony-runtime/pom.xml
new file mode 100644
index 0000000..8dbda98
--- /dev/null
+++ b/hadoop-submarine/hadoop-submarine-tony-runtime/pom.xml
@@ -0,0 +1,66 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. See accompanying LICENSE file.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <parent>
+ <artifactId>hadoop-submarine</artifactId>
+ <groupId>org.apache.hadoop</groupId>
+ <version>0.2.0-SNAPSHOT</version>
+ </parent>
+ <modelVersion>4.0.0</modelVersion>
+
+ <artifactId>hadoop-submarine-tony-runtime</artifactId>
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-submarine-core</artifactId>
+ <version>0.2.0-SNAPSHOT</version>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>com.linkedin.tony</groupId>
+ <artifactId>tony-core</artifactId>
+ <version>0.3.3</version>
+ <exclusions>
+ <exclusion>
+ <groupId>com.linkedin.tony</groupId>
+ <artifactId>tony-mini</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>com.linkedin.azkaban</groupId>
+ <artifactId>az-hadoop-jobtype-plugin</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
+ <groupId>junit</groupId>
+ <artifactId>junit</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-submarine-core</artifactId>
+ <type>test-jar</type>
+ <scope>test</scope>
+ <version>0.2.0-SNAPSHOT</version>
+ </dependency>
+ <dependency>
+ <groupId>org.mockito</groupId>
+ <artifactId>mockito-core</artifactId>
+ <scope>test</scope>
+ </dependency>
+ </dependencies>
+
+</project>
diff --git a/hadoop-submarine/hadoop-submarine-tony-runtime/src/main/java/org/apache/hadoop/yarn/submarine/runtimes/tony/TonyJobMonitor.java b/hadoop-submarine/hadoop-submarine-tony-runtime/src/main/java/org/apache/hadoop/yarn/submarine/runtimes/tony/TonyJobMonitor.java
new file mode 100644
index 0000000..7540da8
--- /dev/null
+++ b/hadoop-submarine/hadoop-submarine-tony-runtime/src/main/java/org/apache/hadoop/yarn/submarine/runtimes/tony/TonyJobMonitor.java
@@ -0,0 +1,52 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License. See accompanying LICENSE file.
+ */
+package org.apache.hadoop.yarn.submarine.runtimes.tony;
+
+import com.linkedin.tony.TonyClient;
+import com.linkedin.tony.client.TaskUpdateListener;
+import com.linkedin.tony.rpc.TaskInfo;
+import org.apache.hadoop.yarn.exceptions.YarnException;
+import org.apache.hadoop.yarn.submarine.common.ClientContext;
+import org.apache.hadoop.yarn.submarine.common.api.JobStatus;
+import org.apache.hadoop.yarn.submarine.runtimes.common.JobMonitor;
+import org.apache.hadoop.yarn.submarine.runtimes.tony.buider.JobStatusBuilder;
+
+import java.io.IOException;
+import java.util.HashSet;
+import java.util.Set;
+
+/**
+ * An implementation of JobMonitor with TonY library.
+ */
+public class TonyJobMonitor extends JobMonitor implements TaskUpdateListener {
+ private Set<TaskInfo> taskInfos = new HashSet<>();
+
+ public TonyJobMonitor(ClientContext clientContext, TonyClient client) {
+ super(clientContext);
+ client.addListener(this);
+ }
+
+ @Override
+ public JobStatus getTrainingJobStatus(String jobName)
+ throws IOException, YarnException {
+ JobStatus jobStatus = JobStatusBuilder.fromTaskInfoSet(taskInfos);
+ jobStatus.setJobName(jobName);
+ return jobStatus;
+ }
+
+ @Override
+ public void onTaskInfosUpdated(Set<TaskInfo> taskInfoSet) {
+ this.taskInfos = taskInfoSet;
+ }
+}
diff --git a/hadoop-submarine/hadoop-submarine-tony-runtime/src/main/java/org/apache/hadoop/yarn/submarine/runtimes/tony/TonyJobSubmitter.java b/hadoop-submarine/hadoop-submarine-tony-runtime/src/main/java/org/apache/hadoop/yarn/submarine/runtimes/tony/TonyJobSubmitter.java
new file mode 100644
index 0000000..bd50167
--- /dev/null
+++ b/hadoop-submarine/hadoop-submarine-tony-runtime/src/main/java/org/apache/hadoop/yarn/submarine/runtimes/tony/TonyJobSubmitter.java
@@ -0,0 +1,97 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License. See accompanying LICENSE file.
+ */
+
+package org.apache.hadoop.yarn.submarine.runtimes.tony;
+
+import com.linkedin.tony.Constants;
+import com.linkedin.tony.TonyClient;
+import com.linkedin.tony.client.CallbackHandler;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.yarn.api.records.ApplicationId;
+import org.apache.hadoop.yarn.exceptions.YarnException;
+import org.apache.hadoop.yarn.submarine.client.cli.param.RunJobParameters;
+import org.apache.hadoop.yarn.submarine.runtimes.common.JobSubmitter;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+
+/**
+ * Implementation of JobSumitter with TonY runtime.
+ */
+public class TonyJobSubmitter implements JobSubmitter, CallbackHandler {
+
+ private static final Log LOG = LogFactory.getLog(TonyJobSubmitter.class);
+ private ApplicationId applicationId;
+ private TonyClient tonyClient;
+
+ public TonyJobSubmitter() { }
+ public void setTonyClient(TonyClient client) {
+ this.tonyClient = client;
+ }
+
+ @Override
+ public ApplicationId submitJob(RunJobParameters parameters)
+ throws IOException, YarnException {
+ LOG.info("Starting Tony runtime..");
+
+ File tonyFinalConfPath = File.createTempFile("temp",
+ Constants.TONY_FINAL_XML);
+ // Write user's overridden conf to an xml to be localized.
+ Configuration tonyConf = TonyUtils.tonyConfFromClientContext(parameters);
+ try (OutputStream os = new FileOutputStream(tonyFinalConfPath)) {
+ tonyConf.writeXml(os);
+ } catch (IOException e) {
+ throw new RuntimeException("Failed to create " + tonyFinalConfPath
+ + " conf file. Exiting.", e);
+ }
+
+ try {
+ tonyClient.init(new String[]{
+ "--conf_file", tonyFinalConfPath.getAbsolutePath()
+ });
+ } catch (Exception e) {
+ LOG.error("Failed to init TonyClient: ", e);
+ }
+ Thread clientThread = new Thread(tonyClient::start);
+ Runtime.getRuntime().addShutdownHook(new Thread(() -> {
+ try {
+ tonyClient.forceKillApplication();
+ } catch (YarnException | IOException e) {
+ LOG.error("Failed to kill application during shutdown.", e);
+ }
+ }));
+ clientThread.start();
+ while (clientThread.isAlive()) {
+ if (applicationId != null) {
+ LOG.info("TonyClient returned applicationId: " + applicationId);
+ return applicationId;
+ }
+ try {
+ Thread.sleep(5000);
+ } catch (InterruptedException e) {
+ LOG.error(e);
+ }
+ }
+ return null;
+ }
+
+ @Override
+ public void onApplicationIdReceived(ApplicationId appId) {
+ applicationId = appId;
+ }
+}
diff --git a/hadoop-submarine/hadoop-submarine-tony-runtime/src/main/java/org/apache/hadoop/yarn/submarine/runtimes/tony/TonyRuntimeFactory.java b/hadoop-submarine/hadoop-submarine-tony-runtime/src/main/java/org/apache/hadoop/yarn/submarine/runtimes/tony/TonyRuntimeFactory.java
new file mode 100644
index 0000000..7a0d170
--- /dev/null
+++ b/hadoop-submarine/hadoop-submarine-tony-runtime/src/main/java/org/apache/hadoop/yarn/submarine/runtimes/tony/TonyRuntimeFactory.java
@@ -0,0 +1,55 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License. See accompanying LICENSE file.
+ */
+package org.apache.hadoop.yarn.submarine.runtimes.tony;
+
+import com.linkedin.tony.TonyClient;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.yarn.submarine.common.ClientContext;
+import org.apache.hadoop.yarn.submarine.runtimes.RuntimeFactory;
+import org.apache.hadoop.yarn.submarine.runtimes.common.FSBasedSubmarineStorageImpl;
+import org.apache.hadoop.yarn.submarine.runtimes.common.JobMonitor;
+import org.apache.hadoop.yarn.submarine.runtimes.common.JobSubmitter;
+import org.apache.hadoop.yarn.submarine.runtimes.common.SubmarineStorage;
+
+/**
+ * Implementation of RuntimeFactory with Tony Runtime
+ */
+public class TonyRuntimeFactory extends RuntimeFactory {
+ private TonyClient tonyClient;
+ private TonyJobSubmitter submitter;
+ private TonyJobMonitor monitor;
+
+ public TonyRuntimeFactory(ClientContext clientContext) {
+ super(clientContext);
+ submitter = new TonyJobSubmitter();
+ tonyClient = new TonyClient(submitter, new Configuration());
+ monitor = new TonyJobMonitor(clientContext, tonyClient);
+ submitter.setTonyClient(tonyClient);
+ }
+
+ @Override
+ protected JobSubmitter internalCreateJobSubmitter() {
+ return submitter;
+ }
+
+ @Override
+ protected JobMonitor internalCreateJobMonitor() {
+ return monitor;
+ }
+
+ @Override
+ protected SubmarineStorage internalCreateSubmarineStorage() {
+ return new FSBasedSubmarineStorageImpl(super.clientContext);
+ }
+}
diff --git a/hadoop-submarine/hadoop-submarine-tony-runtime/src/main/java/org/apache/hadoop/yarn/submarine/runtimes/tony/TonyUtils.java b/hadoop-submarine/hadoop-submarine-tony-runtime/src/main/java/org/apache/hadoop/yarn/submarine/runtimes/tony/TonyUtils.java
new file mode 100644
index 0000000..7dc49b3
--- /dev/null
+++ b/hadoop-submarine/hadoop-submarine-tony-runtime/src/main/java/org/apache/hadoop/yarn/submarine/runtimes/tony/TonyUtils.java
@@ -0,0 +1,164 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License. See accompanying LICENSE file.
+ */
+package org.apache.hadoop.yarn.submarine.runtimes.tony;
+
+import com.linkedin.tony.Constants;
+import com.linkedin.tony.TonyConfigurationKeys;
+import com.linkedin.tony.util.Utils;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.yarn.api.records.ResourceInformation;
+import org.apache.hadoop.yarn.exceptions.ResourceNotFoundException;
+import org.apache.hadoop.yarn.submarine.client.cli.param.RunJobParameters;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Utilities for Tony Runtime.
+ */
+public final class TonyUtils {
+ private static final Log LOG = LogFactory.getLog(TonyUtils.class);
+
+ public static Configuration tonyConfFromClientContext(
+ RunJobParameters parameters) {
+ Configuration tonyConf = new Configuration();
+ tonyConf.setInt(
+ TonyConfigurationKeys.getInstancesKey(Constants.WORKER_JOB_NAME),
+ parameters.getNumWorkers());
+ tonyConf.setInt(
+ TonyConfigurationKeys.getInstancesKey(Constants.PS_JOB_NAME),
+ parameters.getNumPS());
+ // Resources for PS & Worker
+ if (parameters.getPsResource() != null) {
+ tonyConf.setInt(
+ TonyConfigurationKeys.getResourceKey(Constants.PS_JOB_NAME,
+ Constants.VCORES),
+ parameters.getPsResource().getVirtualCores());
+ tonyConf.setLong(
+ TonyConfigurationKeys.getResourceKey(Constants.PS_JOB_NAME,
+ Constants.MEMORY),
+ parameters.getPsResource().getMemorySize());
+ }
+ if (parameters.getWorkerResource() != null) {
+ tonyConf.setInt(
+ TonyConfigurationKeys.getResourceKey(Constants.WORKER_JOB_NAME,
+ Constants.VCORES),
+ parameters.getWorkerResource().getVirtualCores());
+ tonyConf.setLong(
+ TonyConfigurationKeys.getResourceKey(Constants.WORKER_JOB_NAME,
+ Constants.MEMORY),
+ parameters.getWorkerResource().getMemorySize());
+ try {
+ tonyConf.setLong(
+ TonyConfigurationKeys.getResourceKey(Constants.WORKER_JOB_NAME,
+ Constants.GPUS),
+ parameters.getWorkerResource()
+ .getResourceValue(ResourceInformation.GPU_URI));
+ } catch (ResourceNotFoundException rnfe) {
+ LOG.error("GPU resources not enabled.");
+ }
+ }
+ if (parameters.getQueue() != null) {
+ tonyConf.set(
+ TonyConfigurationKeys.YARN_QUEUE_NAME,
+ parameters.getQueue());
+ }
+ // Set up Docker for PS & Worker
+ if (parameters.getDockerImageName() != null) {
+ tonyConf.set(TonyConfigurationKeys.getContainerDockerKey(),
+ parameters.getDockerImageName());
+ tonyConf.setBoolean(TonyConfigurationKeys.DOCKER_ENABLED, true);
+ }
+ if (parameters.getWorkerDockerImage() != null) {
+ tonyConf.set(
+ TonyConfigurationKeys.getDockerImageKey(Constants.WORKER_JOB_NAME),
+ parameters.getWorkerDockerImage());
+ tonyConf.setBoolean(TonyConfigurationKeys.DOCKER_ENABLED, true);
+ }
+ if (parameters.getPsDockerImage() != null) {
+ tonyConf.set(
+ TonyConfigurationKeys.getDockerImageKey(Constants.PS_JOB_NAME),
+ parameters.getPsDockerImage());
+ tonyConf.setBoolean(TonyConfigurationKeys.DOCKER_ENABLED, true);
+ }
+
+ // Set up container environment
+ List<String> envs = parameters.getEnvars();
+ tonyConf.setStrings(
+ TonyConfigurationKeys.CONTAINER_LAUNCH_ENV,
+ envs.toArray(new String[0]));
+ tonyConf.setStrings(TonyConfigurationKeys.EXECUTION_ENV,
+ envs.stream()
+ .map(env -> env.replaceAll("DOCKER_", ""))
+ .toArray(String[]::new));
+ tonyConf.setStrings(TonyConfigurationKeys.CONTAINER_LAUNCH_ENV,
+ envs.stream().map(env -> env.replaceAll("DOCKER_", ""))
+ .toArray(String[]::new));
+
+ // Set up running command
+ if (parameters.getWorkerLaunchCmd() != null) {
+ tonyConf.set(
+ TonyConfigurationKeys.getExecuteCommandKey(Constants.WORKER_JOB_NAME),
+ parameters.getWorkerLaunchCmd());
+ }
+
+ if (parameters.getPSLaunchCmd() != null) {
+ tonyConf.set(
+ TonyConfigurationKeys.getExecuteCommandKey(Constants.PS_JOB_NAME),
+ parameters.getPSLaunchCmd());
+ }
+
+ tonyConf.setBoolean(TonyConfigurationKeys.SECURITY_ENABLED,
+ !parameters.isSecurityDisabled());
+
+ // Set up container resources
+ if (parameters.getLocalizations() != null) {
+ tonyConf.setStrings(TonyConfigurationKeys.getContainerResourcesKey(),
+ parameters.getLocalizations().stream()
+ .map(lo -> lo.getRemoteUri() + Constants.RESOURCE_DIVIDER
+ + lo.getLocalPath())
+ .toArray(String[]::new));
+ }
+
+ if (parameters.getConfPairs() != null) {
+ String[] confArray = parameters.getConfPairs().toArray(new String[0]);
+ for (Map.Entry<String, String> cliConf : Utils
+ .parseKeyValue(confArray).entrySet()) {
+ String[] existingValue = tonyConf.getStrings(cliConf.getKey());
+ if (existingValue != null
+ && TonyConfigurationKeys
+ .MULTI_VALUE_CONF.contains(cliConf.getKey())) {
+ ArrayList<String> newValues = new ArrayList<>(Arrays
+ .asList(existingValue));
+ newValues.add(cliConf.getValue());
+ tonyConf.setStrings(cliConf.getKey(),
+ newValues.toArray(new String[0]));
+ } else {
+ tonyConf.set(cliConf.getKey(), cliConf.getValue());
+ }
+ }
+ }
+
+ LOG.info("Resources: " + tonyConf.get(
+ TonyConfigurationKeys.getContainerResourcesKey()));
+ return tonyConf;
+ }
+
+ private TonyUtils() {
+ }
+}
diff --git a/hadoop-submarine/hadoop-submarine-tony-runtime/src/main/java/org/apache/hadoop/yarn/submarine/runtimes/tony/buider/JobStatusBuilder.java b/hadoop-submarine/hadoop-submarine-tony-runtime/src/main/java/org/apache/hadoop/yarn/submarine/runtimes/tony/buider/JobStatusBuilder.java
new file mode 100644
index 0000000..c9e72dc
--- /dev/null
+++ b/hadoop-submarine/hadoop-submarine-tony-runtime/src/main/java/org/apache/hadoop/yarn/submarine/runtimes/tony/buider/JobStatusBuilder.java
@@ -0,0 +1,61 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License. See accompanying LICENSE file.
+ */
+package org.apache.hadoop.yarn.submarine.runtimes.tony.buider;
+
+import com.linkedin.tony.rpc.TaskInfo;
+import org.apache.hadoop.yarn.submarine.common.api.JobComponentStatus;
+import org.apache.hadoop.yarn.submarine.common.api.JobStatus;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+/**
+ * JobStatusBuilder builds the job status from a set of TaskInfos.
+ */
+public final class JobStatusBuilder {
+ public static JobStatus fromTaskInfoSet(final Set<TaskInfo> taskInfos) {
+ JobStatus status = new JobStatus();
+ Set<String> jobNames =
+ taskInfos.stream().map(TaskInfo::getName).collect(Collectors.toSet());
+ List<JobComponentStatus> jobComponentStatusList = new ArrayList<>();
+ for (String jobName : jobNames) {
+ Set<TaskInfo> filterTasks = taskInfos.stream()
+ .filter(taskInfo -> taskInfo.getName().equals(jobName))
+ .collect(Collectors.toSet());
+ long numReadyContainers = 0;
+ long numRunningContainers = 0;
+ long totalAskedContainers = 0;
+ for (TaskInfo taskInfo : filterTasks) {
+ totalAskedContainers += 1;
+ switch (taskInfo.getStatus()) {
+ case READY:
+ numReadyContainers += 1;
+ break;
+ case RUNNING:
+ numRunningContainers += 1;
+ break;
+ default:
+ }
+ }
+ jobComponentStatusList.add(new JobComponentStatus(jobName,
+ numReadyContainers, numRunningContainers, totalAskedContainers));
+ }
+ status.setComponentStatus(jobComponentStatusList);
+ return status;
+ }
+
+ private JobStatusBuilder() { }
+}
diff --git a/hadoop-submarine/hadoop-submarine-tony-runtime/src/main/java/org/apache/hadoop/yarn/submarine/runtimes/tony/buider/package-info.java b/hadoop-submarine/hadoop-submarine-tony-runtime/src/main/java/org/apache/hadoop/yarn/submarine/runtimes/tony/buider/package-info.java
new file mode 100644
index 0000000..5dfd5a3
--- /dev/null
+++ b/hadoop-submarine/hadoop-submarine-tony-runtime/src/main/java/org/apache/hadoop/yarn/submarine/runtimes/tony/buider/package-info.java
@@ -0,0 +1,14 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License. See accompanying LICENSE file.
+ */
+package org.apache.hadoop.yarn.submarine.runtimes.tony.buider;
\ No newline at end of file
diff --git a/hadoop-submarine/hadoop-submarine-tony-runtime/src/main/java/org/apache/hadoop/yarn/submarine/runtimes/tony/package-info.java b/hadoop-submarine/hadoop-submarine-tony-runtime/src/main/java/org/apache/hadoop/yarn/submarine/runtimes/tony/package-info.java
new file mode 100644
index 0000000..4596202
--- /dev/null
+++ b/hadoop-submarine/hadoop-submarine-tony-runtime/src/main/java/org/apache/hadoop/yarn/submarine/runtimes/tony/package-info.java
@@ -0,0 +1,14 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License. See accompanying LICENSE file.
+ */
+package org.apache.hadoop.yarn.submarine.runtimes.tony;
\ No newline at end of file
diff --git a/hadoop-submarine/hadoop-submarine-tony-runtime/src/site/markdown/QuickStart.md b/hadoop-submarine/hadoop-submarine-tony-runtime/src/site/markdown/QuickStart.md
new file mode 100644
index 0000000..b6503e8
--- /dev/null
+++ b/hadoop-submarine/hadoop-submarine-tony-runtime/src/site/markdown/QuickStart.md
@@ -0,0 +1,198 @@
+<!---
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. See accompanying LICENSE file.
+-->
+
+# Quick Start Guide
+
+## Prerequisite
+
+Must:
+
+- Apache Hadoop 2.7 or above.
+
+Optional:
+
+- Enable GPU on YARN support (when GPU-based training is required, Hadoop 3.1 and above).
+- Enable Docker support on Hadoop (Hadoop 2.9 and above).
+
+## Run jobs
+
+### Commandline options
+
+```$xslt
+usage:
+ -docker_image <arg> Docker image name/tag
+ -env <arg> Common environment variable of worker/ps
+ -name <arg> Name of the job
+ -num_ps <arg> Number of PS tasks of the job, by default
+ it's 0
+ -num_workers <arg> Numnber of worker tasks of the job, by
+ default it's 1
+ -ps_docker_image <arg> Specify docker image for PS, when this is
+ not specified, PS uses --docker_image as
+ default.
+ -ps_launch_cmd <arg> Commandline of worker, arguments will be
+ directly used to launch the PS
+ -ps_resources <arg> Resource of each PS, for example
+ memory-mb=2048,vcores=2,yarn.io/gpu=2
+ -queue <arg> Name of queue to run the job, by default it
+ uses default queue
+ -saved_model_path <arg> Model exported path (savedmodel) of the job,
+ which is needed when exported model is not
+ placed under ${checkpoint_path}could be
+ local or other FS directory. This will be
+ used to serve.
+ -tensorboard <arg> Should we run TensorBoard for this job? By
+ default it's true
+ -verbose Print verbose log for troubleshooting
+ -wait_job_finish Specified when user want to wait the job
+ finish
+ -worker_docker_image <arg> Specify docker image for WORKER, when this
+ is not specified, WORKER uses --docker_image
+ as default.
+ -worker_launch_cmd <arg> Commandline of worker, arguments will be
+ directly used to launch the worker
+ -worker_resources <arg> Resource of each worker, for example
+ memory-mb=2048,vcores=2,yarn.io/gpu=2
+ -localization <arg> Specify localization to remote/local
+ file/directory available to all container(Docker).
+ Argument format is "RemoteUri:LocalFilePath[:rw]"
+ (ro permission is not supported yet).
+ The RemoteUri can be a file or directory in local
+ or HDFS or s3 or abfs or http .etc.
+ The LocalFilePath can be absolute or relative.
+ If relative, it'll be under container's implied
+ working directory.
+ This option can be set mutiple times.
+ Examples are
+ -localization "hdfs:///user/yarn/mydir2:/opt/data"
+ -localization "s3a:///a/b/myfile1:./"
+ -localization "https:///a/b/myfile2:./myfile"
+ -localization "/user/yarn/mydir3:/opt/mydir3"
+ -localization "./mydir1:."
+ -insecure Whether running in an insecure cluster
+ -conf Override configurations via commandline
+```
+
+### Submarine Configuration
+
+For submarine internal configuration, please create a `submarine.xml` which should be placed under `$HADOOP_CONF_DIR`.
+Make sure you set `submarine.runtime.class` to `org.apache.hadoop.yarn.submarine.runtimes.tony.TonyRuntimeFactory`
+
+|Configuration Name | Description |
+|:---- |:---- |
+| `submarine.runtime.class` | org.apache.hadoop.yarn.submarine.runtimes.tony.TonyRuntimeFactory
+| `submarine.localization.max-allowed-file-size-mb` | Optional. This sets a size limit to the file/directory to be localized in "-localization" CLI option. 2GB by default. |
+
+
+
+### Launch TensorFlow Application:
+
+#### Commandline
+
+### Without Docker
+
+You need:
+* Build a Python virtual environment with TensorFlow 1.13.1 installed
+* A cluster with Hadoop 2.7 or above.
+
+### Building a Python virtual environment with TensorFlow
+
+TonY requires a Python virtual environment zip with TensorFlow and any needed Python libraries already installed.
+
+```
+wget https://files.pythonhosted.org/packages/33/bc/fa0b5347139cd9564f0d44ebd2b147ac97c36b2403943dbee8a25fd74012/virtualenv-16.0.0.tar.gz
+tar xf virtualenv-16.0.0.tar.gz
+
+# Make sure to install using Python 3, as TensorFlow only provides Python 3 artifacts
+python virtualenv-16.0.0/virtualenv.py venv
+. venv/bin/activate
+pip install tensorflow==1.13.1
+zip -r venv.zip venv
+```
+
+### TensorFlow version
+
+ - Version 1.13.1
+
+**Note:** If you require a past version of TensorFlow and TensorBoard, take a look at [this](https://github.com/linkedin/TonY/issues/42) issue.
+
+
+### Installing Hadoop
+
+TonY only requires YARN, not HDFS. Please see the [open-source documentation](https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-common/SingleCluster.html) on how to set YARN up.
+
+### Get the training examples
+
+Get mnist_distributed.py from https://github.com/linkedin/TonY/tree/master/tony-examples/mnist-tensorflow
+
+
+```
+CLASSPATH=$(hadoop classpath --glob): \
+./hadoop-submarine-core/target/hadoop-submarine-core-0.2.0-SNAPSHOT.jar: \
+./hadoop-submarine-yarnservice-runtime/target/hadoop-submarine-score-yarnservice-runtime-0.2.0-SNAPSHOT.jar: \
+./hadoop-submarine-tony-runtime/target/hadoop-submarine-tony-runtime-0.2.0-SNAPSHOT.jar: \
+/home/pi/hadoop/TonY/tony-cli/build/libs/tony-cli-0.3.2-all.jar \
+
+java org.apache.hadoop.yarn.submarine.client.cli.Cli job run --name tf-job-001 \
+ --num_workers 2 \
+ --worker_resources memory=3G,vcores=2 \
+ --num_ps 2 \
+ --ps_resources memory=3G,vcores=2 \
+ --worker_launch_cmd "venv.zip/venv/bin/python --steps 1000 --data_dir /tmp/data --working_dir /tmp/mode" \
+ --ps_launch_cmd "venv.zip/venv/bin/python --steps 1000 --data_dir /tmp/data --working_dir /tmp/mode" \
+ --container_resources /home/pi/hadoop/TonY/tony-cli/build/libs/tony-cli-0.3.2-all.jar
+ --insecure
+ --conf tony.containers.resources=PATH_TO_VENV_YOU_CREATED/venv.zip#archive,PATH_TO_MNIST_EXAMPLE/mnist_distributed.py, \
+PATH_TO_TONY_CLI_JAR/tony-cli-0.3.2-all.jar
+
+```
+You should then be able to see links and status of the jobs from command line:
+
+```
+2019-04-22 20:30:42,611 INFO tony.TonyClient: Tasks Status Updated: [TaskInfo] name: worker index: 0 url: http://pi-aw:8042/node/containerlogs/container_1555916523933_0030_01_000003/pi status: RUNNING
+2019-04-22 20:30:42,612 INFO tony.TonyClient: Tasks Status Updated: [TaskInfo] name: worker index: 1 url: http://pi-aw:8042/node/containerlogs/container_1555916523933_0030_01_000004/pi status: RUNNING
+2019-04-22 20:30:42,612 INFO tony.TonyClient: Tasks Status Updated: [TaskInfo] name: ps index: 0 url: http://pi-aw:8042/node/containerlogs/container_1555916523933_0030_01_000002/pi status: RUNNING
+2019-04-22 20:30:42,612 INFO tony.TonyClient: Logs for ps 0 at: http://pi-aw:8042/node/containerlogs/container_1555916523933_0030_01_000002/pi
+2019-04-22 20:30:42,612 INFO tony.TonyClient: Logs for worker 0 at: http://pi-aw:8042/node/containerlogs/container_1555916523933_0030_01_000003/pi
+2019-04-22 20:30:42,612 INFO tony.TonyClient: Logs for worker 1 at: http://pi-aw:8042/node/containerlogs/container_1555916523933_0030_01_000004/pi
+2019-04-22 20:30:44,625 INFO tony.TonyClient: Tasks Status Updated: [TaskInfo] name: ps index: 0 url: http://pi-aw:8042/node/containerlogs/container_1555916523933_0030_01_000002/pi status: FINISHED
+2019-04-22 20:30:44,625 INFO tony.TonyClient: Tasks Status Updated: [TaskInfo] name: worker index: 0 url: http://pi-aw:8042/node/containerlogs/container_1555916523933_0030_01_000003/pi status: FINISHED
+2019-04-22 20:30:44,626 INFO tony.TonyClient: Tasks Status Updated: [TaskInfo] name: worker index: 1 url: http://pi-aw:8042/node/containerlogs/container_1555916523933_0030_01_000004/pi status: FINISHED
+
+```
+
+### With Docker
+
+```
+CLASSPATH=$(hadoop classpath --glob): \
+./hadoop-submarine-core/target/hadoop-submarine-core-0.2.0-SNAPSHOT.jar: \
+./hadoop-submarine-yarnservice-runtime/target/hadoop-submarine-score-yarnservice-runtime-0.2.0-SNAPSHOT.jar: \
+./hadoop-submarine-tony-runtime/target/hadoop-submarine-tony-runtime-0.2.0-SNAPSHOT.jar: \
+/home/pi/hadoop/TonY/tony-cli/build/libs/tony-cli-0.3.2-all.jar \
+
+java org.apache.hadoop.yarn.submarine.client.cli.Cli job run --name tf-job-001 \
+ --docker_image hadoopsubmarine/tf-1.8.0-cpu:0.0.3 \
+ --input_path hdfs://pi-aw:9000/dataset/cifar-10-data \
+ --worker_resources memory=3G,vcores=2 \
+ --worker_launch_cmd "export CLASSPATH=\$(/hadoop-3.1.0/bin/hadoop classpath --glob) && cd /test/models/tutorials/image/cifar10_estimator && python cifar10_main.py --data-dir=%input_path% --job-dir=%checkpoint_path% --train-steps=10000 --eval-batch-size=16 --train-batch-size=16 --variable-strategy=CPU --num-gpus=0 --sync" \
+ --env JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64 \
+ --env DOCKER_HADOOP_HDFS_HOME=/hadoop-3.1.0 \
+ --env DOCKER_JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64 \
+ --env HADOOP_HOME=/hadoop-3.1.0 \
+ --env HADOOP_YARN_HOME=/hadoop-3.1.0 \
+ --env HADOOP_COMMON_HOME=/hadoop-3.1.0 \
+ --env HADOOP_HDFS_HOME=/hadoop-3.1.0 \
+ --env HADOOP_CONF_DIR=/hadoop-3.1.0/etc/hadoop \
+ --conf tony.containers.resources=--conf tony.containers.resources=/home/pi/hadoop/TonY/tony-cli/build/libs/tony-cli-0.3.2-all.jar
+```
diff --git a/hadoop-submarine/hadoop-submarine-tony-runtime/src/site/resources/css/site.css b/hadoop-submarine/hadoop-submarine-tony-runtime/src/site/resources/css/site.css
new file mode 100644
index 0000000..7315db3
--- /dev/null
+++ b/hadoop-submarine/hadoop-submarine-tony-runtime/src/site/resources/css/site.css
@@ -0,0 +1,29 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+#banner {
+ height: 93px;
+ background: none;
+}
+
+#bannerLeft img {
+ margin-left: 30px;
+ margin-top: 10px;
+}
+
+#bannerRight img {
+ margin: 17px;
+}
diff --git a/hadoop-submarine/hadoop-submarine-tony-runtime/src/site/site.xml b/hadoop-submarine/hadoop-submarine-tony-runtime/src/site/site.xml
new file mode 100644
index 0000000..5feae9a
--- /dev/null
+++ b/hadoop-submarine/hadoop-submarine-tony-runtime/src/site/site.xml
@@ -0,0 +1,28 @@
+<!--
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. See accompanying LICENSE file.
+-->
+<project name="Apache Hadoop ${project.version}">
+
+ <skin>
+ <groupId>org.apache.maven.skins</groupId>
+ <artifactId>maven-stylus-skin</artifactId>
+ <version>${maven-stylus-skin.version}</version>
+ </skin>
+
+ <body>
+ <links>
+ <item name="Apache Hadoop" href="http://hadoop.apache.org/"/>
+ </links>
+ </body>
+
+</project>
diff --git a/hadoop-submarine/hadoop-submarine-tony-runtime/src/test/java/TestTonyUtils.java b/hadoop-submarine/hadoop-submarine-tony-runtime/src/test/java/TestTonyUtils.java
new file mode 100644
index 0000000..60e2c26
--- /dev/null
+++ b/hadoop-submarine/hadoop-submarine-tony-runtime/src/test/java/TestTonyUtils.java
@@ -0,0 +1,113 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License. See accompanying LICENSE file.
+ */
+import com.linkedin.tony.Constants;
+import com.linkedin.tony.TonyConfigurationKeys;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.yarn.api.records.ApplicationId;
+import org.apache.hadoop.yarn.exceptions.YarnException;
+import org.apache.hadoop.yarn.submarine.client.cli.RunJobCli;
+import org.apache.hadoop.yarn.submarine.client.cli.param.RunJobParameters;
+import org.apache.hadoop.yarn.submarine.common.MockClientContext;
+import org.apache.hadoop.yarn.submarine.common.conf.SubmarineLogs;
+import org.apache.hadoop.yarn.submarine.runtimes.RuntimeFactory;
+import org.apache.hadoop.yarn.submarine.runtimes.common.JobMonitor;
+import org.apache.hadoop.yarn.submarine.runtimes.common.JobSubmitter;
+import org.apache.hadoop.yarn.submarine.runtimes.common.SubmarineStorage;
+import org.apache.hadoop.yarn.submarine.runtimes.tony.TonyUtils;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.io.IOException;
+
+import static org.mockito.ArgumentMatchers.any;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class TestTonyUtils {
+
+ private MockClientContext getMockClientContext()
+ throws IOException, YarnException {
+ MockClientContext mockClientContext = new MockClientContext();
+ JobSubmitter mockJobSubmitter = mock(JobSubmitter.class);
+ when(mockJobSubmitter.submitJob(any(RunJobParameters.class))).thenReturn(
+ ApplicationId.newInstance(1234L, 1));
+ JobMonitor mockJobMonitor = mock(JobMonitor.class);
+ SubmarineStorage storage = mock(SubmarineStorage.class);
+ RuntimeFactory rtFactory = mock(RuntimeFactory.class);
+
+ when(rtFactory.getJobSubmitterInstance()).thenReturn(mockJobSubmitter);
+ when(rtFactory.getJobMonitorInstance()).thenReturn(mockJobMonitor);
+ when(rtFactory.getSubmarineStorage()).thenReturn(storage);
+
+ mockClientContext.setRuntimeFactory(rtFactory);
+ return mockClientContext;
+ }
+
+ @Before
+ public void before() {
+ SubmarineLogs.verboseOff();
+ }
+
+ @Test
+ public void testTonyConfFromClientContext() throws Exception {
+ RunJobCli runJobCli = new RunJobCli(getMockClientContext());
+ runJobCli.run(
+ new String[] {"--name", "my-job", "--docker_image", "tf-docker:1.1.0",
+ "--input_path", "hdfs://input",
+ "--num_workers", "3", "--num_ps", "2", "--worker_launch_cmd",
+ "python run-job.py", "--worker_resources", "memory=2048M,vcores=2",
+ "--ps_resources", "memory=4G,vcores=4", "--ps_launch_cmd",
+ "python run-ps.py"});
+ RunJobParameters jobRunParameters = runJobCli.getRunJobParameters();
+ Configuration tonyConf = TonyUtils
+ .tonyConfFromClientContext(jobRunParameters);
+ Assert.assertEquals(jobRunParameters.getDockerImageName(),
+ tonyConf.get(TonyConfigurationKeys.getContainerDockerKey()));
+ Assert.assertEquals("3", tonyConf.get(TonyConfigurationKeys
+ .getInstancesKey("worker")));
+ Assert.assertEquals(jobRunParameters.getWorkerLaunchCmd(),
+ tonyConf.get(TonyConfigurationKeys
+ .getExecuteCommandKey("worker")));
+ Assert.assertEquals("2048", tonyConf.get(TonyConfigurationKeys
+ .getResourceKey(Constants.WORKER_JOB_NAME, Constants.MEMORY)));
+ Assert.assertEquals("2", tonyConf.get(TonyConfigurationKeys
+ .getResourceKey(Constants.WORKER_JOB_NAME, Constants.VCORES)));
+ Assert.assertEquals("4096", tonyConf.get(TonyConfigurationKeys
+ .getResourceKey(Constants.PS_JOB_NAME, Constants.MEMORY)));
+ Assert.assertEquals("4", tonyConf.get(TonyConfigurationKeys
+ .getResourceKey(Constants.PS_JOB_NAME,
+ Constants.VCORES)));
+ Assert.assertEquals(jobRunParameters.getPSLaunchCmd(),
+ tonyConf.get(TonyConfigurationKeys.getExecuteCommandKey("ps")));
+ }
+}
diff --git a/hadoop-submarine/hadoop-submarine-yarnservice-runtime/src/main/java/org/apache/hadoop/yarn/submarine/runtimes/yarnservice/YarnServiceJobSubmitter.java b/hadoop-submarine/hadoop-submarine-yarnservice-runtime/src/main/java/org/apache/hadoop/yarn/submarine/runtimes/yarnservice/YarnServiceJobSubmitter.java
index 842f4ad..58a33cf 100644
--- a/hadoop-submarine/hadoop-submarine-yarnservice-runtime/src/main/java/org/apache/hadoop/yarn/submarine/runtimes/yarnservice/YarnServiceJobSubmitter.java
+++ b/hadoop-submarine/hadoop-submarine-yarnservice-runtime/src/main/java/org/apache/hadoop/yarn/submarine/runtimes/yarnservice/YarnServiceJobSubmitter.java
@@ -110,7 +110,7 @@ public class YarnServiceJobSubmitter implements JobSubmitter {
return serviceResource;
}
- private String getValueOfEnvionment(String envar) {
+ private String getValueOfEnvironment(String envar) {
// extract value from "key=value" form
if (envar == null || !envar.contains("=")) {
return "";
@@ -133,10 +133,10 @@ public class YarnServiceJobSubmitter implements JobSubmitter {
for (String envar : parameters.getEnvars()) {
if (envar.startsWith("DOCKER_HADOOP_HDFS_HOME=")) {
- hdfsHome = getValueOfEnvionment(envar);
+ hdfsHome = getValueOfEnvironment(envar);
hadoopEnv = true;
} else if (envar.startsWith("DOCKER_JAVA_HOME=")) {
- javaHome = getValueOfEnvionment(envar);
+ javaHome = getValueOfEnvironment(envar);
}
}
diff --git a/hadoop-submarine/pom.xml b/hadoop-submarine/pom.xml
index bc2a0ce..1f44556 100644
--- a/hadoop-submarine/pom.xml
+++ b/hadoop-submarine/pom.xml
@@ -37,6 +37,7 @@
<modules>
<module>hadoop-submarine-core</module>
<module>hadoop-submarine-yarnservice-runtime</module>
+ <module>hadoop-submarine-tony-runtime</module>
</modules>
<profiles>
---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org