You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by su...@apache.org on 2018/08/13 08:53:14 UTC
[3/3] hadoop git commit: YARN-8561. [Submarine] Initial
implementation: Training job submission and job history retrieval.
Contributed by Wangda Tan.
YARN-8561. [Submarine] Initial implementation: Training job submission and job history retrieval. Contributed by Wangda Tan.
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/cadbc8b5
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/cadbc8b5
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/cadbc8b5
Branch: refs/heads/trunk
Commit: cadbc8b57f94896aeff2ff5583c9a5ec374c80e2
Parents: a8dae00
Author: Sunil G <su...@apache.org>
Authored: Mon Aug 13 14:22:55 2018 +0530
Committer: Sunil G <su...@apache.org>
Committed: Mon Aug 13 14:22:55 2018 +0530
----------------------------------------------------------------------
.../hadoop-yarn-submarine/README.md | 53 +++
.../hadoop-yarn-submarine/pom.xml | 213 +++++++++
.../yarn/submarine/client/cli/AbstractCli.java | 47 ++
.../hadoop/yarn/submarine/client/cli/Cli.java | 106 +++++
.../yarn/submarine/client/cli/CliConstants.java | 48 ++
.../yarn/submarine/client/cli/CliUtils.java | 174 +++++++
.../yarn/submarine/client/cli/RunJobCli.java | 204 +++++++++
.../yarn/submarine/client/cli/ShowJobCli.java | 125 +++++
.../client/cli/param/BaseParameters.java | 56 +++
.../client/cli/param/RunJobParameters.java | 222 +++++++++
.../client/cli/param/RunParameters.java | 103 +++++
.../client/cli/param/ShowJobParameters.java | 18 +
.../yarn/submarine/common/ClientContext.java | 77 ++++
.../hadoop/yarn/submarine/common/Envs.java | 27 ++
.../common/api/JobComponentStatus.java | 73 +++
.../yarn/submarine/common/api/JobState.java | 52 +++
.../yarn/submarine/common/api/JobStatus.java | 87 ++++
.../yarn/submarine/common/api/TaskType.java | 32 ++
.../api/builder/JobComponentStatusBuilder.java | 44 ++
.../common/api/builder/JobStatusBuilder.java | 64 +++
.../common/conf/SubmarineConfiguration.java | 51 +++
.../submarine/common/conf/SubmarineLogs.java | 31 ++
.../common/exception/SubmarineException.java | 21 +
.../exception/SubmarineRuntimeException.java | 25 +
.../fs/DefaultRemoteDirectoryManager.java | 84 ++++
.../common/fs/RemoteDirectoryManager.java | 30 ++
.../yarn/submarine/runtimes/RuntimeFactory.java | 106 +++++
.../common/FSBasedSubmarineStorageImpl.java | 106 +++++
.../submarine/runtimes/common/JobMonitor.java | 84 ++++
.../submarine/runtimes/common/JobSubmitter.java | 36 ++
.../runtimes/common/StorageKeyConstants.java | 24 +
.../runtimes/common/SubmarineStorage.java | 57 +++
.../yarnservice/YarnServiceJobMonitor.java | 46 ++
.../yarnservice/YarnServiceJobSubmitter.java | 458 +++++++++++++++++++
.../yarnservice/YarnServiceRuntimeFactory.java | 44 ++
.../runtimes/yarnservice/YarnServiceUtils.java | 78 ++++
.../src/site/DeveloperGuide.md | 26 ++
.../src/site/QuickStart.md | 134 ++++++
.../client/cli/TestRunJobCliParsing.java | 229 ++++++++++
.../client/cli/TestShowJobCliParsing.java | 104 +++++
.../yarnservice/TestYarnServiceRunJobCli.java | 167 +++++++
.../yarnservice/YarnServiceCliTestUtils.java | 35 ++
.../submarine/common/MockClientContext.java | 56 +++
.../common/fs/MockRemoteDirectoryManager.java | 83 ++++
.../runtimes/common/MemorySubmarineStorage.java | 74 +++
.../common/TestFSBasedSubmarineStorage.java | 73 +++
.../yarnservice/TestTFConfigGenerator.java | 42 ++
.../src/test/resources/core-site.xml | 21 +
.../src/test/resources/hdfs-site.xml | 21 +
.../hadoop-yarn-applications/pom.xml | 1 +
50 files changed, 4172 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hadoop/blob/cadbc8b5/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/README.md
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/README.md b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/README.md
new file mode 100644
index 0000000..3e04730
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/README.md
@@ -0,0 +1,53 @@
+<!---
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. See accompanying LICENSE file.
+-->
+
+# Overview
+
+```$xslt
+ _ _
+ | | (_)
+ ___ _ _ | |__ _ __ ___ __ _ _ __ _ _ __ ___
+ / __|| | | || '_ \ | '_ ` _ \ / _` || '__|| || '_ \ / _ \
+ \__ \| |_| || |_) || | | | | || (_| || | | || | | || __/
+ |___/ \__,_||_.__/ |_| |_| |_| \__,_||_| |_||_| |_| \___|
+
+ ?
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~|^"~~~~~~~~~~~~~~~~~~~~~~~~~o~~~~~~~~~~~
+ o | o __o
+ o | o |X__>
+ ___o | __o
+ (X___>-- __|__ |X__> o
+ | \ __o
+ | \ |X__>
+ _______________________|_______\________________
+ < \____________ _
+ \ \ (_)
+ \ O O O >=)
+ \__________________________________________________________/ (_)
+```
+
+Submarine is a project which allows infra engineers / data scientists to run *unmodified* Tensorflow programs on YARN.
+
+Goals of Submarine:
+- Allow jobs to easily access data/models in HDFS and other storage systems.
+- Launch services to serve Tensorflow/MXNet models.
+- Support running distributed Tensorflow jobs with simple configs.
+- Support running user-specified Docker images.
+- Support specifying GPU and other resources.
+- Support launching tensorboard for training jobs if the user requests it.
+- Support customized DNS names for roles (like tensorboard.$user.$domain:6006).
+
+Please jump to the [QuickStart](src/site/QuickStart.md) guide to quickly understand how to use this framework.
+
+If you're a developer, please see the [Developer](src/site/DeveloperGuide.md) guide for more details.
http://git-wip-us.apache.org/repos/asf/hadoop/blob/cadbc8b5/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/pom.xml
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/pom.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/pom.xml
new file mode 100644
index 0000000..90a1a6c
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/pom.xml
@@ -0,0 +1,213 @@
+<?xml version="1.0"?>
+<!--
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. See accompanying LICENSE file.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
+ http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <parent>
+ <artifactId>hadoop-yarn-applications</artifactId>
+ <groupId>org.apache.hadoop</groupId>
+ <version>3.2.0-SNAPSHOT</version>
+ </parent>
+ <modelVersion>4.0.0</modelVersion>
+ <artifactId>hadoop-yarn-submarine</artifactId>
+ <version>3.2.0-SNAPSHOT</version>
+ <name>Yet Another Learning Platform</name>
+
+ <properties>
+ <!-- Needed for generating FindBugs warnings using parent pom -->
+ <yarn.basedir>${project.parent.parent.basedir}</yarn.basedir>
+ </properties>
+
+ <dependencies>
+ <!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-common</artifactId>
+ <scope>provided</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>junit</groupId>
+ <artifactId>junit</artifactId>
+ <scope>test</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>log4j</groupId>
+ <artifactId>log4j</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>com.google.guava</groupId>
+ <artifactId>guava</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>commons-logging</groupId>
+ <artifactId>commons-logging</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>commons-cli</groupId>
+ <artifactId>commons-cli</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>commons-io</groupId>
+ <artifactId>commons-io</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-yarn-server-applicationhistoryservice</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-yarn-server-timelineservice</artifactId>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
+ <!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-annotations</artifactId>
+ </dependency>
+ <!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-common</artifactId>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
+ <!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-yarn-api</artifactId>
+ </dependency>
+ <!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-yarn-common</artifactId>
+ </dependency>
+ <!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-yarn-client</artifactId>
+ </dependency>
+ <!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-yarn-server-nodemanager</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-yarn-server-resourcemanager</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-yarn-server-tests</artifactId>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.mockito</groupId>
+ <artifactId>mockito-all</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-yarn-server-timeline-pluginstorage</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-yarn-server-timeline-pluginstorage</artifactId>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-yarn-common</artifactId>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-hdfs</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-hdfs-client</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-hdfs</artifactId>
+ <scope>test</scope>
+ <type>test-jar</type>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-fs2img</artifactId>
+ <version>3.2.0-SNAPSHOT</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-yarn-services-api</artifactId>
+ <version>3.2.0-SNAPSHOT</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-yarn-services-core</artifactId>
+ <version>3.2.0-SNAPSHOT</version>
+ </dependency>
+ </dependencies>
+
+ <build>
+ <plugins>
+ <plugin>
+ <artifactId>maven-jar-plugin</artifactId>
+ <executions>
+ <execution>
+ <goals>
+ <goal>jar</goal>
+ </goals>
+ <!-- strictly speaking, the unit test is really a regression test. It
+ needs the main jar to be available to be able to run. -->
+ <phase>test-compile</phase>
+ </execution>
+ </executions>
+ <configuration>
+ <archive>
+ <manifest>
+ <mainClass>org.apache.hadoop.yarn.submarine.client.cli.Cli</mainClass>
+ </manifest>
+ </archive>
+ </configuration>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-surefire-plugin</artifactId>
+ <configuration>
+ <environmentVariables>
+ <JAVA_HOME>${java.home}</JAVA_HOME>
+ </environmentVariables>
+ </configuration>
+ </plugin>
+ </plugins>
+ </build>
+
+
+</project>
http://git-wip-us.apache.org/repos/asf/hadoop/blob/cadbc8b5/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/client/cli/AbstractCli.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/client/cli/AbstractCli.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/client/cli/AbstractCli.java
new file mode 100644
index 0000000..f6a9214
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/client/cli/AbstractCli.java
@@ -0,0 +1,47 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License. See accompanying LICENSE file.
+ */
+
+package org.apache.hadoop.yarn.submarine.client.cli;
+
+import org.apache.commons.cli.ParseException;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.yarn.submarine.common.ClientContext;
+import org.apache.hadoop.yarn.exceptions.YarnException;
+import org.apache.hadoop.yarn.submarine.common.exception.SubmarineException;
+
+import java.io.IOException;
+
+public abstract class AbstractCli implements Tool { // Common base of the CLI commands; the Tool configuration is kept in the ClientContext.
+ protected ClientContext clientContext; // Supplied by the caller; accessible to subclasses and used by setConf/getConf below.
+
+ public AbstractCli(ClientContext cliContext) { // Stores the given context as-is (no copy, no null check).
+ this.clientContext = cliContext;
+ }
+
+ @Override
+ public abstract int run(String[] args) // Executes the command with its own arguments; each subclass defines the returned status.
+ throws ParseException, IOException, YarnException, InterruptedException,
+ SubmarineException;
+
+ @Override
+ public void setConf(Configuration conf) { // Tool contract: forwards the configuration to the context.
+ clientContext.setSubmarineConfig(conf);
+ }
+
+ @Override
+ public Configuration getConf() { // Tool contract: returns whatever configuration the context currently holds.
+ return clientContext.getSubmarineConfig();
+ }
+}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/cadbc8b5/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/client/cli/Cli.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/client/cli/Cli.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/client/cli/Cli.java
new file mode 100644
index 0000000..b4c5e4c
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/client/cli/Cli.java
@@ -0,0 +1,106 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License. See accompanying LICENSE file.
+ */
+
+package org.apache.hadoop.yarn.submarine.client.cli;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.yarn.submarine.common.ClientContext;
+import org.apache.hadoop.yarn.submarine.common.fs.DefaultRemoteDirectoryManager;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.submarine.runtimes.RuntimeFactory;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.Arrays;
+
+public class Cli { // Command-line entry point: dispatches "job run" / "job show" to the matching sub-command.
+ private static final Logger LOG =
+ LoggerFactory.getLogger(Cli.class);
+
+ private static void printHelp() { // Prints the top-level usage text to stdout.
+ StringBuilder helpMsg = new StringBuilder();
+ helpMsg.append("\n\nUsage: <object> [<action>] [<args>]\n");
+ helpMsg.append(" Below are all objects / actions:\n");
+ helpMsg.append(" job \n");
+ helpMsg.append(" run : run a job, please see 'job run --help' for usage \n");
+ helpMsg.append(" show : get status of job, please see 'job show --help' for usage \n");
+
+ System.out.println(helpMsg.toString());
+ }
+
+ private static ClientContext getClientContext() { // Builds a context from a fresh YarnConfiguration, the default remote directory manager and the runtime factory.
+ Configuration conf = new YarnConfiguration();
+ ClientContext clientContext = new ClientContext();
+ clientContext.setConfiguration(conf);
+ clientContext.setRemoteDirectoryManager(
+ new DefaultRemoteDirectoryManager(clientContext));
+ RuntimeFactory runtimeFactory = RuntimeFactory.getRuntimeFactory(
+ clientContext);
+ clientContext.setRuntimeFactory(runtimeFactory);
+ return clientContext;
+ }
+
+ public static void main(String[] args) throws Exception { // Expects: <object> <action> [<args>]; only the "job" object is handled.
+ System.out.println(" _ _ \n"
+ + " | | (_) \n"
+ + " ___ _ _ | |__ _ __ ___ __ _ _ __ _ _ __ ___ \n"
+ + " / __|| | | || '_ \\ | '_ ` _ \\ / _` || '__|| || '_ \\ / _ \\\n"
+ + " \\__ \\| |_| || |_) || | | | | || (_| || | | || | | || __/\n"
+ + " |___/ \\__,_||_.__/ |_| |_| |_| \\__,_||_| |_||_| |_| \\___|\n"
+ + " \n"
+ + " ?\n"
+ + " ~~~~~~~~~~~~~~~~~~~~~~~~~~~|^\"~~~~~~~~~~~~~~~~~~~~~~~~~o~~~~~~~~~~~\n"
+ + " o | o __o\n"
+ + " o | o |X__>\n"
+ + " ___o | __o\n"
+ + " (X___>-- __|__ |X__> o\n"
+ + " | \\ __o\n"
+ + " | \\ |X__>\n"
+ + " _______________________|_______\\________________\n"
+ + " < \\____________ _\n"
+ + " \\ \\ (_)\n"
+ + " \\ O O O >=)\n"
+ + " \\__________________________________________________________/ (_)\n"
+ + "\n");
+
+ if (CliUtils.argsForHelp(args)) { // no arguments, or a lone -h / --help
+ printHelp();
+ System.exit(0);
+ }
+
+ if (args.length < 2) { // both <object> and <action> are mandatory
+ LOG.error("Bad parameters specified.");
+ printHelp();
+ System.exit(-1);
+ }
+
+ String[] moduleArgs = Arrays.copyOfRange(args, 2, args.length); // everything after <object> <action>
+ ClientContext clientContext = getClientContext();
+
+ if (args[0].equals("job")) {
+ String subCmd = args[1];
+ if (subCmd.equals(CliConstants.RUN)) {
+ new RunJobCli(clientContext).run(moduleArgs);
+ } else if (subCmd.equals(CliConstants.SHOW)) {
+ new ShowJobCli(clientContext).run(moduleArgs);
+ } else {
+ printHelp();
+ throw new IllegalArgumentException("Unknown option for job");
+ }
+ } else {
+ printHelp();
+ throw new IllegalArgumentException("Unknown object '" + args[0] + "', only 'job' is supported"); // replaces the "<TODO>" placeholder text
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/cadbc8b5/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/client/cli/CliConstants.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/client/cli/CliConstants.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/client/cli/CliConstants.java
new file mode 100644
index 0000000..d0958a8
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/client/cli/CliConstants.java
@@ -0,0 +1,48 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License. See accompanying LICENSE file.
+ */
+
+package org.apache.hadoop.yarn.submarine.client.cli;
+
+/*
+ * Action names and option names used by the CLI. NOTE: use lowercase + "_" for the option name
+ */
+public class CliConstants {
+ public static final String RUN = "run"; // action compared against args[1] by Cli.main
+ public static final String SERVE = "serve";
+ public static final String LIST = "list";
+ public static final String SHOW = "show"; // action compared against args[1] by Cli.main
+ public static final String NAME = "name";
+ public static final String INPUT_PATH = "input_path"; // also used as the %input_path% launch-command placeholder
+ public static final String CHECKPOINT_PATH = "checkpoint_path"; // also used as the %checkpoint_path% placeholder
+ public static final String SAVED_MODEL_PATH = "saved_model_path"; // also used as the %saved_model_path% placeholder
+ public static final String N_WORKERS = "num_workers";
+ public static final String N_SERVING_TASKS = "num_serving_tasks";
+ public static final String N_PS = "num_ps";
+ public static final String WORKER_RES = "worker_resources";
+ public static final String SERVING_RES = "serving_resources";
+ public static final String PS_RES = "ps_resources";
+ public static final String DOCKER_IMAGE = "docker_image";
+ public static final String QUEUE = "queue";
+ public static final String TENSORBOARD = "tensorboard";
+ public static final String WORKER_LAUNCH_CMD = "worker_launch_cmd";
+ public static final String SERVING_LAUNCH_CMD = "serving_launch_cmd";
+ public static final String PS_LAUNCH_CMD = "ps_launch_cmd";
+ public static final String ENV = "env";
+ public static final String VERBOSE = "verbose";
+ public static final String SERVING_FRAMEWORK = "serving_framework";
+ public static final String STOP = "stop";
+ public static final String WAIT_JOB_FINISH = "wait_job_finish";
+ public static final String PS_DOCKER_IMAGE = "ps_docker_image";
+ public static final String WORKER_DOCKER_IMAGE = "worker_docker_image";
+}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/cadbc8b5/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/client/cli/CliUtils.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/client/cli/CliUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/client/cli/CliUtils.java
new file mode 100644
index 0000000..6dd3e4d
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/client/cli/CliUtils.java
@@ -0,0 +1,174 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License. See accompanying LICENSE file.
+ */
+
+package org.apache.hadoop.yarn.submarine.client.cli;
+
+import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.api.records.ResourceInformation;
+import org.apache.hadoop.yarn.api.records.ResourceTypeInfo;
+import org.apache.hadoop.yarn.exceptions.ResourceNotFoundException;
+import org.apache.hadoop.yarn.exceptions.YarnException;
+import org.apache.hadoop.yarn.submarine.client.cli.param.RunJobParameters;
+import org.apache.hadoop.yarn.submarine.common.fs.RemoteDirectoryManager;
+import org.apache.hadoop.yarn.util.UnitsConversionUtil;
+import org.apache.hadoop.yarn.util.resource.ResourceUtils;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+public class CliUtils { // Static helpers shared by the submarine CLI commands.
+ private final static String RES_PATTERN = "^[^=]+=\\d+\\s?\\w*$"; // one "name=<digits>[unit]" pair
+ /**
+ * Replaces the %checkpoint_path%, %input_path% and %saved_model_path% placeholders in a launch command.
+ * Checkpoint path defaults to the job's remote checkpoint dir; saved-model path defaults to the checkpoint path.
+ * @return launch command after pattern replace
+ */
+ public static String replacePatternsInLaunchCommand(String specifiedCli,
+ RunJobParameters jobRunParameters,
+ RemoteDirectoryManager directoryManager) throws IOException {
+ String jobDir = jobRunParameters.getCheckpointPath();
+ if (null == jobDir) {
+ jobDir = directoryManager.getJobCheckpointDir(jobRunParameters.getName(),
+ true).toString();
+ }
+
+ String input = jobRunParameters.getInputPath();
+ String savedModelDir = jobRunParameters.getSavedModelPath();
+ if (null == savedModelDir) {
+ savedModelDir = jobDir;
+ }
+
+ Map<String, String> replacePattern = new HashMap<>(); // placeholder -> replacement; unset values are left in the command untouched
+ if (jobDir != null) {
+ replacePattern.put("%" + CliConstants.CHECKPOINT_PATH + "%", jobDir);
+ }
+ if (input != null) {
+ replacePattern.put("%" + CliConstants.INPUT_PATH + "%", input);
+ }
+ if (savedModelDir != null) {
+ replacePattern.put("%" + CliConstants.SAVED_MODEL_PATH + "%",
+ savedModelDir);
+ }
+
+ String newCli = specifiedCli;
+ for (Map.Entry<String, String> replace : replacePattern.entrySet()) {
+ newCli = newCli.replace(replace.getKey(), replace.getValue());
+ }
+
+ return newCli;
+ }
+
+ // Parses "k1=v1,k2=v2" (optionally wrapped in "[]") into resource name -> amount. TODO: duplicates Client of distributed shell, should cleanup
+ private static Map<String, Long> parseResourcesString(String resourcesStr) {
+ Map<String, Long> resources = new HashMap<>();
+
+ // Ignore the grouping "[]"
+ if (resourcesStr.startsWith("[")) {
+ resourcesStr = resourcesStr.substring(1);
+ }
+ if (resourcesStr.endsWith("]")) {
+ resourcesStr = resourcesStr.substring(0, resourcesStr.length() - 1); // fix: the end index used to be length(), which kept the "]"
+ }
+
+ for (String resource : resourcesStr.trim().split(",")) {
+ resource = resource.trim();
+ if (!resource.matches(RES_PATTERN)) {
+ throw new IllegalArgumentException("\"" + resource + "\" is not a "
+ + "valid resource type/amount pair. "
+ + "Please provide key=amount pairs separated by commas.");
+ }
+ String[] splits = resource.split("=");
+ String key = splits[0], value = splits[1];
+ String units = ResourceUtils.getUnits(value);
+
+ String valueWithoutUnit = value.substring(0, value.length() - units.length()).trim();
+ Long resourceValue = Long.valueOf(valueWithoutUnit);
+
+ // Convert commandline unit to standard YARN unit.
+ if (units.equals("M") || units.equals("m")) {
+ units = "Mi";
+ } else if (units.equals("G") || units.equals("g")) {
+ units = "Gi";
+ } else if (units.isEmpty()) {
+ // do nothing;
+ } else{
+ throw new IllegalArgumentException("Acceptable units are M/G or empty");
+ }
+
+ // special handle memory-mb and memory
+ if (key.equals(ResourceInformation.MEMORY_URI)) {
+ if (!units.isEmpty()) { // a bare number under this key is stored unconverted
+ resourceValue = UnitsConversionUtil.convert(units, "Mi",
+ resourceValue);
+ }
+ }
+
+ if (key.equals("memory")) { // alias: always converted to Mi and stored under MEMORY_URI
+ key = ResourceInformation.MEMORY_URI;
+ resourceValue = UnitsConversionUtil.convert(units, "Mi", resourceValue);
+ }
+
+ // special handle gpu
+ if (key.equals("gpu")) {
+ key = ResourceInformation.GPU_URI;
+ }
+
+ // special handle fpga
+ if (key.equals("fpga")) {
+ key = ResourceInformation.FPGA_URI;
+ }
+
+ resources.put(key, resourceValue);
+ }
+ return resources;
+ }
+
+ private static void validateResourceTypes(Iterable<String> resourceNames, // throws ResourceNotFoundException for any name missing from resourceTypes
+ List<ResourceTypeInfo> resourceTypes) throws IOException, YarnException {
+ for (String resourceName : resourceNames) {
+ if (!resourceTypes.stream().anyMatch(
+ e -> e.getName().equals(resourceName))) {
+ throw new ResourceNotFoundException(
+ "Unknown resource: " + resourceName);
+ }
+ }
+ }
+
+ public static Resource createResourceFromString(String resourceStr, // parse, validate the names against resourceTypes, then build the Resource
+ List<ResourceTypeInfo> resourceTypes) throws IOException, YarnException {
+ Map<String, Long> typeToValue = parseResourcesString(resourceStr);
+ validateResourceTypes(typeToValue.keySet(), resourceTypes);
+ Resource resource = Resource.newInstance(0, 0); // start from zero; only the parsed entries are set
+ for (Map.Entry<String, Long> entry : typeToValue.entrySet()) {
+ resource.setResourceValue(entry.getKey(), entry.getValue());
+ }
+ return resource;
+ }
+
+ // True when no arguments are given (null or empty), or the single argument is -h / --help.
+ public static boolean argsForHelp(String[] args) {
+ if (args == null || args.length == 0)
+ return true;
+
+ if (args.length == 1) {
+ if (args[0].equals("-h") || args[0].equals("--help")) {
+ return true;
+ }
+ }
+
+ return false;
+ }
+}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/cadbc8b5/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/client/cli/RunJobCli.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/client/cli/RunJobCli.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/client/cli/RunJobCli.java
new file mode 100644
index 0000000..d7dfc0d
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/client/cli/RunJobCli.java
@@ -0,0 +1,204 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License. See accompanying LICENSE file.
+ */
+
+package org.apache.hadoop.yarn.submarine.client.cli;
+
+import com.google.common.annotations.VisibleForTesting;
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.GnuParser;
+import org.apache.commons.cli.HelpFormatter;
+import org.apache.commons.cli.Options;
+import org.apache.commons.cli.ParseException;
+import org.apache.hadoop.yarn.api.records.ApplicationId;
+import org.apache.hadoop.yarn.exceptions.YarnException;
+import org.apache.hadoop.yarn.submarine.client.cli.param.RunJobParameters;
+import org.apache.hadoop.yarn.submarine.common.ClientContext;
+import org.apache.hadoop.yarn.submarine.common.exception.SubmarineException;
+import org.apache.hadoop.yarn.submarine.runtimes.common.JobMonitor;
+import org.apache.hadoop.yarn.submarine.runtimes.common.JobSubmitter;
+import org.apache.hadoop.yarn.submarine.runtimes.common.StorageKeyConstants;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+
+public class RunJobCli extends AbstractCli {
+ private static final Logger LOG =
+ LoggerFactory.getLogger(RunJobCli.class);
+
+ private Options options;
+ private RunJobParameters parameters = new RunJobParameters();
+
+ private JobSubmitter jobSubmitter;
+ private JobMonitor jobMonitor;
+
+ public RunJobCli(ClientContext cliContext) {
+ this(cliContext, cliContext.getRuntimeFactory().getJobSubmitterInstance(),
+ cliContext.getRuntimeFactory().getJobMonitorInstance());
+ }
+
+ @VisibleForTesting
+ public RunJobCli(ClientContext cliContext, JobSubmitter jobSubmitter,
+ JobMonitor jobMonitor) {
+ super(cliContext);
+ options = generateOptions();
+ this.jobSubmitter = jobSubmitter;
+ this.jobMonitor = jobMonitor;
+ }
+
+ public void printUsages() {
+ new HelpFormatter().printHelp("job run", options);
+ }
+
+ private Options generateOptions() {
+ Options options = new Options();
+ options.addOption(CliConstants.NAME, true, "Name of the job");
+ options.addOption(CliConstants.INPUT_PATH, true,
+ "Input of the job, could be local or other FS directory");
+ options.addOption(CliConstants.CHECKPOINT_PATH, true,
+ "Training output directory of the job, "
+ + "could be local or other FS directory. This typically includes "
+ + "checkpoint files and exported model ");
+ options.addOption(CliConstants.SAVED_MODEL_PATH, true,
+ "Model exported path (savedmodel) of the job, which is needed when "
+ + "exported model is not placed under ${checkpoint_path}"
+ + "could be local or other FS directory. This will be used to serve.");
+ options.addOption(CliConstants.N_WORKERS, true,
+ "Numnber of worker tasks of the job, by default it's 1");
+ options.addOption(CliConstants.N_PS, true,
+ "Number of PS tasks of the job, by default it's 0");
+ options.addOption(CliConstants.WORKER_RES, true,
+ "Resource of each worker, for example "
+ + "memory-mb=2048,vcores=2,yarn.io/gpu=2");
+ options.addOption(CliConstants.PS_RES, true,
+ "Resource of each PS, for example "
+ + "memory-mb=2048,vcores=2,yarn.io/gpu=2");
+ options.addOption(CliConstants.DOCKER_IMAGE, true, "Docker image name/tag");
+ options.addOption(CliConstants.QUEUE, true,
+ "Name of queue to run the job, by default it uses default queue");
+ options.addOption(CliConstants.TENSORBOARD, true,
+ "Should we run TensorBoard" + " for this job? By default it's true");
+ options.addOption(CliConstants.WORKER_LAUNCH_CMD, true,
+ "Commandline of worker, arguments will be "
+ + "directly used to launch the worker");
+ options.addOption(CliConstants.PS_LAUNCH_CMD, true,
+ "Commandline of worker, arguments will be "
+ + "directly used to launch the PS");
+ options.addOption(CliConstants.ENV, true,
+ "Common environment variable of worker/ps");
+ options.addOption(CliConstants.VERBOSE, false,
+ "Print verbose log for troubleshooting");
+ options.addOption(CliConstants.WAIT_JOB_FINISH, false,
+ "Specified when user want to wait the job finish");
+ options.addOption(CliConstants.PS_DOCKER_IMAGE, true,
+ "Specify docker image for PS, when this is not specified, PS uses --"
+ + CliConstants.DOCKER_IMAGE + " as default.");
+ options.addOption(CliConstants.WORKER_DOCKER_IMAGE, true,
+ "Specify docker image for WORKER, when this is not specified, WORKER "
+ + "uses --" + CliConstants.DOCKER_IMAGE + " as default.");
+ options.addOption("h", "help", false, "Print help");
+ return options;
+ }
+
+ private void replacePatternsInParameters() throws IOException {
+ if (parameters.getPSLaunchCmd() != null && !parameters.getPSLaunchCmd()
+ .isEmpty()) {
+ String afterReplace = CliUtils.replacePatternsInLaunchCommand(
+ parameters.getPSLaunchCmd(), parameters,
+ clientContext.getRemoteDirectoryManager());
+ parameters.setPSLaunchCmd(afterReplace);
+ }
+
+ if (parameters.getWorkerLaunchCmd() != null && !parameters
+ .getWorkerLaunchCmd().isEmpty()) {
+ String afterReplace = CliUtils.replacePatternsInLaunchCommand(
+ parameters.getWorkerLaunchCmd(), parameters,
+ clientContext.getRemoteDirectoryManager());
+ parameters.setWorkerLaunchCmd(afterReplace);
+ }
+ }
+
+ private void parseCommandLineAndGetRunJobParameters(String[] args)
+ throws ParseException, IOException, YarnException {
+ try {
+ // Do parsing
+ GnuParser parser = new GnuParser();
+ CommandLine cli = parser.parse(options, args);
+ parameters.updateParametersByParsedCommandline(cli, options, clientContext);
+ } catch (ParseException e) {
+ LOG.error("Exception in parse:", e.getMessage());
+ printUsages();
+ throw e;
+ }
+
+ // replace patterns
+ replacePatternsInParameters();
+ }
+
+ private void storeJobInformation(String jobName, ApplicationId applicationId,
+ String[] args) throws IOException {
+ Map<String, String> jobInfo = new HashMap<>();
+ jobInfo.put(StorageKeyConstants.JOB_NAME, jobName);
+ jobInfo.put(StorageKeyConstants.APPLICATION_ID, applicationId.toString());
+
+ if (parameters.getCheckpointPath() != null) {
+ jobInfo.put(StorageKeyConstants.CHECKPOINT_PATH,
+ parameters.getCheckpointPath());
+ }
+ if (parameters.getInputPath() != null) {
+ jobInfo.put(StorageKeyConstants.INPUT_PATH,
+ parameters.getInputPath());
+ }
+ if (parameters.getSavedModelPath() != null) {
+ jobInfo.put(StorageKeyConstants.SAVED_MODEL_PATH,
+ parameters.getSavedModelPath());
+ }
+
+ String joinedArgs = String.join(" ", args);
+ jobInfo.put(StorageKeyConstants.JOB_RUN_ARGS, joinedArgs);
+ clientContext.getRuntimeFactory().getSubmarineStorage().addNewJob(jobName,
+ jobInfo);
+ }
+
+ @Override
+ public int run(String[] args)
+ throws ParseException, IOException, YarnException, InterruptedException,
+ SubmarineException {
+ if (CliUtils.argsForHelp(args)) {
+ printUsages();
+ return 0;
+ }
+
+ parseCommandLineAndGetRunJobParameters(args);
+ ApplicationId applicationId = this.jobSubmitter.submitJob(parameters);
+ storeJobInformation(parameters.getName(), applicationId, args);
+ if (parameters.isWaitJobFinish()) {
+ this.jobMonitor.waitTrainingFinal(parameters.getName());
+ }
+
+ return 0;
+ }
+
+ @VisibleForTesting
+ public JobSubmitter getJobSubmitter() {
+ return jobSubmitter;
+ }
+
+ @VisibleForTesting
+ RunJobParameters getRunJobParameters() {
+ return parameters;
+ }
+}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/cadbc8b5/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/client/cli/ShowJobCli.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/client/cli/ShowJobCli.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/client/cli/ShowJobCli.java
new file mode 100644
index 0000000..6b76192
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/client/cli/ShowJobCli.java
@@ -0,0 +1,125 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License. See accompanying LICENSE file.
+ */
+
+package org.apache.hadoop.yarn.submarine.client.cli;
+
+import com.google.common.annotations.VisibleForTesting;
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.GnuParser;
+import org.apache.commons.cli.HelpFormatter;
+import org.apache.commons.cli.Options;
+import org.apache.commons.cli.ParseException;
+import org.apache.hadoop.yarn.exceptions.YarnException;
+import org.apache.hadoop.yarn.submarine.client.cli.param.ShowJobParameters;
+import org.apache.hadoop.yarn.submarine.common.ClientContext;
+import org.apache.hadoop.yarn.submarine.common.exception.SubmarineException;
+import org.apache.hadoop.yarn.submarine.runtimes.common.StorageKeyConstants;
+import org.apache.hadoop.yarn.submarine.runtimes.common.SubmarineStorage;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.util.Map;
+
+public class ShowJobCli extends AbstractCli {
+ private static final Logger LOG = LoggerFactory.getLogger(ShowJobCli.class);
+
+ private Options options;
+ private ShowJobParameters parameters = new ShowJobParameters();
+
+ public ShowJobCli(ClientContext cliContext) {
+ super(cliContext);
+ options = generateOptions();
+ }
+
+ public void printUsages() {
+ new HelpFormatter().printHelp("job show", options);
+ }
+
+ private Options generateOptions() {
+ Options options = new Options();
+ options.addOption(CliConstants.NAME, true, "Name of the job");
+ options.addOption("h", "help", false, "Print help");
+ return options;
+ }
+
+ private void parseCommandLineAndGetShowJobParameters(String[] args)
+ throws IOException, YarnException {
+ // Do parsing
+ GnuParser parser = new GnuParser();
+ CommandLine cli;
+ try {
+ cli = parser.parse(options, args);
+ parameters.updateParametersByParsedCommandline(cli, options,
+ clientContext);
+ } catch (ParseException e) {
+ printUsages();
+ }
+ }
+
+ private void printIfNotNull(String keyForPrint, String keyInStorage,
+ Map<String, String> jobInfo) {
+ if (jobInfo.containsKey(keyInStorage)) {
+ System.out.println("\t" + keyForPrint + ": " + jobInfo.get(keyInStorage));
+ }
+ }
+
+ private void printJobInfo(Map<String, String> jobInfo) {
+ System.out.println("Job Meta Info:");
+ printIfNotNull("Application Id", StorageKeyConstants.APPLICATION_ID,
+ jobInfo);
+ printIfNotNull("Input Path", StorageKeyConstants.INPUT_PATH, jobInfo);
+ printIfNotNull("Saved Model Path", StorageKeyConstants.SAVED_MODEL_PATH,
+ jobInfo);
+ printIfNotNull("Checkpoint Path", StorageKeyConstants.CHECKPOINT_PATH,
+ jobInfo);
+ printIfNotNull("Run Parameters", StorageKeyConstants.JOB_RUN_ARGS,
+ jobInfo);
+ }
+
+ @VisibleForTesting
+ protected void getAndPrintJobInfo() throws IOException {
+ SubmarineStorage storage =
+ clientContext.getRuntimeFactory().getSubmarineStorage();
+
+ Map<String, String> jobInfo = null;
+ try {
+ jobInfo = storage.getJobInfoByName(parameters.getName());
+ } catch (IOException e) {
+ LOG.error("Failed to retrieve job info", e);
+ throw e;
+ }
+
+ printJobInfo(jobInfo);
+ }
+
+ @VisibleForTesting
+ public ShowJobParameters getParameters() {
+ return parameters;
+ }
+
+ @Override
+ public int run(String[] args)
+ throws ParseException, IOException, YarnException, InterruptedException,
+ SubmarineException {
+ if (CliUtils.argsForHelp(args)) {
+ printUsages();
+ return 0;
+ }
+
+ parseCommandLineAndGetShowJobParameters(args);
+ getAndPrintJobInfo();
+ return 0;
+ }
+}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/cadbc8b5/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/client/cli/param/BaseParameters.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/client/cli/param/BaseParameters.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/client/cli/param/BaseParameters.java
new file mode 100644
index 0000000..609e868
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/client/cli/param/BaseParameters.java
@@ -0,0 +1,56 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License. See accompanying LICENSE file.
+ */
+
+package org.apache.hadoop.yarn.submarine.client.cli.param;
+
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.Options;
+import org.apache.commons.cli.ParseException;
+import org.apache.hadoop.yarn.exceptions.YarnException;
+import org.apache.hadoop.yarn.submarine.client.cli.CliConstants;
+import org.apache.hadoop.yarn.submarine.common.ClientContext;
+import org.apache.hadoop.yarn.submarine.common.conf.SubmarineLogs;
+
+import java.io.IOException;
+
+/**
+ * Root of all parameter classes; holds the name given on the command line.
+ */
+public abstract class BaseParameters {
+  private String name;
+
+  /**
+   * Reads the name from the parsed command line and switches verbose
+   * logging on when the verbose option is present.
+   *
+   * @param parsedCommandLine parsed command line
+   * @param options option set (not read by this implementation)
+   * @param clientContext client context (not read by this implementation)
+   * @throws ParseException when no name was given
+   */
+  public void updateParametersByParsedCommandline(CommandLine parsedCommandLine,
+      Options options, ClientContext clientContext)
+      throws ParseException, IOException, YarnException {
+    final String givenName =
+        parsedCommandLine.getOptionValue(CliConstants.NAME);
+    if (null == givenName) {
+      throw new ParseException("--name is absent");
+    }
+
+    final boolean verbose = parsedCommandLine.hasOption(CliConstants.VERBOSE);
+    if (verbose) {
+      SubmarineLogs.verboseOn();
+    }
+
+    setName(givenName);
+  }
+
+  /**
+   * @return the name, or null when none has been set
+   */
+  public String getName() {
+    return this.name;
+  }
+
+  /**
+   * @param name the name to store
+   * @return this instance, for chaining
+   */
+  public BaseParameters setName(String name) {
+    this.name = name;
+    return this;
+  }
+}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/cadbc8b5/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/client/cli/param/RunJobParameters.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/client/cli/param/RunJobParameters.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/client/cli/param/RunJobParameters.java
new file mode 100644
index 0000000..6cab9e3
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/client/cli/param/RunJobParameters.java
@@ -0,0 +1,222 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License. See accompanying LICENSE file.
+ */
+
+package org.apache.hadoop.yarn.submarine.client.cli.param;
+
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.Options;
+import org.apache.commons.cli.ParseException;
+import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.exceptions.YarnException;
+import org.apache.hadoop.yarn.submarine.client.cli.CliConstants;
+import org.apache.hadoop.yarn.submarine.client.cli.CliUtils;
+import org.apache.hadoop.yarn.submarine.common.ClientContext;
+
+import java.io.IOException;
+
+/**
+ * Parameters used to run a job
+ */
+public class RunJobParameters extends RunParameters {
+ private String input;
+ private String checkpointPath;
+
+ private int numWorkers;
+ private int numPS;
+ private Resource workerResource;
+ private Resource psResource;
+ private boolean tensorboardEnabled;
+ private String workerLaunchCmd;
+ private String psLaunchCmd;
+
+ private String psDockerImage = null;
+ private String workerDockerImage = null;
+
+ private boolean waitJobFinish = false;
+ private boolean distributed = false;
+
+ @Override
+ public void updateParametersByParsedCommandline(CommandLine parsedCommandLine,
+ Options options, ClientContext clientContext)
+ throws ParseException, IOException, YarnException {
+
+ String input = parsedCommandLine.getOptionValue(CliConstants.INPUT_PATH);
+ String jobDir = parsedCommandLine.getOptionValue(CliConstants.CHECKPOINT_PATH);
+ int nWorkers = 1;
+ if (parsedCommandLine.getOptionValue(CliConstants.N_WORKERS) != null) {
+ nWorkers = Integer.parseInt(
+ parsedCommandLine.getOptionValue(CliConstants.N_WORKERS));
+ }
+
+ int nPS = 0;
+ if (parsedCommandLine.getOptionValue(CliConstants.N_PS) != null) {
+ nPS = Integer.parseInt(
+ parsedCommandLine.getOptionValue(CliConstants.N_PS));
+ }
+
+ // Check #workers and #ps.
+ // When distributed training is required
+ if (nWorkers >= 2 && nPS > 0) {
+ distributed = true;
+ } else if (nWorkers == 1 && nPS > 0) {
+ throw new ParseException("Only specified one worker but non-zero PS, "
+ + "please double check.");
+ }
+
+ String workerResourceStr = parsedCommandLine.getOptionValue(
+ CliConstants.WORKER_RES);
+ if (workerResourceStr == null) {
+ throw new ParseException("--" + CliConstants.WORKER_RES + " is absent.");
+ }
+ Resource workerResource = CliUtils.createResourceFromString(
+ workerResourceStr,
+ clientContext.getOrCreateYarnClient().getResourceTypeInfo());
+
+ Resource psResource = null;
+ if (nPS > 0) {
+ String psResourceStr = parsedCommandLine.getOptionValue(CliConstants.PS_RES);
+ if (psResourceStr == null) {
+ throw new ParseException("--" + CliConstants.PS_RES + " is absent.");
+ }
+ psResource = CliUtils.createResourceFromString(psResourceStr,
+ clientContext.getOrCreateYarnClient().getResourceTypeInfo());
+ }
+
+ boolean tensorboard = false;
+ if (parsedCommandLine.getOptionValue(CliConstants.TENSORBOARD) != null) {
+ tensorboard = Boolean.parseBoolean(
+ parsedCommandLine.getOptionValue(CliConstants.TENSORBOARD));
+ }
+
+ if (parsedCommandLine.hasOption(CliConstants.WAIT_JOB_FINISH)) {
+ this.waitJobFinish = true;
+ }
+
+ psDockerImage = parsedCommandLine.getOptionValue(
+ CliConstants.PS_DOCKER_IMAGE);
+ workerDockerImage = parsedCommandLine.getOptionValue(
+ CliConstants.WORKER_DOCKER_IMAGE);
+
+ String workerLaunchCmd = parsedCommandLine.getOptionValue(
+ CliConstants.WORKER_LAUNCH_CMD);
+ String psLaunchCommand = parsedCommandLine.getOptionValue(
+ CliConstants.PS_LAUNCH_CMD);
+
+ this.setInputPath(input).setCheckpointPath(jobDir).setNumPS(nPS).setNumWorkers(nWorkers)
+ .setPSLaunchCmd(psLaunchCommand).setWorkerLaunchCmd(workerLaunchCmd)
+ .setPsResource(psResource).setWorkerResource(workerResource)
+ .setTensorboardEnabled(tensorboard);
+
+ super.updateParametersByParsedCommandline(parsedCommandLine,
+ options, clientContext);
+ }
+
+ public String getInputPath() {
+ return input;
+ }
+
+ public RunJobParameters setInputPath(String input) {
+ this.input = input;
+ return this;
+ }
+
+ public String getCheckpointPath() {
+ return checkpointPath;
+ }
+
+ public RunJobParameters setCheckpointPath(String checkpointPath) {
+ this.checkpointPath = checkpointPath;
+ return this;
+ }
+
+ public int getNumWorkers() {
+ return numWorkers;
+ }
+
+ public RunJobParameters setNumWorkers(int numWorkers) {
+ this.numWorkers = numWorkers;
+ return this;
+ }
+
+ public int getNumPS() {
+ return numPS;
+ }
+
+ public RunJobParameters setNumPS(int numPS) {
+ this.numPS = numPS;
+ return this;
+ }
+
+ public Resource getWorkerResource() {
+ return workerResource;
+ }
+
+ public RunJobParameters setWorkerResource(Resource workerResource) {
+ this.workerResource = workerResource;
+ return this;
+ }
+
+ public Resource getPsResource() {
+ return psResource;
+ }
+
+ public RunJobParameters setPsResource(Resource psResource) {
+ this.psResource = psResource;
+ return this;
+ }
+
+ public boolean isTensorboardEnabled() {
+ return tensorboardEnabled;
+ }
+
+ public RunJobParameters setTensorboardEnabled(boolean tensorboardEnabled) {
+ this.tensorboardEnabled = tensorboardEnabled;
+ return this;
+ }
+
+ public String getWorkerLaunchCmd() {
+ return workerLaunchCmd;
+ }
+
+ public RunJobParameters setWorkerLaunchCmd(String workerLaunchCmd) {
+ this.workerLaunchCmd = workerLaunchCmd;
+ return this;
+ }
+
+ public String getPSLaunchCmd() {
+ return psLaunchCmd;
+ }
+
+ public RunJobParameters setPSLaunchCmd(String psLaunchCmd) {
+ this.psLaunchCmd = psLaunchCmd;
+ return this;
+ }
+
+ public boolean isWaitJobFinish() {
+ return waitJobFinish;
+ }
+
+
+ public String getPsDockerImage() {
+ return psDockerImage;
+ }
+
+ public String getWorkerDockerImage() {
+ return workerDockerImage;
+ }
+
+ public boolean isDistributed() {
+ return distributed;
+ }
+}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/cadbc8b5/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/client/cli/param/RunParameters.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/client/cli/param/RunParameters.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/client/cli/param/RunParameters.java
new file mode 100644
index 0000000..28884d8
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/client/cli/param/RunParameters.java
@@ -0,0 +1,103 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License. See accompanying LICENSE file.
+ */
+
+package org.apache.hadoop.yarn.submarine.client.cli.param;
+
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.Options;
+import org.apache.commons.cli.ParseException;
+import org.apache.hadoop.yarn.exceptions.YarnException;
+import org.apache.hadoop.yarn.submarine.client.cli.CliConstants;
+import org.apache.hadoop.yarn.submarine.common.ClientContext;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Parameters shared by everything that runs on the cluster, such as
+ * running a job or serving a model.
+ */
+public abstract class RunParameters extends BaseParameters {
+  private String savedModelPath;
+  private String dockerImageName;
+  private List<String> envars = new ArrayList<>();
+  private String queue;
+
+  /**
+   * Copies the saved model path, the environment variables, the queue and
+   * the docker image from the parsed command line, then delegates to the
+   * parent class.
+   *
+   * @param parsedCommandLine parsed command line
+   * @param options option set, handed to the parent class
+   * @param clientContext client context, handed to the parent class
+   */
+  @Override
+  public void updateParametersByParsedCommandline(CommandLine parsedCommandLine,
+      Options options, ClientContext clientContext) throws ParseException,
+      IOException, YarnException {
+    setSavedModelPath(
+        parsedCommandLine.getOptionValue(CliConstants.SAVED_MODEL_PATH));
+
+    // Envars: an absent option yields an empty list rather than null.
+    List<String> collected = new ArrayList<>();
+    String[] given = parsedCommandLine.getOptionValues(CliConstants.ENV);
+    for (int i = 0; given != null && i < given.length; i++) {
+      collected.add(given[i]);
+    }
+    setEnvars(collected);
+
+    setQueue(parsedCommandLine.getOptionValue(CliConstants.QUEUE));
+
+    setDockerImageName(
+        parsedCommandLine.getOptionValue(CliConstants.DOCKER_IMAGE));
+
+    super.updateParametersByParsedCommandline(parsedCommandLine,
+        options, clientContext);
+  }
+
+  public String getQueue() {
+    return this.queue;
+  }
+
+  public RunParameters setQueue(String queue) {
+    this.queue = queue;
+    return this;
+  }
+
+  public String getDockerImageName() {
+    return this.dockerImageName;
+  }
+
+  public RunParameters setDockerImageName(String dockerImageName) {
+    this.dockerImageName = dockerImageName;
+    return this;
+  }
+
+
+  public List<String> getEnvars() {
+    return this.envars;
+  }
+
+  public RunParameters setEnvars(List<String> envars) {
+    this.envars = envars;
+    return this;
+  }
+
+  public String getSavedModelPath() {
+    return this.savedModelPath;
+  }
+
+  public RunParameters setSavedModelPath(String savedModelPath) {
+    this.savedModelPath = savedModelPath;
+    return this;
+  }
+}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/cadbc8b5/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/client/cli/param/ShowJobParameters.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/client/cli/param/ShowJobParameters.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/client/cli/param/ShowJobParameters.java
new file mode 100644
index 0000000..e5f19d6
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/client/cli/param/ShowJobParameters.java
@@ -0,0 +1,18 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License. See accompanying LICENSE file.
+ */
+
+package org.apache.hadoop.yarn.submarine.client.cli.param;
+
+/**
+ * Parameters of the "job show" command. Declares nothing of its own;
+ * everything is inherited from {@link BaseParameters}.
+ */
+public class ShowJobParameters extends BaseParameters {
+}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/cadbc8b5/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/common/ClientContext.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/common/ClientContext.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/common/ClientContext.java
new file mode 100644
index 0000000..31a8b1b
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/common/ClientContext.java
@@ -0,0 +1,77 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License. See accompanying LICENSE file.
+ */
+
+package org.apache.hadoop.yarn.submarine.common;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.yarn.client.api.YarnClient;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.submarine.common.conf.SubmarineConfiguration;
+import org.apache.hadoop.yarn.submarine.common.fs.RemoteDirectoryManager;
+import org.apache.hadoop.yarn.submarine.runtimes.RuntimeFactory;
+
+/**
+ * Holder of the objects shared by the client: the YARN and Submarine
+ * configurations, a lazily created {@link YarnClient}, the remote directory
+ * manager and the runtime factory.
+ */
+public class ClientContext {
+  private Configuration yarnConf;
+
+  private RemoteDirectoryManager remoteDirectoryManager;
+  private YarnClient yarnClient;
+  private Configuration submarineConfig;
+  private RuntimeFactory runtimeFactory;
+
+  /**
+   * Creates a context with a fresh {@link YarnConfiguration} and a fresh
+   * {@link SubmarineConfiguration}.
+   */
+  public ClientContext() {
+    this.yarnConf = new YarnConfiguration();
+    this.submarineConfig = new SubmarineConfiguration();
+  }
+
+  /**
+   * Returns the YARN client, creating, initializing (with the current YARN
+   * configuration) and starting it on the first call.
+   *
+   * @return the YARN client of this context
+   */
+  public synchronized YarnClient getOrCreateYarnClient() {
+    if (this.yarnClient != null) {
+      return this.yarnClient;
+    }
+
+    this.yarnClient = YarnClient.createYarnClient();
+    this.yarnClient.init(this.yarnConf);
+    this.yarnClient.start();
+    return this.yarnClient;
+  }
+
+  /**
+   * @return the YARN configuration
+   */
+  public Configuration getYarnConfig() {
+    return this.yarnConf;
+  }
+
+  /**
+   * Replaces the YARN configuration.
+   *
+   * @param conf new YARN configuration
+   */
+  public void setConfiguration(Configuration conf) {
+    this.yarnConf = conf;
+  }
+
+  /**
+   * @return the remote directory manager, or null when none has been set
+   */
+  public RemoteDirectoryManager getRemoteDirectoryManager() {
+    return this.remoteDirectoryManager;
+  }
+
+  /**
+   * @param remoteDirectoryManager remote directory manager to use
+   */
+  public void setRemoteDirectoryManager(
+      RemoteDirectoryManager remoteDirectoryManager) {
+    this.remoteDirectoryManager = remoteDirectoryManager;
+  }
+
+  /**
+   * @return the Submarine configuration
+   */
+  public Configuration getSubmarineConfig() {
+    return this.submarineConfig;
+  }
+
+  /**
+   * @param submarineConfig new Submarine configuration
+   */
+  public void setSubmarineConfig(Configuration submarineConfig) {
+    this.submarineConfig = submarineConfig;
+  }
+
+  /**
+   * @return the runtime factory, or null when none has been set
+   */
+  public RuntimeFactory getRuntimeFactory() {
+    return this.runtimeFactory;
+  }
+
+  /**
+   * @param runtimeFactory runtime factory to use
+   */
+  public void setRuntimeFactory(RuntimeFactory runtimeFactory) {
+    this.runtimeFactory = runtimeFactory;
+  }
+}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/cadbc8b5/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/common/Envs.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/common/Envs.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/common/Envs.java
new file mode 100644
index 0000000..a1d80db
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/common/Envs.java
@@ -0,0 +1,27 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License. See accompanying LICENSE file.
+ */
+
+package org.apache.hadoop.yarn.submarine.common;
+
+/**
+ * String constants used by submarine; presumably names of environment
+ * variables (verify against the code that reads them).
+ */
+public class Envs {
+  /*
+   * HDFS/HADOOP-related configs
+   */
+  public static final String JAVA_HOME = "JAVA_HOME";
+  public static final String HADOOP_CONF_DIR = "HADOOP_CONF_DIR";
+  public static final String HADOOP_HDFS_HOME = "HADOOP_HDFS_HOME";
+
+  /*
+   * Task type / task index keys
+   */
+  public static final String TASK_INDEX_ENV = "_TASK_INDEX";
+  public static final String TASK_TYPE_ENV = "_TASK_TYPE";
+}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/cadbc8b5/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/common/api/JobComponentStatus.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/common/api/JobComponentStatus.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/common/api/JobComponentStatus.java
new file mode 100644
index 0000000..22468c2
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/common/api/JobComponentStatus.java
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.submarine.common.api;
+
+import org.apache.hadoop.yarn.service.api.records.Component;
+import org.apache.hadoop.yarn.service.api.records.Container;
+import org.apache.hadoop.yarn.service.api.records.ContainerState;
+
+/**
+ * Container counts for one component of a training job: how many
+ * containers are ready, how many are running but not yet ready, and how
+ * many were asked for in total.
+ */
+public class JobComponentStatus {
+  private String compName;
+  private long numReadyContainers;
+  private long numRunningButUnreadyContainers;
+  private long totalAskedContainers;
+
+  /**
+   * @param compName name of the component
+   * @param nReadyContainers number of ready containers
+   * @param nRunningButUnreadyContainers number of containers that are
+   *          running but not ready
+   * @param totalAskedContainers total number of containers asked for
+   */
+  public JobComponentStatus(String compName, long nReadyContainers,
+      long nRunningButUnreadyContainers, long totalAskedContainers) {
+    this.totalAskedContainers = totalAskedContainers;
+    this.numRunningButUnreadyContainers = nRunningButUnreadyContainers;
+    this.numReadyContainers = nReadyContainers;
+    this.compName = compName;
+  }
+
+  // ---- getters ----
+
+  public String getCompName() {
+    return this.compName;
+  }
+
+  public long getNumReadyContainers() {
+    return this.numReadyContainers;
+  }
+
+  public long getNumRunningButUnreadyContainers() {
+    return this.numRunningButUnreadyContainers;
+  }
+
+  public long getTotalAskedContainers() {
+    return this.totalAskedContainers;
+  }
+
+  // ---- setters ----
+
+  public void setCompName(String compName) {
+    this.compName = compName;
+  }
+
+  public void setNumReadyContainers(long numReadyContainers) {
+    this.numReadyContainers = numReadyContainers;
+  }
+
+  public void setNumRunningButUnreadyContainers(
+      long numRunningButUnreadyContainers) {
+    this.numRunningButUnreadyContainers = numRunningButUnreadyContainers;
+  }
+
+  public void setTotalAskedContainers(long totalAskedContainers) {
+    this.totalAskedContainers = totalAskedContainers;
+  }
+}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/cadbc8b5/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/common/api/JobState.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/common/api/JobState.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/common/api/JobState.java
new file mode 100644
index 0000000..eef273a
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/common/api/JobState.java
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.submarine.common.api;
+
+/**
+ * State of training job
+ */
+public enum JobState {
+ /**
+ * Job accepted by scheduler and start running
+ */
+ RUNNING,
+
+ /**
+ * Job killed by user
+ */
+ KILLED,
+
+ /**
+ * Job failed
+ */
+ FAILED,
+
+ /**
+ * Job succeeded
+ */
+ SUCCEEDED,
+
+ /**
+ * Job paused by user
+ */
+ PAUSED;
+
+ public static boolean isFinal(JobState state) {
+ return state == KILLED || state == SUCCEEDED || state == FAILED;
+ }
+}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/cadbc8b5/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/common/api/JobStatus.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/common/api/JobStatus.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/common/api/JobStatus.java
new file mode 100644
index 0000000..6e390f3
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/common/api/JobStatus.java
@@ -0,0 +1,87 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.submarine.common.api;
+
+import java.io.PrintStream;
+import java.time.Instant;
+import java.util.List;
+
+/**
+ * Status of training job: its name, state, tensorboard link and the
+ * status of each of its components.
+ */
+public class JobStatus {
+
+  protected String jobName;
+  protected JobState state;
+  protected String tensorboardLink = "N/A";
+  protected List<JobComponentStatus> componentStatus;
+
+  /**
+   * Print a human readable summary of this status.
+   *
+   * The first line (job name, state, current time) is always printed.
+   * For a final state nothing else is printed. Otherwise the tensorboard
+   * link (only when it starts with "http") and one line per component
+   * follow. Fields that were never set (null state, null tensorboard
+   * link, null component list) are tolerated instead of causing a
+   * NullPointerException.
+   *
+   * @param out stream to print to
+   */
+  public void nicePrint(PrintStream out) {
+    // state may not have been set yet; do not dereference it blindly.
+    String stateName = (state == null) ? "N/A" : state.name();
+    out.println(
+        "Job Name=" + this.jobName + ", status=" + stateName + " time="
+            + Instant.now());
+    if (JobState.isFinal(this.state)) {
+      return;
+    }
+
+    // setTensorboardLink accepts null, so guard before startsWith.
+    if (tensorboardLink != null && tensorboardLink.startsWith("http")) {
+      out.println(" Tensorboard link: " + tensorboardLink);
+    }
+
+    out.println(" Components:");
+    // componentStatus has no default value and stays null until set.
+    if (componentStatus != null) {
+      for (JobComponentStatus comp : componentStatus) {
+        out.println(" [" + comp.getCompName() + "] Ready=" + comp
+            .getNumReadyContainers() + " + Running-But-Non-Ready=" + comp
+            .getNumRunningButUnreadyContainers() + " | Asked=" + comp
+            .getTotalAskedContainers());
+      }
+    }
+    out.println("------------------");
+  }
+
+  public JobState getState() {
+    return state;
+  }
+
+  public String getTensorboardLink() {
+    return tensorboardLink;
+  }
+
+  /**
+   * @return per-component status, or null if it has not been set
+   */
+  public List<JobComponentStatus> getComponentStatus() {
+    return componentStatus;
+  }
+
+  public String getJobName() {
+    return jobName;
+  }
+
+  public void setJobName(String jobName) {
+    this.jobName = jobName;
+  }
+
+  public void setState(JobState state) {
+    this.state = state;
+  }
+
+  public void setTensorboardLink(String tensorboardLink) {
+    this.tensorboardLink = tensorboardLink;
+  }
+
+  public void setComponentStatus(List<JobComponentStatus> componentStatus) {
+    this.componentStatus = componentStatus;
+  }
+}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/cadbc8b5/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/common/api/TaskType.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/common/api/TaskType.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/common/api/TaskType.java
new file mode 100644
index 0000000..535d994
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/common/api/TaskType.java
@@ -0,0 +1,32 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License. See accompanying LICENSE file.
+ */
+
+package org.apache.hadoop.yarn.submarine.common.api;
+
+/**
+ * Types of task in a job, each mapped to a component name.
+ */
+public enum TaskType {
+  PRIMARY_WORKER("master"),
+  WORKER("worker"),
+  PS("ps"),
+  TENSORBOARD("tensorboard");
+
+  // Enum constants are shared singletons; keep their state immutable.
+  private final String compName;
+
+  TaskType(String compName) {
+    this.compName = compName;
+  }
+
+  /**
+   * @return the component name associated with this task type
+   */
+  public String getComponentName() {
+    return compName;
+  }
+}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/cadbc8b5/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/common/api/builder/JobComponentStatusBuilder.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/common/api/builder/JobComponentStatusBuilder.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/common/api/builder/JobComponentStatusBuilder.java
new file mode 100644
index 0000000..fbefe6b
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/common/api/builder/JobComponentStatusBuilder.java
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.submarine.common.api.builder;
+
+import org.apache.hadoop.yarn.service.api.records.Component;
+import org.apache.hadoop.yarn.service.api.records.Container;
+import org.apache.hadoop.yarn.service.api.records.ContainerState;
+import org.apache.hadoop.yarn.submarine.common.api.JobComponentStatus;
+
+/**
+ * Builds a {@link JobComponentStatus} from a YARN service component.
+ */
+public class JobComponentStatusBuilder {
+  /**
+   * Count the containers of the given component by state.
+   *
+   * Containers in state READY and RUNNING_BUT_UNREADY are counted;
+   * containers in any other state are ignored.
+   *
+   * @param component service component to summarize
+   * @return status carrying the component name, both counters and the
+   *         number of containers asked for (0 when the component does
+   *         not report one)
+   */
+  public static JobComponentStatus fromServiceComponent(Component component) {
+    // Read into a boxed Long first so that a null value does not fail on
+    // auto-unboxing; a missing count is reported as 0.
+    Long askedContainers = component.getNumberOfContainers();
+    long totalAskedContainers =
+        (askedContainers == null) ? 0L : askedContainers;
+    long numReadyContainers = 0;
+    long numRunningButUnreadyContainers = 0;
+    String compName = component.getName();
+
+    // A component without a container list simply has nothing to count.
+    if (component.getContainers() != null) {
+      for (Container c : component.getContainers()) {
+        if (c.getState() == ContainerState.READY) {
+          numReadyContainers++;
+        } else if (c.getState() == ContainerState.RUNNING_BUT_UNREADY) {
+          numRunningButUnreadyContainers++;
+        }
+      }
+    }
+
+    return new JobComponentStatus(compName, numReadyContainers,
+        numRunningButUnreadyContainers, totalAskedContainers);
+  }
+
+}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/cadbc8b5/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/common/api/builder/JobStatusBuilder.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/common/api/builder/JobStatusBuilder.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/common/api/builder/JobStatusBuilder.java
new file mode 100644
index 0000000..2f7971e
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/common/api/builder/JobStatusBuilder.java
@@ -0,0 +1,64 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License. See accompanying LICENSE file.
+ */
+
+package org.apache.hadoop.yarn.submarine.common.api.builder;
+
+import org.apache.hadoop.yarn.service.api.records.Component;
+import org.apache.hadoop.yarn.service.api.records.Service;
+import org.apache.hadoop.yarn.service.api.records.ServiceState;
+import org.apache.hadoop.yarn.submarine.common.api.JobComponentStatus;
+import org.apache.hadoop.yarn.submarine.common.api.JobState;
+import org.apache.hadoop.yarn.submarine.common.api.JobStatus;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Builds a {@link JobStatus} from a YARN service spec.
+ */
+public class JobStatusBuilder {
+  /**
+   * Convert a service spec into a job status.
+   *
+   * The job name and state are always populated. Component status is
+   * only populated when the job is not in a final state.
+   *
+   * @param serviceSpec service to convert
+   * @return status of the job backed by the given service
+   */
+  public static JobStatus fromServiceSpec(Service serviceSpec) {
+    JobStatus status = new JobStatus();
+    // Set the name before the early return below, otherwise finished jobs
+    // would be reported with a null name.
+    status.setJobName(serviceSpec.getName());
+    status.setState(fromServiceState(serviceSpec.getState()));
+
+    // If it is a final state, return.
+    if (JobState.isFinal(status.getState())) {
+      return status;
+    }
+
+    List<JobComponentStatus> componentStatusList = new ArrayList<>();
+
+    for (Component component : serviceSpec.getComponents()) {
+      componentStatusList.add(
+          JobComponentStatusBuilder.fromServiceComponent(component));
+    }
+    status.setComponentStatus(componentStatusList);
+
+    // TODO, handle tensorboard differently.
+    // status.setTensorboardLink(getTensorboardLink(serviceSpec, clientContext));
+
+    return status;
+  }
+
+  /**
+   * Map a service state to a job state: STOPPED becomes SUCCEEDED, FAILED
+   * becomes FAILED, every other state becomes RUNNING.
+   */
+  private static JobState fromServiceState(ServiceState serviceState) {
+    switch (serviceState) {
+    case STOPPED:
+      // TODO, once YARN-8488 gets committed, we need to update this.
+      return JobState.SUCCEEDED;
+    case FAILED:
+      return JobState.FAILED;
+    default:
+      return JobState.RUNNING;
+    }
+  }
+}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/cadbc8b5/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/common/conf/SubmarineConfiguration.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/common/conf/SubmarineConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/common/conf/SubmarineConfiguration.java
new file mode 100644
index 0000000..c9e6b7b
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/common/conf/SubmarineConfiguration.java
@@ -0,0 +1,51 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License. See accompanying LICENSE file.
+ */
+
+package org.apache.hadoop.yarn.submarine.common.conf;
+
+import org.apache.hadoop.conf.Configuration;
+
+/**
+ * Configuration for submarine, optionally backed by the local
+ * submarine.xml resource.
+ */
+public class SubmarineConfiguration extends Configuration {
+  private static final String SUBMARINE_CONFIGURATION_FILE = "submarine.xml";
+
+  private static final String PREFIX = "submarine.";
+
+  /*
+   * Runtime of submarine
+   */
+  public static final String RUNTIME_CLASS = PREFIX + "runtime.class";
+  public static final String DEFAULT_RUNTIME_CLASS =
+      "org.apache.hadoop.yarn.submarine.runtimes.yarnservice.YarnServiceRuntimeFactory";
+
+  /**
+   * Start from {@code new Configuration(false)} and add submarine.xml as
+   * a resource.
+   */
+  public SubmarineConfiguration() {
+    this(new Configuration(false), true);
+  }
+
+  /**
+   * Copy the given configuration without adding submarine.xml.
+   *
+   * @param configuration configuration to copy from
+   */
+  public SubmarineConfiguration(Configuration configuration) {
+    this(configuration, false);
+  }
+
+  /**
+   * Copy the given configuration.
+   *
+   * @param configuration configuration to copy from
+   * @param loadLocalConfig when true, submarine.xml is added as a resource
+   */
+  public SubmarineConfiguration(Configuration configuration,
+      boolean loadLocalConfig) {
+    super(configuration);
+    if (!loadLocalConfig) {
+      return;
+    }
+    addResource(SUBMARINE_CONFIGURATION_FILE);
+  }
+
+  /**
+   * Store the given class name under {@link #RUNTIME_CLASS}.
+   *
+   * @param runtimeClass runtime class name to store
+   */
+  public void setSubmarineRuntimeClass(String runtimeClass) {
+    set(RUNTIME_CLASS, runtimeClass);
+  }
+}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/cadbc8b5/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/common/conf/SubmarineLogs.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/common/conf/SubmarineLogs.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/common/conf/SubmarineLogs.java
new file mode 100644
index 0000000..6bb3248
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/common/conf/SubmarineLogs.java
@@ -0,0 +1,31 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License. See accompanying LICENSE file.
+ */
+
+package org.apache.hadoop.yarn.submarine.common.conf;
+
+/**
+ * Holds a single static verbose flag (off by default) with accessors to
+ * read and toggle it.
+ */
+public class SubmarineLogs {
+  private static volatile boolean verbose = false;
+
+  /** Turn the verbose flag on. */
+  public static void verboseOn() {
+    setVerbose(true);
+  }
+
+  /** Turn the verbose flag off. */
+  public static void verboseOff() {
+    setVerbose(false);
+  }
+
+  /**
+   * @return current value of the verbose flag
+   */
+  public static boolean isVerbose() {
+    return verbose;
+  }
+
+  // Single write path for the flag.
+  private static void setVerbose(boolean enabled) {
+    verbose = enabled;
+  }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org