You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@drill.apache.org by ar...@apache.org on 2018/03/04 17:13:50 UTC
[10/12] drill git commit: DRILL-1170: YARN integration for Drill
DRILL-1170: YARN integration for Drill
closes #1011
Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/f2ac8749
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/f2ac8749
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/f2ac8749
Branch: refs/heads/master
Commit: f2ac8749b42539ca6301024becbf3e7092e9511e
Parents: cf2478f
Author: Paul Rogers <pr...@maprtech.com>
Authored: Thu Oct 26 00:24:00 2017 -0700
Committer: Arina Ielchiieva <ar...@gmail.com>
Committed: Sun Mar 4 17:43:22 2018 +0200
----------------------------------------------------------------------
distribution/pom.xml | 5 +
distribution/src/assemble/bin.xml | 40 +
distribution/src/resources/drill-am-log.xml | 54 +
distribution/src/resources/drill-am.sh | 137 ++
.../src/resources/drill-on-yarn-example.conf | 204 +++
distribution/src/resources/drill-on-yarn.sh | 74 +
distribution/src/resources/yarn-client-log.xml | 44 +
distribution/src/resources/yarn-drillbit.sh | 178 +++
drill-yarn/README.md | 190 +++
drill-yarn/USAGE.md | 941 +++++++++++++
drill-yarn/img/am-overview.png | Bin 0 -> 111982 bytes
drill-yarn/img/client-classes.png | Bin 0 -> 138409 bytes
drill-yarn/img/controller-classes.png | Bin 0 -> 157668 bytes
drill-yarn/img/overview.png | Bin 0 -> 65994 bytes
drill-yarn/pom.xml | 144 ++
.../drill/yarn/appMaster/AMException.java | 30 +
.../drill/yarn/appMaster/AMRegistrar.java | 43 +
.../yarn/appMaster/AMWrapperException.java | 25 +
.../drill/yarn/appMaster/AMYarnFacade.java | 91 ++
.../drill/yarn/appMaster/AMYarnFacadeImpl.java | 288 ++++
.../appMaster/AbstractDrillbitScheduler.java | 166 +++
.../drill/yarn/appMaster/AbstractScheduler.java | 112 ++
.../yarn/appMaster/AbstractTaskManager.java | 50 +
.../drill/yarn/appMaster/BatchScheduler.java | 88 ++
.../drill/yarn/appMaster/ClusterController.java | 206 +++
.../yarn/appMaster/ClusterControllerImpl.java | 785 +++++++++++
.../drill/yarn/appMaster/ControllerFactory.java | 30 +
.../drill/yarn/appMaster/ControllerVisitor.java | 22 +
.../apache/drill/yarn/appMaster/Dispatcher.java | 345 +++++
.../drill/yarn/appMaster/DispatcherAddOn.java | 30 +
.../yarn/appMaster/DrillApplicationMaster.java | 119 ++
.../yarn/appMaster/DrillControllerFactory.java | 398 ++++++
.../drill/yarn/appMaster/DrillbitScheduler.java | 51 +
.../drill/yarn/appMaster/EventContext.java | 70 +
.../drill/yarn/appMaster/NodeInventory.java | 195 +++
.../yarn/appMaster/PersistentTaskScheduler.java | 174 +++
.../apache/drill/yarn/appMaster/Pollable.java | 28 +
.../drill/yarn/appMaster/PulseRunnable.java | 79 ++
.../drill/yarn/appMaster/RegistryHandler.java | 35 +
.../apache/drill/yarn/appMaster/Scheduler.java | 162 +++
.../drill/yarn/appMaster/SchedulerState.java | 79 ++
.../yarn/appMaster/SchedulerStateActions.java | 101 ++
.../yarn/appMaster/SchedulerStateImpl.java | 467 +++++++
.../org/apache/drill/yarn/appMaster/Task.java | 323 +++++
.../yarn/appMaster/TaskLifecycleListener.java | 26 +
.../apache/drill/yarn/appMaster/TaskSpec.java | 40 +
.../apache/drill/yarn/appMaster/TaskState.java | 895 ++++++++++++
.../drill/yarn/appMaster/TaskVisitor.java | 22 +
.../yarn/appMaster/YarnFacadeException.java | 30 +
.../yarn/appMaster/http/AMSecurityManager.java | 34 +
.../appMaster/http/AMSecurityManagerImpl.java | 221 +++
.../yarn/appMaster/http/AbstractTasksModel.java | 380 ++++++
.../drill/yarn/appMaster/http/AmRestApi.java | 296 ++++
.../yarn/appMaster/http/AuthDynamicFeature.java | 114 ++
.../yarn/appMaster/http/ControllerModel.java | 208 +++
.../drill/yarn/appMaster/http/PageTree.java | 80 ++
.../drill/yarn/appMaster/http/WebServer.java | 467 +++++++
.../yarn/appMaster/http/WebUiPageTree.java | 527 +++++++
.../drill/yarn/appMaster/http/package-info.java | 22 +
.../drill/yarn/appMaster/package-info.java | 36 +
.../org/apache/drill/yarn/client/AMRunner.java | 368 +++++
.../apache/drill/yarn/client/CleanCommand.java | 89 ++
.../apache/drill/yarn/client/ClientCommand.java | 100 ++
.../apache/drill/yarn/client/ClientContext.java | 48 +
.../drill/yarn/client/ClientException.java | 34 +
.../drill/yarn/client/CommandLineOptions.java | 230 ++++
.../apache/drill/yarn/client/DrillOnYarn.java | 176 +++
.../apache/drill/yarn/client/FileUploader.java | 551 ++++++++
.../apache/drill/yarn/client/HelpCommand.java | 26 +
.../apache/drill/yarn/client/KillCommand.java | 48 +
.../drill/yarn/client/PrintConfigCommand.java | 49 +
.../apache/drill/yarn/client/ResizeCommand.java | 115 ++
.../drill/yarn/client/SimpleRestClient.java | 66 +
.../apache/drill/yarn/client/StartCommand.java | 145 ++
.../apache/drill/yarn/client/StatusCommand.java | 189 +++
.../apache/drill/yarn/client/StopCommand.java | 223 +++
.../apache/drill/yarn/client/package-info.java | 31 +
.../org/apache/drill/yarn/core/AppSpec.java | 169 +++
.../org/apache/drill/yarn/core/ClusterDef.java | 212 +++
.../drill/yarn/core/ContainerRequestSpec.java | 125 ++
.../org/apache/drill/yarn/core/DfsFacade.java | 345 +++++
.../org/apache/drill/yarn/core/DoYUtil.java | 189 +++
.../drill/yarn/core/DoyConfigException.java | 30 +
.../drill/yarn/core/DrillOnYarnConfig.java | 841 ++++++++++++
.../org/apache/drill/yarn/core/LaunchSpec.java | 248 ++++
.../apache/drill/yarn/core/NameValuePair.java | 46 +
.../drill/yarn/core/YarnClientException.java | 30 +
.../apache/drill/yarn/core/YarnRMClient.java | 207 +++
.../apache/drill/yarn/core/package-info.java | 24 +
.../org/apache/drill/yarn/package-info.java | 35 +
.../org/apache/drill/yarn/zk/AMRegistry.java | 145 ++
.../drill/yarn/zk/ZKClusterCoordinator.java | 318 +++++
.../yarn/zk/ZKClusterCoordinatorDriver.java | 315 +++++
.../apache/drill/yarn/zk/ZKConfigException.java | 26 +
.../org/apache/drill/yarn/zk/ZKRegistry.java | 582 ++++++++
.../drill/yarn/zk/ZKRuntimeException.java | 30 +
.../org/apache/drill/yarn/zk/package-info.java | 31 +
.../src/main/resources/drill-am/config.ftl | 41 +
.../src/main/resources/drill-am/confirm.ftl | 70 +
.../src/main/resources/drill-am/generic.ftl | 78 ++
.../src/main/resources/drill-am/history.ftl | 59 +
.../src/main/resources/drill-am/index.ftl | 128 ++
.../src/main/resources/drill-am/login.ftl | 35 +
.../src/main/resources/drill-am/manage.ftl | 78 ++
.../src/main/resources/drill-am/redirect.ftl | 33 +
.../main/resources/drill-am/shrink-warning.ftl | 58 +
.../resources/drill-am/static/css/drill-am.css | 20 +
.../drill-am/static/img/apache-drill-logo.png | Bin 0 -> 29802 bytes
.../resources/drill-am/static/img/drill.ico | Bin 0 -> 580 bytes
.../src/main/resources/drill-am/tasks.ftl | 113 ++
.../drill/yarn/core/drill-on-yarn-defaults.conf | 275 ++++
.../apache/drill/yarn/client/TestClient.java | 137 ++
.../yarn/client/TestCommandLineOptions.java | 84 ++
.../org/apache/drill/yarn/core/TestConfig.java | 267 ++++
.../org/apache/drill/yarn/scripts/README.md | 65 +
.../apache/drill/yarn/scripts/ScriptUtils.java | 847 ++++++++++++
.../apache/drill/yarn/scripts/TestScripts.java | 1288 ++++++++++++++++++
.../drill/yarn/zk/TestAmRegistration.java | 129 ++
.../apache/drill/yarn/zk/TestZkRegistry.java | 459 +++++++
.../src/test/resources/doy-test-logback.xml | 56 +
.../src/test/resources/second-test-config.conf | 34 +
.../src/test/resources/test-doy-config.conf | 32 +
.../src/test/resources/test-doy-distrib.conf | 30 +
.../src/test/resources/third-test-config.conf | 32 +
drill-yarn/src/test/resources/wrapper.sh | 53 +
pom.xml | 225 +++
126 files changed, 21163 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/drill/blob/f2ac8749/distribution/pom.xml
----------------------------------------------------------------------
diff --git a/distribution/pom.xml b/distribution/pom.xml
index d2f65e4..c374939 100644
--- a/distribution/pom.xml
+++ b/distribution/pom.xml
@@ -59,6 +59,11 @@
<version>${project.version}</version>
</dependency>
<dependency>
+ <groupId>org.apache.drill</groupId>
+ <artifactId>drill-yarn</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+ <dependency>
<groupId>org.apache.drill.exec</groupId>
<artifactId>drill-jdbc</artifactId>
<version>${project.version}</version>
http://git-wip-us.apache.org/repos/asf/drill/blob/f2ac8749/distribution/src/assemble/bin.xml
----------------------------------------------------------------------
diff --git a/distribution/src/assemble/bin.xml b/distribution/src/assemble/bin.xml
index bed34ed..7ca1140 100644
--- a/distribution/src/assemble/bin.xml
+++ b/distribution/src/assemble/bin.xml
@@ -110,6 +110,16 @@
<useProjectArtifact>false</useProjectArtifact>
</dependencySet>
+ <dependencySet>
+ <!-- Drill-on-YARN goes into its own directory so it does not end up
+ on the Drillbit class path. Define the jars/tools folder as any jar
+ we ship that is not used by drillbits. -->
+ <includes>
+ <include>org.apache.drill:drill-yarn:jar</include>
+ </includes>
+ <outputDirectory>jars/tools</outputDirectory>
+ <useProjectArtifact>false</useProjectArtifact>
+ </dependencySet>
<dependencySet>
@@ -324,6 +334,21 @@
<outputDirectory>bin</outputDirectory>
</file>
<file>
+ <source>src/resources/drill-on-yarn.sh</source>
+ <fileMode>0750</fileMode>
+ <outputDirectory>bin</outputDirectory>
+ </file>
+ <file>
+ <source>src/resources/drill-am.sh</source>
+ <fileMode>0750</fileMode>
+ <outputDirectory>bin</outputDirectory>
+ </file>
+ <file>
+ <source>src/resources/yarn-drillbit.sh</source>
+ <fileMode>0750</fileMode>
+ <outputDirectory>bin</outputDirectory>
+ </file>
+ <file>
<source>src/resources/submit_plan</source>
<fileMode>0755</fileMode>
<outputDirectory>bin</outputDirectory>
@@ -337,6 +362,16 @@
<outputDirectory>conf</outputDirectory>
</file>
<file>
+ <source>src/resources/yarn-client-log.xml</source>
+ <outputDirectory>conf</outputDirectory>
+ <fileMode>0640</fileMode>
+ </file>
+ <file>
+ <source>src/resources/drill-am-log.xml</source>
+ <outputDirectory>conf</outputDirectory>
+ <fileMode>0640</fileMode>
+ </file>
+ <file>
<source>src/resources/drill-env.sh</source>
<fileMode>0755</fileMode>
<outputDirectory>conf</outputDirectory>
@@ -373,5 +408,10 @@
<source>src/resources/saffron.properties</source>
<outputDirectory>conf</outputDirectory>
</file>
+ <file>
+ <source>src/resources/drill-on-yarn-example.conf</source>
+ <outputDirectory>conf</outputDirectory>
+ <fileMode>0640</fileMode>
+ </file>
</files>
</assembly>
http://git-wip-us.apache.org/repos/asf/drill/blob/f2ac8749/distribution/src/resources/drill-am-log.xml
----------------------------------------------------------------------
diff --git a/distribution/src/resources/drill-am-log.xml b/distribution/src/resources/drill-am-log.xml
new file mode 100644
index 0000000..77fc37c
--- /dev/null
+++ b/distribution/src/resources/drill-am-log.xml
@@ -0,0 +1,54 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<!--
+ Logging settings for the Drill-on-YARN application master.
+ See http://logback.qos.ch/manual/index.html for more information.
+
+ YARN already captures stdout and stderr, and the AM produces
+ a moderate amount of logging, so the logging simply goes to stdout
+ and from there into the YARN-provided output log directory.
+-->
+<configuration>
+
+ <appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
+ <encoder>
+ <pattern>%d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n
+ </pattern>
+ </encoder>
+ </appender>
+
+ <logger name="org.apache.drill" additivity="false">
+ <level value="info" />
+ <appender-ref ref="STDOUT" />
+ </logger>
+
+ <!-- All Drill-on-YARN code is under the org.apache.drill.yarn package.
+ Level defaults to info, which provides an overview of AM activities.
+ Set to "error" if the information is not needed.
+ -->
+ <logger name="org.apache.drill.yarn" additivity="false">
+ <level value="info" />
+ <appender-ref ref="STDOUT" />
+ </logger>
+
+ <root>
+ <level value="error" />
+ <appender-ref ref="STDOUT" />
+ </root>
+
+</configuration>
http://git-wip-us.apache.org/repos/asf/drill/blob/f2ac8749/distribution/src/resources/drill-am.sh
----------------------------------------------------------------------
diff --git a/distribution/src/resources/drill-am.sh b/distribution/src/resources/drill-am.sh
new file mode 100644
index 0000000..3a8bdce
--- /dev/null
+++ b/distribution/src/resources/drill-am.sh
@@ -0,0 +1,137 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Launch script for the Drill Application Master (AM).
+# This script runs under YARN and assumes the environment that YARN provides to an AM.
+# This script likely will not work from the command line.
+#
+# Environment variables set by the client:
+#
+# DRILL_DEBUG Set to 1 to print environment and other information to
+# diagnose problems.
+# DRILL_AM_HEAP AM heap memory. (The AM uses no direct memory.)
+# DRILL_AM_JAVA_OPTS Optional additional JVM options for the AM, such as
+# options to enable debugging.
+#
+# The following environment variables are set in the AM launch context,
+# not used by this script, but used by the AM itself.
+#
+# DRILL_AM_APP_ID Informs the AM of its YARN application ID.
+# (Strangely, YARN provides no way for an AM to learn this
+# from YARN itself.)
+# YARN_RM_WEBAPP Informs the AM of the URL to the YARN RM web app.
+# Again, YARN informs the Client of this information, but
+# not the AM.
+# DRILL_ARCHIVE The DFS path to the Drill archive used to localize Drillbit
+# code.
+# SITE_ARCHIVE The DFS path to the optional site archive used to localize
+# Drillbit configuration.
+#
+# Further, this script infers DRILL_HOME from the location
+# of the script itself. The site directory (if used) is provided
+# via the --config command-line option.
+
+# YARN requires that the AM run as a child process until completion; so this script
+# does not launch the AM in the background.
+
+# This script is run from $DRILL_HOME/bin, wherever the user has configured it.
+
+bin=`dirname "${BASH_SOURCE-$0}"`
+bin=`cd "$bin">/dev/null; pwd`
+DRILL_HOME=`cd "$bin/..">/dev/null; pwd`
+
+if [ -n "$DRILL_DEBUG" ]; then
+ echo
+ echo "Drill AM launch script"
+ echo "DRILL_HOME: $DRILL_HOME"
+fi
+
+# AM-specific options for drill-config.sh. The AM
+# code is in the tools folder which is not loaded by
+# the Drillbit, only by the AM and client.
+#
+# Add the Hadoop config directory which we need to gain access to
+# YARN and HDFS. This is an odd location to add the config dir,
+# but if we add it sooner, Jersey complains with many class not
+# found errors for reasons not yet known. Note that, to add the
+# Hadoop config here, the Drill 1.6 $DRILL_HOME/conf/core-site.xml file
+# MUST have been removed or renamed else Hadoop will pick up
+# our dummy file instead of the real Hadoop file.
+
+DRILL_TOOL_CP="$DRILL_HOME/jars/tools/*:$HADOOP_CONF_DIR"
+
+# Use Drill's standard configuration, including drill-env.sh.
+# The AM discards most of the information, but does use JAVA
+# and a few others.
+
+. "$DRILL_HOME/bin/drill-config.sh"
+
+# DRILL_AM_HEAP and DRILL_AM_JAVA_OPTS are set by the
+# Drill client via YARN. To set these, use the following
+# configuration options:
+#
+# DRILL_AM_HEAP: drill.yarn.am.heap
+# DRILL_AM_JAVA_OPTS: drill.yarn.am.vm-args
+
+DRILL_AM_HEAP="${DRILL_AM_HEAP:-512M}"
+
+# AM logging setup. Note: the Drillbit log configuration file uses the default name
+# of logback.xml.
+# The AM uses a non-default log configuration file name.
+# So, we must tell the AM to use an AM-specific file
+# else we'll get warnings about the log.query.path system property
+# not being set (and we won't pick up the AM logging settings.)
+# See http://logback.qos.ch/manual/configuration.html
+# The name provided must be on the class path. By adding
+# the site dir before $DRILL_HOME/conf, the user can
+# provide a custom config without editing the default one.
+# If this is wrong, you will see files such as
+# log.path_IS_UNDEFINED in the launch directory.
+
+AM_LOG_CONF="-Dlogback.configurationFile=drill-am-log.xml"
+#SITE_OPT="-Ddrill.yarn.siteDir=$DRILL_CONF_DIR"
+
+AM_JAVA_OPTS="-Xms$DRILL_AM_HEAP -Xmx$DRILL_AM_HEAP -XX:MaxPermSize=512M"
+AM_JAVA_OPTS="$AM_JAVA_OPTS $SITE_OPT $DRILL_AM_JAVA_OPTS $AM_LOG_CONF"
+if [ -n "$DRILL_JAVA_LIB_PATH" ]; then
+ AM_JAVA_OPTS="$AM_JAVA_OPTS -Djava.library.path=$DRILL_JAVA_LIB_PATH"
+fi
+
+# drill-config.sh built the class path.
+# Note that the class path uses the Hadoop, YARN and DFS jars
+# packaged with Drill; not those from the YARN-provided
+# environment variables in the launch context.
+
+AMCMD="$JAVA $AM_JAVA_OPTS ${args[@]} -cp $CP org.apache.drill.yarn.appMaster.DrillApplicationMaster"
+
+if [ -n "$DRILL_DEBUG" ]; then
+ echo "AM launch environment:"
+ echo "-----------------------------------"
+ env
+ echo "-----------------------------------"
+ echo "Command:"
+ echo "$AMCMD"
+fi
+
+# Note: no need to capture output, YARN does that for us.
+# AM is launched as a child process of caller, replacing this script.
+
+# Replace this script process with the AM. Needed so that
+# the YARN node manager can kill the AM if necessary by killing
+# the PID for this script.
+
+exec $AMCMD
http://git-wip-us.apache.org/repos/asf/drill/blob/f2ac8749/distribution/src/resources/drill-on-yarn-example.conf
----------------------------------------------------------------------
diff --git a/distribution/src/resources/drill-on-yarn-example.conf b/distribution/src/resources/drill-on-yarn-example.conf
new file mode 100644
index 0000000..1076676
--- /dev/null
+++ b/distribution/src/resources/drill-on-yarn-example.conf
@@ -0,0 +1,204 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ---------------------------------------------------------------------------
+# Configuration for the Drill-on-YARN feature. Provides information needed to
+# launch the Drill Application Master under YARN, and to configure the
+# Drill cluster.
+#
+# This file is in HOCON format, see https://github.com/typesafehub/config/blob/master/HOCON.md
+# for more information.
+#
+# To create your own configuration file, copy only those lines that you want
+# to change. All values have reasonable defaults.
+
+drill.yarn: {
+
+ # Name displayed in the YARN management UI for your Drill cluster.
+
+ app-name: "Drill-on-YARN"
+
+ dfs: {
+ # Connection to the distributed file system. Defaults to work with
+ # a single-node Drill on the local machine.
+ # Omit this if you want to get the configuration either from the
+ # Hadoop config (set with config-dir above) or from the
+ # $DRILL_HOME/core-site.xml.
+
+ connection: "hdfs://localhost/"
+
+ # The distributed file system (DFS such as HDFS) directory in which to store the Drill
+ # archive file. Change this if you launch multiple Drill clusters to give
+ # each cluster its own upload location.
+
+ app-dir: "/users/drill"
+ }
+
+ yarn: {
+
+ # Specify the queue you want to use when launching Drill-on-YARN.
+
+ queue: "default"
+ }
+
+ drill-install: {
+
+ # The archive (in .tar.gz format) that contains your Drill software, your custom jar files,
+ # and your Drill configuration. This is the path to the file on your client machine.
+ # The base name is used when uploading the file to DFS.
+
+ client-path: "/etc/drill/drill.tar.gz"
+
+ # Set this if the name of the directory inside your archive file differs from
+ # the name of the file. That is, by default, my-drill.tar.gz should contain
+ # my-drill as its directory name.
+
+ # dir-name: "drill"
+
+ # Note that there are no parameters for the site directory. You specify the
+ # site directory location on client launch; Drill-on-YARN automatically
+ # creates an archive of that directory.
+
+ # Set the Java java.library.path option to files that pre-exist on
+ # each Drillbit node. (This is not for libraries that are distributed
+ # by YARN.) (Place Drill-specific libraries in $DRILL_SITE/lib.)
+
+ library-path: "/opt/libs"
+ }
+
+ # Application Master settings. You should not have to change these.
+
+ am: {
+
+ # Heap memory for the AM. Change this only if you run a large cluster and
+ # encounter memory errors.
+
+ heap: "450M"
+
+ # Amount of memory to allocate for the YARN container. Adjust this only if
+ # you adjust the heap amount above.
+
+ memory-mb: 512
+
+ # Optional label to apply to the AM request. Restricts the AM to run on
+ # only nodes with the label. Leave blank unless you need to restrict AM
+ # location.
+
+ node-label-expr: "drill-am"
+ }
+
+ # Settings for the Application Master's internal web server.
+
+ http: {
+
+ # Listen port. Change this if you run multiple Drill clusters as
+ # YARN may allocate two or more AMs on the same YARN worker node.
+
+ port: 8048
+
+ # Turn this on to enable HTTPS for the AM web UI.
+
+ ssl-enabled: true
+
+ # The type of authorization to use for the AM web UI. One of
+ # "none" (no authorization, the default), "drill" (use Drill's
+ # configured authentication system), or "simple" (use the user name
+ # and password defined here.) With Drill authentication enabled, only
+ # the user that launched Drill-on-YARN can access the Drill-on-YARN
+ # web UI. With simple authentication, the defined user name and
+ # password are the only valid credentials.
+
+ auth-type: "drill"
+
+ # Set only for the simple auth type.
+
+ user_name: "fred"
+ password: "wilma"
+
+ # Set this to some value to protect the management REST calls.
+ # When the key is set, each rest call must include the key=value
+ # parameter. This is not high security, just a speed-bump to prevent
+ # accidental cluster changes.
+
+ rest-key=""
+ }
+
+ # Configuration for each Drillbit. Drill requires that all Drillbits use the
+ # same configuration.
+
+ drillbit: {
+
+ # The amount of heap memory to allocate to Drill. This is the equivalent of the
+ # DRILL_HEAP environment variable in drill-env.sh when launching Drill outside
+ # of YARN.
+
+ heap: "4G"
+
+ # The maximum amount of direct memory to allocate to Drill. This is the equivalent of the
+ # DRILL_MAX_DIRECT_MEMORY environment variable in drill-env.sh when launching Drill outside
+ # of YARN.
+
+ max-direct-memory: "8G"
+
+ # The amount of memory to allocate to the JVM code cache which holds the classes
+ # that Drill creates dynamically for each query execution. The default setting is
+ # fine for most applications.
+
+ code-cache: "1G"
+
+ # The amount of memory to allocate to each Drill-bit. This is the amount of memory requested
+ # from YARN for each drill-bit. This must be sufficient for
+ # heap + max-direct-memory + code-cache + JVM overhead (assume 1G)
+
+ memory-mb: 14336
+
+ # The number of YARN virtual cores to allocate to each drill-bit.
+
+ vcores: 4
+
+ # The number of disk channels (or spindles) that Drill typically uses.
+ # This value is supported only on selected YARN distributions.
+
+ disks: 3
+
+ # Add to the class path. Use for things such as HBase, etc. which must be installed
+ # on every node. (Drill automatically adds its own class path as well as the one
+ # provided by YARN.)
+
+ classpath: ""
+ }
+
+ # The groups of hosts on which to launch Drill. Each group can be one of several types.
+ #
+ # Note: This version supports a single group. The example below has multiple groups
+ # only to illustrate the various group types.
+
+ cluster: [
+ {
+ # Arbitrary name, just used for error and statistics reporting.
+ # Defaults to "group-" group index, starting at 1.
+
+ name: "group1"
+
+ # Basic group: let YARN pick any available node.
+
+ type: "basic"
+
+ # Number of Drill-bits to run. (Drill-on-YARN runs each on a separate host,
+ # so this is also the number of hosts that will run Drill.)
+
+ count: 2
+ }
+ ]
+}
http://git-wip-us.apache.org/repos/asf/drill/blob/f2ac8749/distribution/src/resources/drill-on-yarn.sh
----------------------------------------------------------------------
diff --git a/distribution/src/resources/drill-on-yarn.sh b/distribution/src/resources/drill-on-yarn.sh
new file mode 100644
index 0000000..a84b0a8
--- /dev/null
+++ b/distribution/src/resources/drill-on-yarn.sh
@@ -0,0 +1,74 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Run the Drill-on-YARN client to launch Drill on a YARN cluster.
+# Uses the $DRILL_HOME/conf/drill-on-yarn.conf file for client options.
+# Uses the $DRILL_HOME/conf/drill-cluster.conf file for cluster options.
+#
+# The config files (along with the Drill config files) MUST be in the
+# $DRILL_HOME directory so that they are properly localized. Drill-on-YARN does not permit
+# placing configuration files outside the $DRILL_HOME directory.
+#
+# Requires the location of Hadoop home. May be passed using the --hadoop option,
+# set in the environment, or set in $DRILL_HOME/conf/yarn-env.sh.
+
+usage="Usage: drill-on-yarn.sh start|stop|status"
+
+bin=`dirname "${BASH_SOURCE-$0}"`
+bin=`cd "$bin">/dev/null; pwd`
+export DRILL_HOME=`cd "$bin/..">/dev/null; pwd`
+
+# Use Drillbit's config script. We throw away most of the information, we really just
+# need JAVA_HOME and HADOOP_CONF_DIR or HADOOP_HOME.
+
+DRILL_TOOL_CP="$DRILL_HOME/jars/tools/*"
+. "$DRILL_HOME/bin/drill-config.sh"
+
+# Hadoop config or home is required
+if [ -z "$HADOOP_CONF_DIR" ]; then
+ if [ -z "$HADOOP_HOME" ]; then
+ echo "Hadoop home undefined: set HADOOP_CONF_DIR, HADOOP_HOME" >&2
+ exit 1
+ fi
+ HADOOP_CONF_DIR="$HADOOP_HOME/etc/hadoop"
+fi
+
+DRILL_CLIENT_HEAP=${DRILL_CLIENT_HEAP:-512M}
+VM_OPTS="-Xms$DRILL_CLIENT_HEAP -Xmx$DRILL_CLIENT_HEAP $DRILL_CLIENT_VM_OPTS"
+VM_OPTS="$VM_OPTS -Dlogback.configurationFile=yarn-client-log.xml"
+#VM_OPTS="$VM_OPTS -Ddrill.yarn.siteDir=$DRILL_CONF_DIR"
+
+# Add Hadoop configuration at the end of the class path. This will
+# fail if the 1.6-and-earlier core-site.xml file resides in the conf
+# directory.
+
+CP="$CP:$HADOOP_CONF_DIR"
+
+if [ ${#args[@]} = 0 ]; then
+ echo $usage
+ exit 1
+fi
+
+CLIENT_CMD="$JAVA $VM_OPTS -cp $CP org.apache.drill.yarn.client.DrillOnYarn ${args[@]}"
+
+case ${args[0]} in
+debug)
+ env
+ echo "Command: $CLIENT_CMD"
+ ;;
+*)
+ exec $CLIENT_CMD
+ ;;
+esac
http://git-wip-us.apache.org/repos/asf/drill/blob/f2ac8749/distribution/src/resources/yarn-client-log.xml
----------------------------------------------------------------------
diff --git a/distribution/src/resources/yarn-client-log.xml b/distribution/src/resources/yarn-client-log.xml
new file mode 100644
index 0000000..3f84726
--- /dev/null
+++ b/distribution/src/resources/yarn-client-log.xml
@@ -0,0 +1,44 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<!--
+ Logging settings for the Drill-on-YARN client application. Note that the
+ client does not do much logging, it relies on the -v (verbose) option
+ instead.
+ See http://logback.qos.ch/manual/index.html for more information.
+-->
+<configuration>
+
+ <appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
+ <encoder>
+ <pattern>%d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n
+ </pattern>
+ </encoder>
+ </appender>
+
+ <!-- All Drill-on-YARN code is under the org.apache.drill.yarn package. -->
+ <logger name="org.apache.drill.yarn" additivity="false">
+ <level value="error" />
+ <appender-ref ref="STDOUT" />
+ </logger>
+
+ <root>
+ <level value="error" />
+ <appender-ref ref="STDOUT" />
+ </root>
+
+</configuration>
http://git-wip-us.apache.org/repos/asf/drill/blob/f2ac8749/distribution/src/resources/yarn-drillbit.sh
----------------------------------------------------------------------
diff --git a/distribution/src/resources/yarn-drillbit.sh b/distribution/src/resources/yarn-drillbit.sh
new file mode 100644
index 0000000..72b7f6e
--- /dev/null
+++ b/distribution/src/resources/yarn-drillbit.sh
@@ -0,0 +1,178 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Internal script to launch a Drillbit under YARN. Not for use outside
+# of YARN.
+
+# This is a heavily modified version of drillbit.sh, drill-config.sh and
+# runbit, modified for use in YARN and performing a single operation:
+# launching a Drillbit and waiting for Drillbit exit.
+#
+# Under YARN, the typical way to launch a Java app is to do all the
+# setup in Java code in the launch context. However, Drill depends on
+# drill-env.sh to set site-specific options before launch. This script
+# performs Drill launch, integrating options from drill-env.sh.
+#
+# Input environment variables:
+#
+# DRILL_DEBUG
+# Used to debug this script. Dumps debugging information.
+# Set from the drill.yarn.drillbit.debug-launch config parameter.
+# DRILL_HOME
+# Identifies the Drill software to use: either at a fixed
+# location, or localized. Set from the drill.yarn.drill-install
+# localize and drill-home config parameters. Note that this
+# variable tells this script where to find the drill-env.sh
+# file (in $DRILL_HOME/conf), and so DRILL_HOME cannot be
+# overridden in drill-env.sh.
+# DRILL_LOG_DIR
+# The location to which to write log files. Often set in drill-env.sh.
+# Non-YARN launch provides a number of default locations. This variable
+# is ignored in a YARN launch unless drill.yarn.drillbit.disable-yarn-logs
+# is true (the default is false.) Set that config variable to true to use this
+# value for the log directory under YARN.
+# DRILL_YARN_LOG_DIR
+# If using the YARN log directory, this variable points to that location.
+# If drill.yarn.drillbit.disable-yarn-logs is true, then this variable is not
+# set and the DRILL_LOG_DIR is used instead.
+# DRILL_MAX_DIRECT_MEMORY:
+# The amount of direct memory set in the
+# drill.yarn.drillbit.max-direct-memory config parameter.
+# When Drill is run outside of YARN, this value is set in drill-env.sh.
+# But doing so under YARN decouples the Drill memory settings from the
+# YARN settings. If you do not set the config parameter, Drill will
+# default to the value set in drill-env.sh.
+# DRILL_HEAP
+# The amount of Java heap memory set in the
+# drill.yarn.drillbit.heap setting. Same override rules as
+# DRILL_MAX_DIRECT_MEMORY.
+# DRILL_JAVA_OPTS
+# The standard JVM options needed to launch Drill. Must be set in
+# drill-env.sh.
+# DRILL_JVM_OPTS
+# Additional YARN-specific JVM options set in the
+# drill.yarn.drillbit.vm-args config parameter. Note that the YARN-specific
+# options are in addition to (not an override of) the DRILL_JAVA_OPTS
+# values.
+# SERVER_GC_OPTS
+# Garbage collection (GC) related JVM options set in drill-env.sh. Not
+# overridden in YARN.
+# HADOOP_HOME
+# Location of the Hadoop software and configuration. Can be
+# set with the drill.yarn.hadoop.home or in drill-env.sh. If both are set, the
+# latter value overrides the former.
+# JAVA_HOME
+# Set by YARN, bypassing Drill's usual process for searching for JAVA_HOME.
+# DRILL_CONF_DIR
+# Location of Drill's configuration files. Non-YARN launch provides a set of
+# defaults. Under YARN, this value must either be set explicitly using the
+# drill.yarn.drillbit.conf-dir parameter, or will default to
+# the (typically localized) $DRILL_HOME/conf directory.
+# DRILL_CLASSPATH_PREFIX
+# Optional extra classpath added before Drill's own jars. Set from the
+# drill.yarn.drillbit.prefix-class-path config parameter, or in
+# drill-env.sh. As with all class path settings, if both values are set,
+# drill-env.sh takes precedence.
+# EXTN_CLASSPATH
+# Optional extra classpath added after Drill's own jars but before
+# 3rd party jars. Allows overriding Drill's bundled version of Hadoop
+# and so on. Allows adding jars needed by plug-ins. Config parameter
+# is drill.yarn.drillbit.extn-class-path.
+# HADOOP_CLASSPATH
+# As above, but for the Hadoop class path. Config parameter is
+# drill.yarn.drillbit.hadoop-class-path. This is a legacy setting. Use
+# drill.yarn.drillbit.extn-class-path for new deployments.
+# HBASE_CLASSPATH
+# As above, but for the HBase class path. Config parameter is
+# drill.yarn.drillbit.hbase-class-path. This is a legacy setting. Use
+# drill.yarn.drillbit.extn-class-path for new deployments.
+# DRILL_CLASSPATH
+# Optional extra classpath after all Drill-provided jars. This is the
+# typical place to add jars needed by plugins, etc. (Note, no need to set
+# this if the jars reside in the $DRILL_HOME/jars/3rdparty directory.)
+# Config parameter is drill.yarn.drillbit.drill-classpath.
+# DRILL_JVM_OPTS
+# Additional JVM options passed via YARN from the
+# drill.yarn.drillbit.vm-args parameter.
+# ENABLE_GC_LOG
+# Enables Java GC logging. Passed from the drill.yarn.drillbit.log-gc
+# garbage collection option.
+
+# Debug aid: if DRILL_DEBUG is non-empty, print the environment as handed
+# to this script, before anything below modifies it.
+
+if test -n "$DRILL_DEBUG"
+then
+ printf '\n%s\n%s\n' "Drillbit Environment from YARN:" "-----------------------------------"
+ env
+ printf '%s\n' "-----------------------------------"
+fi
+
+# DRILL_HOME is set by the AM to point to the Drill distribution.
+
+# In YARN, configuration defaults to the the standard location.
+
+DRILL_CONF_DIR=${DRILL_CONF_DIR:-$DRILL_HOME/conf}
+
+# On some distributions, YARN sets HADOOP_CLASSPATH, but for a different
+# purpose than the one to which Drill puts that variable. Unset the YARN
+# version and only use the variable if drill-env.sh sets it.
+
+unset HADOOP_CLASSPATH
+unset HBASE_CLASSPATH
+
+# Replace the values with the values from the Drill-on-YARN config
+# file, if given.
+
+if [ -n "$DRILL_HADOOP_CLASSPATH" ]; then
+ export HADOOP_CLASSPATH="$DRILL_HADOOP_CLASSPATH"
+fi
+if [ -n "$DRILL_HBASE_CLASSPATH" ]; then
+ export HBASE_CLASSPATH="$DRILL_HBASE_CLASSPATH"
+fi
+
+# HADOOP_HOME can be set by Drill-on-YARN, or in drill-env.sh. The
+# latter takes precedence. It seems that YARN does not use the
+# HADOOP_HOME variable; instead it provides HADOOP_COMMON_HOME,
+# HADOOP_HDFS_HOME, and HADOOP_YARN_HOME.
+#
+# Default HADOOP_COMMON_HOME if HADOOP_HOME is not otherwise set in a YARN
+# environment.
+
+if [ -n "$HADOOP_HOME" ]; then
+ export HADOOP_HOME="$HADOOP_COMMON_HOME"
+fi
+
+# Use Drill's standard configuration, including drill-env.sh.
+# The file is sourced with "." (not executed) so that the variables it
+# sets are visible to the rest of this script.
+
+. "$DRILL_HOME/bin/drill-config.sh"
+
+# Debugging information: when DRILL_DEBUG is non-empty, invoke runbit with
+# the "debug" argument (presumably this prints the launch command without
+# starting the Drillbit; confirm in runbit), then use "set" to dump every
+# shell variable, exported or not, as it stands after configuration.
+
+if [ -n "$DRILL_DEBUG" ]; then
+ echo "Command:"
+ "$DRILL_HOME/bin/runbit" debug
+ echo
+ echo "Local Environment:"
+ echo "-----------------------------------"
+ set
+ echo "-----------------------------------"
+fi
+
+# Launch Drill itself.
+# Passes along Drill's exit code as our own.
+
+echo "`date` Starting drillbit on `hostname` under YARN, logging to $DRILLBIT_LOG_PATH"
+echo "`ulimit -a`" >> "$DRILLBIT_LOG_PATH" 2>&1
+
+"$DRILL_HOME/bin/runbit" exec
http://git-wip-us.apache.org/repos/asf/drill/blob/f2ac8749/drill-yarn/README.md
----------------------------------------------------------------------
diff --git a/drill-yarn/README.md b/drill-yarn/README.md
new file mode 100644
index 0000000..1794afb
--- /dev/null
+++ b/drill-yarn/README.md
@@ -0,0 +1,190 @@
+# Drill-on-YARN: YARN Integration for Apache Drill
+
+Drill-on-YARN (DoY) runs Apache Drill as a long-running process under Apache YARN. Key components
+include:
+
+1. The Drill-on-YARN client: starts, stops, resizes and checks the status of your Drill cluster.
+2. Drill-on-YARN Application Master (AM): a long-running process under YARN that uses YARN
+to manage your Drill cluster.
+3. Drillbits: The Drill server process, now started by YARN rather than by hand or other
+ad-hoc scripts.
+4. Configuration: Cluster configuration now resides in a drill-on-yarn.conf.
+
+Extensive user documentation is to be available on the Apache Drill site. Until then,
+a user guide is attached to [DRILL-1170](https://issues.apache.org/jira/browse/DRILL-1170).
+
+## Building
+
+Drill-on-YARN builds as part of Apache Drill. The build produces a new DoY jar
+which resides in a new `$DRILL_HOME/jars/tools` folder so that the DoY classes are not
+visible to Drill itself.
+
+## Understanding the Code
+
+The DoY code divides into three main modules:
+
+1. The DoY command-line client application.
+2. The DoY AM server application.
+3. Scripts around the client, AM and Drillbit applications.
+
+Scripts reside in the distribution project under `src/resources`.
+
+All DoY code resides in this module in the `org.apache.drill.yarn` package.
+
+- `client`: The command-line client application.
+- `appMaster`: The DoY AM.
+- `core`: Code shared between the client and AM.
+- `zk`: Integration with ZooKeeper to monitor Drillbits.
+
+DoY depends on Drill modules, but only the `distribution` project depends on
+DoY.
+
+Because DoY is a YARN application, we recommend that you become familiar with
+YARN itself before diving into the DoY code. YARN has a very particular model
+for how to run distributed applications and that model drove the design of
+the DoY modules and classes.
+
+### Major Components
+
+The following diagram shows the major DoY components and how they relate to
+the YARN components:
+
+![System Overview](./img/overview.png)
+
+The client communicates with the YARN Resource Manager (RM) to request the start
+of the DoY AM. The RM locates a node to run the AM's container and asks the
+Node Manager (NM) on that node to start the AM. The AM starts and registers
+itself with ZooKeeper to prevent multiple AMs for the same Drill cluster.
+
+The AM then requests containers from the RM in which to run Drillbits. Next, the
+AM asks the assigned NMs to start each Drillbit. The Drillbit starts and
+registers itself with ZooKeeper (ZK). The AM monitors
+ZK to confirm that the Drillbit did, in fact, start.
+
+To shut down, the client contacts the AM directly using the AM REST API
+and requests shutdown.
+The AM sends a kill request to each NM, which kills the Drillbit processes.
+The AM monitors ZK to confirm that the Drillbit has dropped its registration.
+Once the last Drillbit has completed, the AM itself exits. The client will
+wait (up to a limit) for the AM to shut down so that the client can report
+a successful shutdown.
+
+### Client
+
+The client application consists of a main class, `DrillOnYarn` and a set of
+command classes. Each command performs one operation, such as start, stop,
+resize, and so on. The client is designed to start, perform one operation,
+and exit. That is, while the AM is a persistent process, the client is not.
+
+A user will start their Drill cluster, then later will want to stop it. The
+Drill cluster is a YARN application, represented by YARN with
+an "application id" (app id). To stop a Drill cluster, the client needs the
+app id assigned to the application at start time. While the user can use the
+`-a` option to provide the app id explicitly, it is more convenient for
+the client to "remember" the
+app id. DoY uses an "app id file" for this purpose. This convenience works
+if the user starts, manages and stops the cluster from a single host.
+
+The following diagram shows the major classes in the DoY client:
+
+![Client Overview](./img/client-classes.png)
+
+
+The client uses a "facade" to communicate with YARN. The facade,
+`YarnRMClient`, interfaces to YARN to perform the required YARN operations.
+Similarly, another facade, `DfsFacade`, provides a layer on top of the HDFS
+API. The facades simplify code and provide an abstraction handy for mocking
+these systems during unit testing.
+
+YARN simplifies the task of running Drill (or any other application) by
+"localizing" the required files onto each worker node. The localization process
+starts with the client uploading the files to the distributed file system (DFS),
+typically HDFS. DoY localizes two separate files. The first is the Drill software
+itself, typically using the original Drill archive from Apache or your distribution.
+Drill requires site-specific configuration, optionally including custom code
+for user-defined functions (UDFs), etc. Site files reside in a Drill
+site directory. For YARN, the site
+directory must be outside of the drill software distribution (see the user
+documentation for details.) DoY archives the site directory and uploads it to
+DFS along with the Drill archive. The code that does that work resides in the
+`FileUploader` class.
+
+To start a Drill cluster, the client asks YARN to launch the AM by specifying
+a large number of detailed options: environment variables, files, command
+line to run, and so on. This work is done in the `AMRunner` class.
+
+### Application Master
+
+The AM must perform several tasks, including:
+
+* Maintain the desired number of Drillbits.
+* Work with YARN to request a container for each Drillbit, and to launch
+the Drillbit.
+* Ensure that YARN allocates only one Drillbit container per cluster host.
+(Required because all Drillbits within a cluster share the same set of ports.)
+* Monitor Zookeeper to watch Drillbits. Drillbits perform a heartbeat with
+ZK, which the AM can monitor. The AM will restart any Drillbit that drops out
+of ZK, since such a Drillbit is likely in a bad state.
+* Provide a Web UI to monitor and manage the cluster.
+* Provide a REST API that the client uses to communicate directly with the AM.
+
+The AM is composed of a number of components. The following diagram shows the
+major classes involved in setting up the AM:
+
+![AM Overview](./img/am-overview.png)
+
+The `DrillApplicationMaster` class is the main AM program. It has two key
+tasks: 1) create the `DrillControllerFactory` that assembles the required
+parts of the AM, and 2) run the `Dispatcher`, which is the actual AM server.
+
+The AM is designed to be generic; Drill-specific bits are abstracted out into
+helpers. This design simplifies testing and also anticipates that Drill may
+eventually include other, specialized, servers. The `DrillControllerFactory`
+is the class that pulls together all the Drill-specific pieces to assemble
+the server. During testing, different factories are used to assemble a test
+server.
+
+The `Dispatcher` receives events from YARN, from the REST API and from a timer
+and routes them to the `ClusterController` which takes actions based on the
+events. This structure separates the API aspects of working with YARN (in the
+`Dispatcher`) from the logic of running a cluster (in the `ClusterController`.)
+
+The `ClusterController` attempts to keep the cluster in the desired state. Today
+this means running a specified number of Drillbits. In the future, DoY may
+support multiple Drillbit groups (one set that runs all the time, say, and another
+that runs only during the day when needed for interactive users.)
+
+A large amount of detailed fiddling is needed to properly request a container
+for a Drillbit, launch the Drillbit, monitor it and shut it down. The `Task`
+class monitors the lifecycle of each task (here, a Drillbit). Behavior of the
+task differs depending on the task's state. The `TaskState` class, and its
+subclasses, provide the task-specific behavior. For example, handling of a
+task cancellation is different depending on whether the task is in the
+`RequestingState` or in the `RunningState`.
+
+The following diagram illustrates some of the details of the cluster controller
+system.
+
+![Controller Detail](./img/controller-classes.png)
+
+Some events are time based. For example, a Drillbit is given a certain amount
+of time to register itself in ZK before DoY assumes that the Drillbit is
+unhealthy and restarts it. The `PulseRunnable` is the thread that implements
+the timer; `Pollable` is the listener for each "tick" event.
+
+The `Scheduler` and its subclasses (such as `DrillbitScheduler`) maintain the
+desired number of Drillbits, asking the `ClusterController` to start and stop
+tasks as needed. The `Scheduler` also handles task-specific tasks. At present,
+Drill has no means to perform a graceful shutdown. However, when Drill does,
+the `DrillbitScheduler` will be responsible for sending the required message.
+
+The `appMaster.http` package contains the implementation for the web UI and
+REST API using an embedded Jetty server. If Drill security is enabled, the
+web UI will prompt the user to log in. The only recognized user is the one
+that launched DoY.
+
+The `NodeRegistry` tracks the set of nodes running Drillbits so we can avoid
+starting a second on any of them. Drillbits are started through YARN, of course,
+but can also be "stray": Drillbits started outside of DoY and discovered
+through ZK. Even stray Drillbits are registered to avoid nasty surprises if
+DoY were to try to launch a Drillbit on that same node.