Posted to commits@impala.apache.org by ta...@apache.org on 2019/05/10 15:25:03 UTC

[impala] 03/03: IMPALA-8369 (part 2): Hive 3: switch to Tez-on-YARN execution

This is an automated email from the ASF dual-hosted git repository.

tarmstrong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 17daa6efb9c3c5c6fbd0908f2176b99d8498a250
Author: Todd Lipcon <to...@apache.org>
AuthorDate: Thu May 2 14:25:57 2019 -0700

    IMPALA-8369 (part 2): Hive 3: switch to Tez-on-YARN execution
    
    This switches away from Tez local mode to Tez-on-YARN. After spending a
    couple of days trying to debug issues with Tez local mode, it seemed
    like it was just going to be too much of a lift.
    
    This patch starts a YARN RM and NM when USE_CDP_HIVE is enabled. It also
    switches to a new yarn-site.xml with a minimized set of configurations,
    generated by the new python templating.
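
    For reference, each python-based template defines a CONFIG dict that
    bin/generate_xml_config.py imports and renders to Hadoop-style XML,
    substituting ${...} placeholders from the environment. A minimal sketch
    of such a template (the property names below are made up for
    illustration; the real templates live under
    testdata/cluster/node_templates/ and fe/src/test/resources/):

        # example-site.xml.py -- illustrative sketch only
        import os

        kerberize = os.environ.get('IMPALA_KERBERIZE') == '1'

        # generate_xml_config.py expects a module-level dict named CONFIG;
        # ${VAR} values are expanded from the environment when the XML is written.
        CONFIG = {
          'example.webapp.address': '${EXTERNAL_LISTEN_HOST}:${YARN_WEBUI_PORT}',
          'example.memory-mb': 4096,  # ints are accepted; the generator stringifies them
        }

        if kerberize:
          CONFIG['example.keytab'] = '${KRB5_KTNAME}'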
    
    For everything to work properly, I also had to update the Hadoop
    dependency to come from CDP instead of CDH when using CDP Hive.
    Otherwise, the classpath of the launched Tez containers had conflicting
    versions of various Hadoop classes, which caused tasks to fail.
    
    I verified that this fixes concurrent query execution by running queries
    in parallel in two beeline sessions. With local mode, these queries
    would periodically fail due to various races (HIVE-21682). I'm also able
    to get farther along in data loading.
    
    Change-Id: If96064f271582b2790a3cfb3d135f3834d46c41d
    Reviewed-on: http://gerrit.cloudera.org:8080/13224
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
    Reviewed-by: Todd Lipcon <to...@apache.org>
---
 bin/bootstrap_toolchain.py                         |   7 +-
 bin/create-test-configuration.sh                   |  11 +-
 bin/generate_xml_config.py                         |   8 +-
 bin/impala-config.sh                               |  10 +-
 bin/jenkins/critique-gerrit-review.py              |   2 +-
 fe/pom.xml                                         |  26 ++-
 .../apache/impala/analysis/CopyTestCaseStmt.java   |   2 +-
 fe/src/test/resources/hive-site.xml.py             |   5 +-
 shaded-deps/pom.xml                                |   1 -
 testdata/cluster/admin                             |  16 ++
 .../common/etc/hadoop/conf/capacity-scheduler.xml  | 223 +++++++++++++++++++++
 .../common/etc/hadoop/conf/yarn-site.xml.py        |  97 +++++++++
 .../common/etc/hadoop/conf/yarn-site.xml.tmpl      | 154 --------------
 13 files changed, 389 insertions(+), 173 deletions(-)

diff --git a/bin/bootstrap_toolchain.py b/bin/bootstrap_toolchain.py
index 07a646d..34547fe 100755
--- a/bin/bootstrap_toolchain.py
+++ b/bin/bootstrap_toolchain.py
@@ -553,10 +553,10 @@ if __name__ == "__main__":
   toolchain_host = os.environ["IMPALA_TOOLCHAIN_HOST"]
   cdh_build_number = os.environ["CDH_BUILD_NUMBER"]
 
-  cdh_components = map(Package, ["hadoop", "hbase", "sentry"])
+  cdh_components = map(Package, ["hbase", "sentry"])
   use_cdp_hive = os.getenv("USE_CDP_HIVE") == "true"
   if not use_cdp_hive:
-    cdh_components += [Package("hive")]
+    cdh_components += [Package("hive"), Package("hadoop")]
 
   if use_cdh_kudu:
     if not try_get_platform_release_label() or not try_get_platform_release_label().cdh:
@@ -580,12 +580,13 @@ if __name__ == "__main__":
   cdp_components = [
     CdpComponent("ranger-{0}-admin".format(os.environ.get("IMPALA_RANGER_VERSION"))),
   ]
-  use_cdp_hive = os.getenv("USE_CDP_HIVE") == "true"
   if use_cdp_hive:
     hive_version = os.environ.get("IMPALA_HIVE_VERSION")
     cdp_components.append(CdpComponent("hive-{0}-source".format(hive_version),
                           pkg_directory="hive-{0}".format(hive_version))),
     cdp_components.append(CdpComponent("apache-hive-{0}-bin".format(hive_version))),
+    cdp_components.append(CdpComponent("hadoop-{0}"
+                          .format(os.environ.get("IMPALA_HADOOP_VERSION")))),
     cdp_components.append(CdpComponent(
         "tez-{0}-minimal".format(os.environ.get("IMPALA_TEZ_VERSION")),
         makedir=True))
diff --git a/bin/create-test-configuration.sh b/bin/create-test-configuration.sh
index 208d4f8..8d08562 100755
--- a/bin/create-test-configuration.sh
+++ b/bin/create-test-configuration.sh
@@ -174,12 +174,12 @@ if [ $CREATE_RANGER_POLICY_DB -eq 1 ]; then
   popd
 fi
 
-echo "Linking core-site.xml from local cluster"
+echo "Linking common conf files from local cluster:"
 CLUSTER_HADOOP_CONF_DIR=$(${CLUSTER_DIR}/admin get_hadoop_client_conf_dir)
-ln -s ${CLUSTER_HADOOP_CONF_DIR}/core-site.xml
-
-echo "Linking hdfs-site.xml from local cluster"
-ln -s ${CLUSTER_HADOOP_CONF_DIR}/hdfs-site.xml
+for file in core-site.xml hdfs-site.xml yarn-site.xml ; do
+  echo ... $file
+  ln -s ${CLUSTER_HADOOP_CONF_DIR}/$file
+done
 
 if ${CLUSTER_DIR}/admin is_kerberized; then
   # KERBEROS TODO: Without this, the yarn daemons can see these
@@ -190,7 +190,6 @@ if ${CLUSTER_DIR}/admin is_kerberized; then
   # kerberos principals. Obviously this has to be sorted out before
   # a kerberized cluster can load data.
   echo "Linking yarn and mapred from local cluster"
-  ln -s ${CLUSTER_HADOOP_CONF_DIR}/yarn-site.xml
   ln -s ${CLUSTER_HADOOP_CONF_DIR}/mapred-site.xml
 fi
 
diff --git a/bin/generate_xml_config.py b/bin/generate_xml_config.py
index a06da7e..18e3615 100755
--- a/bin/generate_xml_config.py
+++ b/bin/generate_xml_config.py
@@ -80,6 +80,8 @@ def dump_config(d, source_path, out):
   print >>out, dedent(header)
   for k, v in sorted(d.iteritems()):
     try:
+      if isinstance(v, int):
+        v = str(v)
       v = _substitute_env_vars(v)
     except KeyError, e:
       raise Exception("failed environment variable substitution for value {k}: {e}"
@@ -98,7 +100,11 @@ def main():
     sys.exit(1)
 
   _, in_path, out_path = sys.argv
-  mod = imp.load_source('template', in_path)
+  try:
+    mod = imp.load_source('template', in_path)
+  except:  # noqa
+    print >>sys.stderr, "Unable to load template: %s" % in_path
+    raise
   conf = mod.__dict__.get('CONFIG')
   if not isinstance(conf, dict):
     raise Exception("module in '{path}' should define a dict named CONFIG"
diff --git a/bin/impala-config.sh b/bin/impala-config.sh
index cc8cfef..68de5e4 100755
--- a/bin/impala-config.sh
+++ b/bin/impala-config.sh
@@ -162,7 +162,8 @@ export IMPALA_TOOLCHAIN_HOST
 export CDH_MAJOR_VERSION=6
 export CDH_BUILD_NUMBER=1055188
 export CDP_BUILD_NUMBER=1056671
-export IMPALA_HADOOP_VERSION=3.0.0-cdh6.x-SNAPSHOT
+export CDH_HADOOP_VERSION=3.0.0-cdh6.x-SNAPSHOT
+export CDP_HADOOP_VERSION=3.1.1.6.0.99.0-147
 export IMPALA_HBASE_VERSION=2.1.0-cdh6.x-SNAPSHOT
 export IMPALA_SENTRY_VERSION=2.1.0-cdh6.x-SNAPSHOT
 export IMPALA_RANGER_VERSION=1.2.0.6.0.99.0-147
@@ -200,10 +201,14 @@ if $USE_CDP_HIVE; then
   # the minicluster
   export IMPALA_HIVE_VERSION=${CDP_HIVE_VERSION}
   export IMPALA_TEZ_VERSION=0.9.1.6.0.99.0-147
+  export IMPALA_HADOOP_VERSION=${CDP_HADOOP_VERSION}
+  export HADOOP_HOME="$CDP_COMPONENTS_HOME/hadoop-${CDP_HADOOP_VERSION}/"
 else
   # CDH hive version is used to build and deploy in minicluster when USE_CDP_HIVE is
   # false
   export IMPALA_HIVE_VERSION=${CDH_HIVE_VERSION}
+  export IMPALA_HADOOP_VERSION=${CDH_HADOOP_VERSION}
+  export HADOOP_HOME="$CDH_COMPONENTS_HOME/hadoop-${IMPALA_HADOOP_VERSION}/"
 fi
 # Extract the first component of the hive version.
 # Allow overriding of Hive source location in case we want to build Impala without
@@ -510,9 +515,6 @@ export IMPALA_COMMON_DIR="$IMPALA_HOME/common"
 export PATH="$IMPALA_TOOLCHAIN/gdb-$IMPALA_GDB_VERSION/bin:$PATH"
 export PATH="$IMPALA_HOME/bin:$IMPALA_TOOLCHAIN/cmake-$IMPALA_CMAKE_VERSION/bin/:$PATH"
 
-# Typically we build against a snapshot build of Hadoop that includes everything we need
-# for building Impala and running a minicluster.
-export HADOOP_HOME="$CDH_COMPONENTS_HOME/hadoop-${IMPALA_HADOOP_VERSION}/"
 export HADOOP_CONF_DIR="$IMPALA_FE_DIR/src/test/resources"
 # The include and lib paths are needed to pick up hdfs.h and libhdfs.*
 # Allow overriding in case we want to point to a package/install with a different layout.
diff --git a/bin/jenkins/critique-gerrit-review.py b/bin/jenkins/critique-gerrit-review.py
index 5048a1d..c2bfdb7 100755
--- a/bin/jenkins/critique-gerrit-review.py
+++ b/bin/jenkins/critique-gerrit-review.py
@@ -69,7 +69,7 @@ EXCLUDE_FILE_PATTERNS = [
     re.compile(r".*/catalog/BuiltinsDb.java"),  # Many long strings.
     re.compile(r".*/codegen/gen_ir_descriptions.py"),  # Many long strings.
     re.compile(r".*shell/ext-py/.*"),  # Third-party code.
-    re.compile(r".*/fe/src/test/resources/.*.py") # Long lines in config files.
+    re.compile(r".*/.*\.xml\.py")  # Long lines in config template files.
 ]
 
 
diff --git a/fe/pom.xml b/fe/pom.xml
index 43701b4..ecd79d3 100644
--- a/fe/pom.xml
+++ b/fe/pom.xml
@@ -53,7 +53,11 @@ under the License.
       <artifactId>hadoop-hdfs</artifactId>
       <version>${hadoop.version}</version>
     </dependency>
-
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-hdfs-client</artifactId>
+      <version>${hadoop.version}</version>
+    </dependency>
     <dependency>
       <groupId>org.apache.hadoop</groupId>
       <artifactId>hadoop-common</artifactId>
@@ -178,6 +182,10 @@ under the License.
           <groupId>org.apache.hive</groupId>
           <artifactId>*</artifactId>
         </exclusion>
+        <exclusion>
+          <groupId>org.apache.hadoop</groupId>
+          <artifactId>hadoop-mapreduce-client-jobclient</artifactId>
+        </exclusion>
       </exclusions>
     </dependency>
 
@@ -993,6 +1001,22 @@ under the License.
               <groupId>org.apache.hive</groupId>
               <artifactId>hive-shims</artifactId>
             </exclusion>
+            <exclusion>
+              <groupId>org.apache.hadoop</groupId>
+              <artifactId>*</artifactId>
+            </exclusion>
+          </exclusions>
+        </dependency>
+        <!-- needed for JobConf, which HiveConf inherits from -->
+        <dependency>
+          <groupId>org.apache.hadoop</groupId>
+          <artifactId>hadoop-mapreduce-client-core</artifactId>
+          <version>${hadoop.version}</version>
+          <exclusions>
+            <exclusion>
+              <groupId>*</groupId>
+              <artifactId>*</artifactId>
+            </exclusion>
           </exclusions>
         </dependency>
       </dependencies>
diff --git a/fe/src/main/java/org/apache/impala/analysis/CopyTestCaseStmt.java b/fe/src/main/java/org/apache/impala/analysis/CopyTestCaseStmt.java
index 5023963..d961f95 100644
--- a/fe/src/main/java/org/apache/impala/analysis/CopyTestCaseStmt.java
+++ b/fe/src/main/java/org/apache/impala/analysis/CopyTestCaseStmt.java
@@ -17,7 +17,7 @@
 
 package org.apache.impala.analysis;
 
-import avro.shaded.com.google.common.collect.Sets;
+import com.google.common.collect.Sets;
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Preconditions;
 import org.apache.hadoop.fs.FSDataOutputStream;
diff --git a/fe/src/test/resources/hive-site.xml.py b/fe/src/test/resources/hive-site.xml.py
index 18e0011..66e62a7 100644
--- a/fe/src/test/resources/hive-site.xml.py
+++ b/fe/src/test/resources/hive-site.xml.py
@@ -81,7 +81,10 @@ if hive_major_version >= 3:
   CONFIG.update({
    'hive.tez.container.size': '512',
    'hive.txn.manager': 'org.apache.hadoop.hive.ql.lockmgr.DbTxnManager',
-   'tez.local.mode': 'true'})
+   # We run YARN with Tez on the classpath directly
+   'tez.ignore.lib.uris': 'true',
+   'tez.use.cluster.hadoop-libs': 'true',
+  })
 else:
   CONFIG.update({
    # TODO(vihang) Disabled for HMS3.
diff --git a/shaded-deps/pom.xml b/shaded-deps/pom.xml
index 6aad3c5..579758e 100644
--- a/shaded-deps/pom.xml
+++ b/shaded-deps/pom.xml
@@ -51,7 +51,6 @@ the same dependencies
           <artifactSet>
             <includes>
               <include>org.apache.hive:hive-exec</include>
-              <include>org.apache.hadoop:hadoop-mapreduce-client</include>
             </includes>
           </artifactSet>
           <relocations>
diff --git a/testdata/cluster/admin b/testdata/cluster/admin
index acc44a5..9eafd8c 100755
--- a/testdata/cluster/admin
+++ b/testdata/cluster/admin
@@ -34,6 +34,11 @@ setup_report_build_error
 : ${IMPALA_KERBERIZE=}
 : ${INCLUDE_YARN=}
 
+# For Hive 3, we require Yarn for Tez support.
+if [[ $USE_CDP_HIVE ]]; then
+  INCLUDE_YARN=1
+fi
+
 while getopts vky OPT; do
   case $OPT in
     v) set -x;;
@@ -54,6 +59,7 @@ NODE_PREFIX=node-
 COMMON_NODE_TEMPLATE="$DIR/node_templates/common"
 NODE_TEMPLATE="$DIR/node_templates/cdh$CDH_MAJOR_VERSION"
 TEMPLATE_SUFFIX=".tmpl"
+PY_TEMPLATE_SUFFIX=".xml.py"
 
 # Each process should be marked with this so a "pkill -f" can be done to nuke everything.
 export KILL_CLUSTER_MARKER=IBelongToTheMiniCluster
@@ -237,6 +243,9 @@ function create_cluster {
       # Remove master role scripts from slave nodes
       rm -f "$NODE_DIR/etc/init.d/"{hdfs-namenode,yarn-resourcemanager} \
             "$NODE_DIR/etc/init.d/"{kms,kudu-master}
+      # Only run one YARN nodemanager (more memory-efficient to scale up a
+      # single NM than run several)
+      rm -f "$NODE_DIR/etc/init.d/yarn-nodemanager"
     fi
     for EMPTY_NODE_DIR in $EMPTY_NODE_DIRS; do
       mkdir -p "$NODE_DIR/$EMPTY_NODE_DIR"
@@ -302,6 +311,13 @@ function create_cluster {
       fi
       rm "$TEMPLATE_PATH" "$ACTUAL_PATH.1"
     done
+    # Substitute python-templated XML files.
+    # TODO(todd): move over all the XML templates to be Python-based.
+    for TEMPLATE_PATH in $(find "$NODE_DIR" -name "*$PY_TEMPLATE_SUFFIX"); do
+      ACTUAL_PATH="${TEMPLATE_PATH%$PY_TEMPLATE_SUFFIX}".xml
+      $IMPALA_HOME/bin/generate_xml_config.py $TEMPLATE_PATH $ACTUAL_PATH
+      rm $TEMPLATE_PATH
+    done
   done
 }
 
diff --git a/testdata/cluster/node_templates/common/etc/hadoop/conf/capacity-scheduler.xml b/testdata/cluster/node_templates/common/etc/hadoop/conf/capacity-scheduler.xml
new file mode 100644
index 0000000..80d4ed1
--- /dev/null
+++ b/testdata/cluster/node_templates/common/etc/hadoop/conf/capacity-scheduler.xml
@@ -0,0 +1,223 @@
+<!--
+  Licensed under the Apache License, Version 2.0 (the "License");
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License. See accompanying LICENSE file.
+
+  NOTE: this is the default capacity-scheduler.xml that ships with
+  YARN. No Impala-specific modifications have been made.
+-->
+<configuration>
+
+  <property>
+    <name>yarn.scheduler.capacity.maximum-applications</name>
+    <value>10000</value>
+    <description>
+      Maximum number of applications that can be pending and running.
+    </description>
+  </property>
+
+  <property>
+    <name>yarn.scheduler.capacity.maximum-am-resource-percent</name>
+    <value>0.1</value>
+    <description>
+      Maximum percent of resources in the cluster which can be used to run 
+      application masters i.e. controls number of concurrent running
+      applications.
+    </description>
+  </property>
+
+  <property>
+    <name>yarn.scheduler.capacity.resource-calculator</name>
+    <value>org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator</value>
+    <description>
+      The ResourceCalculator implementation to be used to compare 
+      Resources in the scheduler.
+      The default i.e. DefaultResourceCalculator only uses Memory while
+      DominantResourceCalculator uses dominant-resource to compare 
+      multi-dimensional resources such as Memory, CPU etc.
+    </description>
+  </property>
+
+  <property>
+    <name>yarn.scheduler.capacity.root.queues</name>
+    <value>default</value>
+    <description>
+      The queues at the this level (root is the root queue).
+    </description>
+  </property>
+
+  <property>
+    <name>yarn.scheduler.capacity.root.default.capacity</name>
+    <value>100</value>
+    <description>Default queue target capacity.</description>
+  </property>
+
+  <property>
+    <name>yarn.scheduler.capacity.root.default.user-limit-factor</name>
+    <value>1</value>
+    <description>
+      Default queue user limit a percentage from 0.0 to 1.0.
+    </description>
+  </property>
+
+  <property>
+    <name>yarn.scheduler.capacity.root.default.maximum-capacity</name>
+    <value>100</value>
+    <description>
+      The maximum capacity of the default queue. 
+    </description>
+  </property>
+
+  <property>
+    <name>yarn.scheduler.capacity.root.default.state</name>
+    <value>RUNNING</value>
+    <description>
+      The state of the default queue. State can be one of RUNNING or STOPPED.
+    </description>
+  </property>
+
+  <property>
+    <name>yarn.scheduler.capacity.root.default.acl_submit_applications</name>
+    <value>*</value>
+    <description>
+      The ACL of who can submit jobs to the default queue.
+    </description>
+  </property>
+
+  <property>
+    <name>yarn.scheduler.capacity.root.default.acl_administer_queue</name>
+    <value>*</value>
+    <description>
+      The ACL of who can administer jobs on the default queue.
+    </description>
+  </property>
+
+  <property>
+    <name>yarn.scheduler.capacity.root.default.acl_application_max_priority</name>
+    <value>*</value>
+    <description>
+      The ACL of who can submit applications with configured priority.
+      For e.g, [user={name} group={name} max_priority={priority} default_priority={priority}]
+    </description>
+  </property>
+
+   <property>
+     <name>yarn.scheduler.capacity.root.default.maximum-application-lifetime
+     </name>
+     <value>-1</value>
+     <description>
+        Maximum lifetime of an application which is submitted to a queue
+        in seconds. Any value less than or equal to zero will be considered as
+        disabled.
+        This will be a hard time limit for all applications in this
+        queue. If positive value is configured then any application submitted
+        to this queue will be killed after exceeds the configured lifetime.
+        User can also specify lifetime per application basis in
+        application submission context. But user lifetime will be
+        overridden if it exceeds queue maximum lifetime. It is point-in-time
+        configuration.
+        Note : Configuring too low value will result in killing application
+        sooner. This feature is applicable only for leaf queue.
+     </description>
+   </property>
+
+   <property>
+     <name>yarn.scheduler.capacity.root.default.default-application-lifetime
+     </name>
+     <value>-1</value>
+     <description>
+        Default lifetime of an application which is submitted to a queue
+        in seconds. Any value less than or equal to zero will be considered as
+        disabled.
+        If the user has not submitted application with lifetime value then this
+        value will be taken. It is point-in-time configuration.
+        Note : Default lifetime can't exceed maximum lifetime. This feature is
+        applicable only for leaf queue.
+     </description>
+   </property>
+
+  <property>
+    <name>yarn.scheduler.capacity.node-locality-delay</name>
+    <value>40</value>
+    <description>
+      Number of missed scheduling opportunities after which the CapacityScheduler 
+      attempts to schedule rack-local containers.
+      When setting this parameter, the size of the cluster should be taken into account.
+      We use 40 as the default value, which is approximately the number of nodes in one rack.
+      Note, if this value is -1, the locality constraint in the container request
+      will be ignored, which disables the delay scheduling.
+    </description>
+  </property>
+
+  <property>
+    <name>yarn.scheduler.capacity.rack-locality-additional-delay</name>
+    <value>-1</value>
+    <description>
+      Number of additional missed scheduling opportunities over the node-locality-delay
+      ones, after which the CapacityScheduler attempts to schedule off-switch containers,
+      instead of rack-local ones.
+      Example: with node-locality-delay=40 and rack-locality-delay=20, the scheduler will
+      attempt rack-local assignments after 40 missed opportunities, and off-switch assignments
+      after 40+20=60 missed opportunities.
+      When setting this parameter, the size of the cluster should be taken into account.
+      We use -1 as the default value, which disables this feature. In this case, the number
+      of missed opportunities for assigning off-switch containers is calculated based on
+      the number of containers and unique locations specified in the resource request,
+      as well as the size of the cluster.
+    </description>
+  </property>
+
+  <property>
+    <name>yarn.scheduler.capacity.queue-mappings</name>
+    <value></value>
+    <description>
+      A list of mappings that will be used to assign jobs to queues
+      The syntax for this list is [u|g]:[name]:[queue_name][,next mapping]*
+      Typically this list will be used to map users to queues,
+      for example, u:%user:%user maps all users to queues with the same name
+      as the user.
+    </description>
+  </property>
+
+  <property>
+    <name>yarn.scheduler.capacity.queue-mappings-override.enable</name>
+    <value>false</value>
+    <description>
+      If a queue mapping is present, will it override the value specified
+      by the user? This can be used by administrators to place jobs in queues
+      that are different than the one specified by the user.
+      The default is false.
+    </description>
+  </property>
+
+  <property>
+    <name>yarn.scheduler.capacity.per-node-heartbeat.maximum-offswitch-assignments</name>
+    <value>1</value>
+    <description>
+      Controls the number of OFF_SWITCH assignments allowed
+      during a node's heartbeat. Increasing this value can improve
+      scheduling rate for OFF_SWITCH containers. Lower values reduce
+      "clumping" of applications on particular nodes. The default is 1.
+      Legal values are 1-MAX_INT. This config is refreshable.
+    </description>
+  </property>
+
+
+  <property>
+    <name>yarn.scheduler.capacity.application.fail-fast</name>
+    <value>false</value>
+    <description>
+      Whether RM should fail during recovery if previous applications'
+      queue is no longer valid.
+    </description>
+  </property>
+
+</configuration>
diff --git a/testdata/cluster/node_templates/common/etc/hadoop/conf/yarn-site.xml.py b/testdata/cluster/node_templates/common/etc/hadoop/conf/yarn-site.xml.py
new file mode 100644
index 0000000..305feb3
--- /dev/null
+++ b/testdata/cluster/node_templates/common/etc/hadoop/conf/yarn-site.xml.py
@@ -0,0 +1,97 @@
+#!/usr/bin/env python
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import os
+import sys
+
+kerberize = os.environ.get('IMPALA_KERBERIZE') == '1'
+hive_major_version = int(os.environ['IMPALA_HIVE_VERSION'][0])
+
+
+def _get_system_ram_mb():
+  lines = file("/proc/meminfo").readlines()
+  memtotal_line = [l for l in lines if l.startswith('MemTotal')][0]
+  mem_kb = int(memtotal_line.split()[1])
+  return mem_kb / 1024
+
+
+def _get_yarn_nm_ram_mb():
+  sys_ram = _get_system_ram_mb()
+  # Fit into the following envelope:
+  # - need 4GB at a bare minimum
+  # - leave at least 24G for other services
+  # - don't need more than 48G
+  ret = min(max(sys_ram - 24 * 1024, 4096), 48 * 1024)
+  print >>sys.stderr, "Configuring Yarn NM to use {0}MB RAM".format(ret)
+  return ret
+
+
+CONFIG = {
+  # Host/port configs
+  'yarn.resourcemanager.webapp.address': '${EXTERNAL_LISTEN_HOST}:${YARN_WEBUI_PORT}',
+  'yarn.nodemanager.address': '${INTERNAL_LISTEN_HOST}:${NODEMANAGER_PORT}',
+  'yarn.nodemanager.localizer.address': '${INTERNAL_LISTEN_HOST}:${NODEMANAGER_LOCALIZER_PORT}',
+  'yarn.nodemanager.webapp.address': '${INTERNAL_LISTEN_HOST}:${NODEMANAGER_WEBUI_PORT}',
+
+  # Directories
+  'yarn.nodemanager.local-dirs': '${NODE_DIR}/var/lib/hadoop-yarn/cache/${USER}/nm-local-dir',
+  'yarn.nodemanager.log-dirs': '${NODE_DIR}/var/log/hadoop-yarn/containers',
+
+  # Enable the MR shuffle service, which is also used by Tez.
+  'yarn.nodemanager.aux-services': 'mapreduce_shuffle',
+  'yarn.nodemanager.aux-services.mapreduce_shuffle.class': 'org.apache.hadoop.mapred.ShuffleHandler',
+  # Disable vmem checking, since vmem is essentially free, and tasks
+  # fail with vmem limit errors otherwise.
+  'yarn.nodemanager.vmem-check-enabled': 'false',
+
+  # Limit memory used by the NM to 8GB.
+  # TODO(todd): auto-configure this based on the memory available on the machine
+  # to speed up data-loading.
+  'yarn.nodemanager.resource.memory-mb': _get_yarn_nm_ram_mb()
+}
+
+app_classpath = [
+  # Default classpath as provided by Hadoop: these environment variables are not
+  # expanded by our config templating, but rather evaluated and expanded by
+  # YARN itself, in a context where the various _HOMEs have been defined.
+  '$HADOOP_CONF_DIR',
+  '$HADOOP_COMMON_HOME/share/hadoop/common/*',
+  '$HADOOP_COMMON_HOME/share/hadoop/common/lib/*',
+  '$HADOOP_HDFS_HOME/share/hadoop/hdfs/*',
+  '$HADOOP_HDFS_HOME/share/hadoop/hdfs/lib/*',
+  '$HADOOP_YARN_HOME/share/hadoop/yarn/*',
+  '$HADOOP_YARN_HOME/share/hadoop/yarn/lib/*',
+  # Append the LZO jar for LZO-compressed file support.
+  '${LZO_JAR_PATH}']
+
+# Hive 3 needs Tez on the classpath.
+if hive_major_version == 3:
+  app_classpath += [
+      '${TEZ_HOME}/*',
+      '${TEZ_HOME}/lib/*']
+
+CONFIG['yarn.application.classpath'] = ",".join(app_classpath)
+
+if kerberize:
+  CONFIG.update({
+    'yarn.resourcemanager.keytab': '${KRB5_KTNAME}',
+    'yarn.resourcemanager.principal': '${MINIKDC_PRINC_USER}',
+    'yarn.nodemanager.keytab': '${KRB5_KTNAME}',
+    'yarn.nodemanager.principal': '${MINIKDC_PRINC_USER}',
+  })
diff --git a/testdata/cluster/node_templates/common/etc/hadoop/conf/yarn-site.xml.tmpl b/testdata/cluster/node_templates/common/etc/hadoop/conf/yarn-site.xml.tmpl
deleted file mode 100644
index 036a21c..0000000
--- a/testdata/cluster/node_templates/common/etc/hadoop/conf/yarn-site.xml.tmpl
+++ /dev/null
@@ -1,154 +0,0 @@
-<?xml version="1.0"?>
-<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
-
-<!-- TODO: Remove any Llama-related configuration. Can this file be removed entirely? -->
-<configuration>
-  <property>
-    <name>yarn.resourcemanager.webapp.address</name>
-    <value>${EXTERNAL_LISTEN_HOST}:${YARN_WEBUI_PORT}</value>
-  </property>
-
-  <property>
-    <name>yarn.nodemanager.address</name>
-    <value>${INTERNAL_LISTEN_HOST}:${NODEMANAGER_PORT}</value>
-  </property>
-
-  <property>
-    <name>yarn.nodemanager.delete.debug-delay-sec</name>
-    <value>600</value>
-  </property>
-
-  <property>
-    <name>yarn.nodemanager.resource.memory-mb</name>
-    <value>16384</value>
-  </property>
-
-  <property>
-    <name>yarn.nodemanager.resource.cpu-vcores</name>
-    <value>16</value>
-  </property>
-
-  <property>
-    <name>yarn.resourcemanager.nodemanagers.heartbeat-interval-ms</name>
-    <value>100</value>
-  </property>
-
-  <property>
-    <name>yarn.scheduler.fair.continuous-scheduling-enabled</name>
-    <value>true</value>
-  </property>
-
-  <property>
-    <name>yarn.scheduler.fair.assignmultiple</name>
-    <value>true</value>
-  </property>
-
-  <property>
-    <name>yarn.resourcemanager.scheduler.class</name>
-    <value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler</value>
-  </property>
-
-  <property>
-    <name>yarn.nodemanager.localizer.address</name>
-    <value>${INTERNAL_LISTEN_HOST}:${NODEMANAGER_LOCALIZER_PORT}</value>
-  </property>
-
-  <property>
-    <name>yarn.nodemanager.webapp.address</name>
-    <value>${INTERNAL_LISTEN_HOST}:${NODEMANAGER_WEBUI_PORT}</value>
-  </property>
-
-  <property>
-    <name>yarn.nodemanager.local-dirs</name>
-    <value>${NODE_DIR}/data/yarn/local</value>
-  </property>
-
-  <property>
-    <name>yarn.nodemanager.log-dirs</name>
-    <value>${NODE_DIR}/data/yarn/logs</value>
-  </property>
-
-  <property>
-    <name>yarn.nodemanager.aux-services</name>
-    <value>mapreduce_shuffle</value>
-  </property>
-
-  <property>
-    <name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
-    <value>org.apache.hadoop.mapred.ShuffleHandler</value>
-  </property>
-
-  <property>
-    <name>yarn.log-aggregation-enable</name>
-    <value>true</value>
-  </property>
-
-  <property>
-    <description>List of directories to store localized files in.</description>
-    <name>yarn.nodemanager.local-dirs</name>
-    <value>${NODE_DIR}/var/lib/hadoop-yarn/cache/${user.name}/nm-local-dir</value>
-  </property>
-
-  <property>
-    <description>Where to store container logs.</description>
-    <name>yarn.nodemanager.log-dirs</name>
-    <value>${NODE_DIR}/var/log/hadoop-yarn/containers</value>
-  </property>
-
-  <property>
-    <description>Where to aggregate logs to.</description>
-    <name>yarn.nodemanager.remote-app-log-dir</name>
-    <value>${NODE_DIR}/var/log/hadoop-yarn/apps</value>
-  </property>
-
-  <property>
-    <description>Classpath for typical applications.</description>
-     <name>yarn.application.classpath</name>
-     <value>
-        ${HADOOP_CONF_DIR},
-        ${HADOOP_HOME}/share/hadoop/tools/lib/*,
-        ${HADOOP_HOME}/share/hadoop/common/*,
-        ${HADOOP_HOME}/share/hadoop/common/lib/*,
-        ${HADOOP_HOME}/share/hadoop/hdfs/*,
-        ${HADOOP_HOME}/share/hdfs/common/lib/*,
-        ${HADOOP_HOME}/share/hadoop/mapreduce/*,
-        ${HADOOP_HOME}/share/hadoop/mapreduce/lib/*,
-        ${HADOOP_HOME}/share/hadoop/yarn/*,
-        ${HADOOP_HOME}/share/hadoop/yarn/lib/*,
-        ${LZO_JAR_PATH}
-     </value>
-  </property>
-
-  <!-- BEGIN Kerberos settings -->
-
-  <!-- KERBEROS TODO: Add these to yarn.application.classpath.
-       ${IMPALA_FE_DIR}/target/*,${HADOOP_LZO}/build/*,
-       ${IMPALA_FE_DIR}/target/dependency/* -->
-
-  <!-- ResourceManager security configs -->
-  <property>
-    <name>yarn.resourcemanager.keytab</name>
-    <value>${KRB5_KTNAME}</value>
-  </property>
-
-  <property>
-    <name>yarn.resourcemanager.principal</name>
-    <value>${MINIKDC_PRINC_USER}</value>
-    <!-- Sort of horrible: instead of the yarn principle, we'll use ${USER}
-         so that we don't have a problem with file system permissions. -->
-  </property>
-
-  <!-- NodeManager security configs -->
-  <property>
-    <name>yarn.nodemanager.keytab</name>
-    <value>${KRB5_KTNAME}</value>
-  </property>
-
-  <property>
-    <name>yarn.nodemanager.principal</name>
-    <value>${MINIKDC_PRINC_USER}</value>
-    <!-- Also sort of horrible as per above -->
-  </property>
-  <!-- END Kerberos settings -->
-
-</configuration>