Posted to commits@impala.apache.org by ta...@apache.org on 2019/05/10 15:25:00 UTC

[impala] branch master updated (d423979 -> 17daa6e)

This is an automated email from the ASF dual-hosted git repository.

tarmstrong pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git.


    from d423979  IMPALA-5843: Use page index in Parquet files to skip pages
     new a2c5d95  IMPALA-8121: part 2: use local catalog in containers
     new 327b938  IMPALA-8516. Update maven for Jenkins builds
     new 17daa6e  IMPALA-8369 (part 2): Hive 3: switch to Tez-on-YARN execution

The 3 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 bin/bootstrap_build.sh                             |  11 +-
 bin/bootstrap_system.sh                            |  18 +-
 bin/bootstrap_toolchain.py                         |   7 +-
 bin/create-test-configuration.sh                   |  11 +-
 bin/generate_xml_config.py                         |   8 +-
 bin/impala-config.sh                               |  10 +-
 bin/jenkins/critique-gerrit-review.py              |   2 +-
 docker/catalogd/Dockerfile                         |   3 +-
 docker/coord_exec/Dockerfile                       |   3 +-
 docker/coordinator/Dockerfile                      |   2 +-
 fe/pom.xml                                         |  26 ++-
 .../apache/impala/analysis/CopyTestCaseStmt.java   |   2 +-
 .../java/org/apache/impala/service/JdbcTest.java   |  12 +-
 fe/src/test/resources/hive-site.xml.py             |   5 +-
 shaded-deps/pom.xml                                |   1 -
 testdata/cluster/admin                             |  16 ++
 .../common/etc/hadoop/conf/capacity-scheduler.xml  | 223 +++++++++++++++++++++
 .../common/etc/hadoop/conf/yarn-site.xml.py        |  97 +++++++++
 .../common/etc/hadoop/conf/yarn-site.xml.tmpl      | 154 --------------
 .../partition-ddl-predicates-hdfs-only.test        |  12 +-
 tests/common/impala_connection.py                  |   1 -
 tests/common/impala_test_suite.py                  |  20 +-
 tests/common/skip.py                               |  50 +++++
 tests/hs2/test_hs2.py                              |  57 ++++++
 tests/metadata/test_ddl.py                         |  18 +-
 tests/metadata/test_ddl_base.py                    |  14 +-
 tests/metadata/test_hdfs_permissions.py            |   4 +-
 tests/metadata/test_hms_integration.py             |  10 +-
 tests/metadata/test_metadata_query_statements.py   |   4 +-
 tests/metadata/test_recover_partitions.py          |   3 +-
 tests/query_test/test_hdfs_caching.py              |   3 +-
 tests/query_test/test_insert_behaviour.py          |   8 +-
 tests/query_test/test_kudu.py                      |  36 +++-
 tests/query_test/test_queries.py                   |   3 +-
 tests/query_test/test_udfs.py                      |   6 +-
 35 files changed, 633 insertions(+), 227 deletions(-)
 create mode 100644 testdata/cluster/node_templates/common/etc/hadoop/conf/capacity-scheduler.xml
 create mode 100644 testdata/cluster/node_templates/common/etc/hadoop/conf/yarn-site.xml.py
 delete mode 100644 testdata/cluster/node_templates/common/etc/hadoop/conf/yarn-site.xml.tmpl


[impala] 02/03: IMPALA-8516. Update maven for Jenkins builds

Posted by ta...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tarmstrong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 327b93821453f6abda2cb2d4437bba10946acc00
Author: Todd Lipcon <to...@apache.org>
AuthorDate: Tue May 7 11:09:17 2019 -0700

    IMPALA-8516. Update maven for Jenkins builds
    
    This changes the bootstrap scripts to download and install Maven on both
    Ubuntu and Redhat for the Jenkins builds (previously the downloaded copy
    was Redhat-only and Ubuntu used the distro package).
    
    The version number is kept at 3.5.4 even though a newer release is
    available upstream. The new release fails to build Impala due to an
    XML-parsing bug that makes it fail to resolve the parquet pom [1].
    
    This should hopefully address some of the hang issues we've seen
    previously with the older version of Maven that ships with Ubuntu 16.04.
    
    [1] https://github.com/codehaus-plexus/plexus-utils/issues/65
    
    Change-Id: I793409eb4e9f4533b75bfe089a497c0ea62ad1ff
    Reviewed-on: http://gerrit.cloudera.org:8080/13268
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
    Reviewed-by: Todd Lipcon <to...@apache.org>
---
 bin/bootstrap_build.sh  | 11 ++++++++++-
 bin/bootstrap_system.sh | 18 ++++++++++++------
 2 files changed, 22 insertions(+), 7 deletions(-)

diff --git a/bin/bootstrap_build.sh b/bin/bootstrap_build.sh
index 82f1f37..b94a026 100755
--- a/bin/bootstrap_build.sh
+++ b/bin/bootstrap_build.sh
@@ -31,7 +31,7 @@ set -euxo pipefail
 
 # Install non-java dependencies:
 sudo apt-get update
-sudo apt-get --yes install g++ gcc git libsasl2-dev libssl-dev make maven \
+sudo apt-get --yes install g++ gcc git libsasl2-dev libssl-dev make \
     python-dev python-setuptools libffi-dev libkrb5-dev
 
 
@@ -45,4 +45,13 @@ fi
 sudo apt-get --yes install openjdk-${JDK_VERSION}-jdk openjdk-${JDK_VERSION}-source
 export JAVA_HOME=/usr/lib/jvm/java-${JDK_VERSION}-openjdk-amd64
 
+# Download Maven since the packaged version is pretty old.
+if [ ! -d /usr/local/apache-maven-3.5.4 ]; then
+  sudo wget -nv \
+    https://www-us.apache.org/dist/maven/maven-3/3.5.4/binaries/apache-maven-3.5.4-bin.tar.gz
+  sha512sum -c - <<< '2a803f578f341e164f6753e410413d16ab60fabe31dc491d1fe35c984a5cce696bc71f57757d4538fe7738be04065a216f3ebad4ef7e0ce1bb4c51bc36d6be86 apache-maven-3.5.4-bin.tar.gz'
+  sudo tar -C /usr/local -xzf apache-maven-3.5.4-bin.tar.gz
+  sudo ln -s /usr/local/apache-maven-3.5.4/bin/mvn /usr/local/bin
+fi
+
 ./buildall.sh -notests -so
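For readers unfamiliar with the idiom, the `sha512sum -c -` heredoc above
verifies the downloaded tarball against a pinned digest; combined with
`set -euxo pipefail`, a mismatch aborts the bootstrap. A minimal Python
sketch of the same check (the digest is abbreviated here; the full value
is the one pinned in the script):

    import hashlib

    def verify_sha512(path, expected_hex):
        # Stream the file so large tarballs need not fit in memory.
        h = hashlib.sha512()
        with open(path, "rb") as f:
            for chunk in iter(lambda: f.read(1 << 20), b""):
                h.update(chunk)
        if h.hexdigest() != expected_hex:
            raise ValueError("checksum mismatch for %s" % path)

    # verify_sha512("apache-maven-3.5.4-bin.tar.gz", "2a803f57...")
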
diff --git a/bin/bootstrap_system.sh b/bin/bootstrap_system.sh
index af43a26..d24aed4 100755
--- a/bin/bootstrap_system.sh
+++ b/bin/bootstrap_system.sh
@@ -189,7 +189,7 @@ echo ">>> Installing build tools"
 ubuntu apt-get update
 ubuntu apt-get --yes install ccache g++ gcc libffi-dev liblzo2-dev libkrb5-dev \
         krb5-admin-server krb5-kdc krb5-user libsasl2-dev libsasl2-modules \
-        libsasl2-modules-gssapi-mit libssl-dev make maven ninja-build ntp \
+        libsasl2-modules-gssapi-mit libssl-dev make ninja-build ntp \
         ntpdate python-dev python-setuptools postgresql ssh wget vim-common psmisc \
         lsof openjdk-8-jdk openjdk-8-source openjdk-8-dbg apt-utils git ant
 
@@ -235,17 +235,23 @@ redhat sudo yum install -y ccache
 # Clean up yum caches
 redhat sudo yum clean all
 
-# Download ant and mvn for centos
+# Download ant for centos
 redhat sudo wget -nv \
-  https://www-us.apache.org/dist/maven/maven-3/3.5.4/binaries/apache-maven-3.5.4-bin.tar.gz \
   https://www-us.apache.org/dist/ant/binaries/apache-ant-1.9.13-bin.tar.gz
-redhat sha512sum -c - <<< '2a803f578f341e164f6753e410413d16ab60fabe31dc491d1fe35c984a5cce696bc71f57757d4538fe7738be04065a216f3ebad4ef7e0ce1bb4c51bc36d6be86  apache-maven-3.5.4-bin.tar.gz'
 redhat sha512sum -c - <<< 'c8321aa223f70d7e64d3d0274263000cfffb46fbea61488534e26f9f0245d99e9872d0888e35cd3274416392a13f80c748c07750caaeffa5f9cae1220020715f  apache-ant-1.9.13-bin.tar.gz'
-redhat sudo tar -C /usr/local -xzf apache-maven-3.5.4-bin.tar.gz
 redhat sudo tar -C /usr/local -xzf apache-ant-1.9.13-bin.tar.gz
-redhat sudo ln -s /usr/local/apache-maven-3.5.4/bin/mvn /usr/local/bin
 redhat sudo ln -s /usr/local/apache-ant-1.9.13/bin/ant /usr/local/bin
 
+# Download maven for all OSes, since the OS-packaged version can be
+# pretty old.
+if [ ! -d /usr/local/apache-maven-3.5.4 ]; then
+  sudo wget -nv \
+    https://www-us.apache.org/dist/maven/maven-3/3.5.4/binaries/apache-maven-3.5.4-bin.tar.gz
+  sha512sum -c - <<< '2a803f578f341e164f6753e410413d16ab60fabe31dc491d1fe35c984a5cce696bc71f57757d4538fe7738be04065a216f3ebad4ef7e0ce1bb4c51bc36d6be86 apache-maven-3.5.4-bin.tar.gz'
+  sudo tar -C /usr/local -xzf apache-maven-3.5.4-bin.tar.gz
+  sudo ln -s /usr/local/apache-maven-3.5.4/bin/mvn /usr/local/bin
+fi
+
 if ! { service --status-all | grep -E '^ \[ \+ \]  ssh$'; }
 then
   ubuntu sudo service ssh start
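The `ubuntu` and `redhat` prefixes seen throughout this file are shell
functions defined earlier in bootstrap_system.sh that run the rest of the
line only when the host matches that distribution. A hedged Python sketch
of the same dispatch pattern (detection logic simplified for illustration;
the real script inspects the OS release files):

    import platform
    import subprocess

    def _distro():
        # Crude detection, for illustration only.
        name = platform.platform().lower()
        return "redhat" if ("centos" in name or "redhat" in name) else "ubuntu"

    def run_on(distro, *cmd):
        # Run 'cmd' only when the host matches 'distro'; otherwise a no-op.
        if _distro() == distro:
            subprocess.check_call(cmd)

    # run_on("ubuntu", "sudo", "apt-get", "update")
    # run_on("redhat", "sudo", "yum", "install", "-y", "ccache")
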


[impala] 03/03: IMPALA-8369 (part 2): Hive 3: switch to Tez-on-YARN execution

Posted by ta...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tarmstrong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 17daa6efb9c3c5c6fbd0908f2176b99d8498a250
Author: Todd Lipcon <to...@apache.org>
AuthorDate: Thu May 2 14:25:57 2019 -0700

    IMPALA-8369 (part 2): Hive 3: switch to Tez-on-YARN execution
    
    This switches away from Tez local mode to tez-on-YARN. After spending a
    couple of days trying to debug issues with Tez local mode, it seemed
    like it was just going to be too much of a lift.
    
    This patch enables starting a YARN ResourceManager (RM) and NodeManager
    (NM) when USE_CDP_HIVE is enabled. It also switches to a new
    yarn-site.xml with a minimized set of configurations, generated by the
    new Python templating.
    
    In order for everything to work properly I also had to update the Hadoop
    dependency to come from CDP instead of CDH when using CDP Hive.
    Otherwise, the classpath of the launched Tez containers had conflicting
    versions of various Hadoop classes which caused tasks to fail.
    
    I verified that this fixes concurrent query execution by running queries
    in parallel in two beeline sessions. With local mode, these queries
    would periodically fail due to various races (HIVE-21682). I'm also able
    to get farther along in data loading.
    
    Change-Id: If96064f271582b2790a3cfb3d135f3834d46c41d
    Reviewed-on: http://gerrit.cloudera.org:8080/13224
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
    Reviewed-by: Todd Lipcon <to...@apache.org>
---
 bin/bootstrap_toolchain.py                         |   7 +-
 bin/create-test-configuration.sh                   |  11 +-
 bin/generate_xml_config.py                         |   8 +-
 bin/impala-config.sh                               |  10 +-
 bin/jenkins/critique-gerrit-review.py              |   2 +-
 fe/pom.xml                                         |  26 ++-
 .../apache/impala/analysis/CopyTestCaseStmt.java   |   2 +-
 fe/src/test/resources/hive-site.xml.py             |   5 +-
 shaded-deps/pom.xml                                |   1 -
 testdata/cluster/admin                             |  16 ++
 .../common/etc/hadoop/conf/capacity-scheduler.xml  | 223 +++++++++++++++++++++
 .../common/etc/hadoop/conf/yarn-site.xml.py        |  97 +++++++++
 .../common/etc/hadoop/conf/yarn-site.xml.tmpl      | 154 --------------
 13 files changed, 389 insertions(+), 173 deletions(-)

diff --git a/bin/bootstrap_toolchain.py b/bin/bootstrap_toolchain.py
index 07a646d..34547fe 100755
--- a/bin/bootstrap_toolchain.py
+++ b/bin/bootstrap_toolchain.py
@@ -553,10 +553,10 @@ if __name__ == "__main__":
   toolchain_host = os.environ["IMPALA_TOOLCHAIN_HOST"]
   cdh_build_number = os.environ["CDH_BUILD_NUMBER"]
 
-  cdh_components = map(Package, ["hadoop", "hbase", "sentry"])
+  cdh_components = map(Package, ["hbase", "sentry"])
   use_cdp_hive = os.getenv("USE_CDP_HIVE") == "true"
   if not use_cdp_hive:
-    cdh_components += [Package("hive")]
+    cdh_components += [Package("hive"), Package("hadoop")]
 
   if use_cdh_kudu:
     if not try_get_platform_release_label() or not try_get_platform_release_label().cdh:
@@ -580,12 +580,13 @@ if __name__ == "__main__":
   cdp_components = [
     CdpComponent("ranger-{0}-admin".format(os.environ.get("IMPALA_RANGER_VERSION"))),
   ]
-  use_cdp_hive = os.getenv("USE_CDP_HIVE") == "true"
   if use_cdp_hive:
     hive_version = os.environ.get("IMPALA_HIVE_VERSION")
     cdp_components.append(CdpComponent("hive-{0}-source".format(hive_version),
                           pkg_directory="hive-{0}".format(hive_version))),
     cdp_components.append(CdpComponent("apache-hive-{0}-bin".format(hive_version))),
+    cdp_components.append(CdpComponent("hadoop-{0}"
+                          .format(os.environ.get("IMPALA_HADOOP_VERSION")))),
     cdp_components.append(CdpComponent(
         "tez-{0}-minimal".format(os.environ.get("IMPALA_TEZ_VERSION")),
         makedir=True))
diff --git a/bin/create-test-configuration.sh b/bin/create-test-configuration.sh
index 208d4f8..8d08562 100755
--- a/bin/create-test-configuration.sh
+++ b/bin/create-test-configuration.sh
@@ -174,12 +174,12 @@ if [ $CREATE_RANGER_POLICY_DB -eq 1 ]; then
   popd
 fi
 
-echo "Linking core-site.xml from local cluster"
+echo "Linking common conf files from local cluster:"
 CLUSTER_HADOOP_CONF_DIR=$(${CLUSTER_DIR}/admin get_hadoop_client_conf_dir)
-ln -s ${CLUSTER_HADOOP_CONF_DIR}/core-site.xml
-
-echo "Linking hdfs-site.xml from local cluster"
-ln -s ${CLUSTER_HADOOP_CONF_DIR}/hdfs-site.xml
+for file in core-site.xml hdfs-site.xml yarn-site.xml ; do
+  echo ... $file
+  ln -s ${CLUSTER_HADOOP_CONF_DIR}/$file
+done
 
 if ${CLUSTER_DIR}/admin is_kerberized; then
   # KERBEROS TODO: Without this, the yarn daemons can see these
@@ -190,7 +190,6 @@ if ${CLUSTER_DIR}/admin is_kerberized; then
   # kerberos principals. Obviously this has to be sorted out before
   # a kerberized cluster can load data.
   echo "Linking yarn and mapred from local cluster"
-  ln -s ${CLUSTER_HADOOP_CONF_DIR}/yarn-site.xml
   ln -s ${CLUSTER_HADOOP_CONF_DIR}/mapred-site.xml
 fi
 
diff --git a/bin/generate_xml_config.py b/bin/generate_xml_config.py
index a06da7e..18e3615 100755
--- a/bin/generate_xml_config.py
+++ b/bin/generate_xml_config.py
@@ -80,6 +80,8 @@ def dump_config(d, source_path, out):
   print >>out, dedent(header)
   for k, v in sorted(d.iteritems()):
     try:
+      if isinstance(v, int):
+        v = str(v)
       v = _substitute_env_vars(v)
     except KeyError, e:
       raise Exception("failed environment variable substitution for value {k}: {e}"
@@ -98,7 +100,11 @@ def main():
     sys.exit(1)
 
   _, in_path, out_path = sys.argv
-  mod = imp.load_source('template', in_path)
+  try:
+    mod = imp.load_source('template', in_path)
+  except:  # noqa
+    print >>sys.stderr, "Unable to load template: %s" % in_path
+    raise
   conf = mod.__dict__.get('CONFIG')
   if not isinstance(conf, dict):
     raise Exception("module in '{path}' should define a dict named CONFIG"
diff --git a/bin/impala-config.sh b/bin/impala-config.sh
index cc8cfef..68de5e4 100755
--- a/bin/impala-config.sh
+++ b/bin/impala-config.sh
@@ -162,7 +162,8 @@ export IMPALA_TOOLCHAIN_HOST
 export CDH_MAJOR_VERSION=6
 export CDH_BUILD_NUMBER=1055188
 export CDP_BUILD_NUMBER=1056671
-export IMPALA_HADOOP_VERSION=3.0.0-cdh6.x-SNAPSHOT
+export CDH_HADOOP_VERSION=3.0.0-cdh6.x-SNAPSHOT
+export CDP_HADOOP_VERSION=3.1.1.6.0.99.0-147
 export IMPALA_HBASE_VERSION=2.1.0-cdh6.x-SNAPSHOT
 export IMPALA_SENTRY_VERSION=2.1.0-cdh6.x-SNAPSHOT
 export IMPALA_RANGER_VERSION=1.2.0.6.0.99.0-147
@@ -200,10 +201,14 @@ if $USE_CDP_HIVE; then
   # the minicluster
   export IMPALA_HIVE_VERSION=${CDP_HIVE_VERSION}
   export IMPALA_TEZ_VERSION=0.9.1.6.0.99.0-147
+  export IMPALA_HADOOP_VERSION=${CDP_HADOOP_VERSION}
+  export HADOOP_HOME="$CDP_COMPONENTS_HOME/hadoop-${CDP_HADOOP_VERSION}/"
 else
   # CDH hive version is used to build and deploy in minicluster when USE_CDP_HIVE is
   # false
   export IMPALA_HIVE_VERSION=${CDH_HIVE_VERSION}
+  export IMPALA_HADOOP_VERSION=${CDH_HADOOP_VERSION}
+  export HADOOP_HOME="$CDH_COMPONENTS_HOME/hadoop-${IMPALA_HADOOP_VERSION}/"
 fi
 # Extract the first component of the hive version.
 # Allow overriding of Hive source location in case we want to build Impala without
@@ -510,9 +515,6 @@ export IMPALA_COMMON_DIR="$IMPALA_HOME/common"
 export PATH="$IMPALA_TOOLCHAIN/gdb-$IMPALA_GDB_VERSION/bin:$PATH"
 export PATH="$IMPALA_HOME/bin:$IMPALA_TOOLCHAIN/cmake-$IMPALA_CMAKE_VERSION/bin/:$PATH"
 
-# Typically we build against a snapshot build of Hadoop that includes everything we need
-# for building Impala and running a minicluster.
-export HADOOP_HOME="$CDH_COMPONENTS_HOME/hadoop-${IMPALA_HADOOP_VERSION}/"
 export HADOOP_CONF_DIR="$IMPALA_FE_DIR/src/test/resources"
 # The include and lib paths are needed to pick up hdfs.h and libhdfs.*
 # Allow overriding in case we want to point to a package/install with a different layout.
diff --git a/bin/jenkins/critique-gerrit-review.py b/bin/jenkins/critique-gerrit-review.py
index 5048a1d..c2bfdb7 100755
--- a/bin/jenkins/critique-gerrit-review.py
+++ b/bin/jenkins/critique-gerrit-review.py
@@ -69,7 +69,7 @@ EXCLUDE_FILE_PATTERNS = [
     re.compile(r".*/catalog/BuiltinsDb.java"),  # Many long strings.
     re.compile(r".*/codegen/gen_ir_descriptions.py"),  # Many long strings.
     re.compile(r".*shell/ext-py/.*"),  # Third-party code.
-    re.compile(r".*/fe/src/test/resources/.*.py") # Long lines in config files.
+    re.compile(r".*/.*\.xml\.py")  # Long lines in config template files.
 ]
 
 
diff --git a/fe/pom.xml b/fe/pom.xml
index 43701b4..ecd79d3 100644
--- a/fe/pom.xml
+++ b/fe/pom.xml
@@ -53,7 +53,11 @@ under the License.
       <artifactId>hadoop-hdfs</artifactId>
       <version>${hadoop.version}</version>
     </dependency>
-
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-hdfs-client</artifactId>
+      <version>${hadoop.version}</version>
+    </dependency>
     <dependency>
       <groupId>org.apache.hadoop</groupId>
       <artifactId>hadoop-common</artifactId>
@@ -178,6 +182,10 @@ under the License.
           <groupId>org.apache.hive</groupId>
           <artifactId>*</artifactId>
         </exclusion>
+        <exclusion>
+          <groupId>org.apache.hadoop</groupId>
+          <artifactId>hadoop-mapreduce-client-jobclient</artifactId>
+        </exclusion>
       </exclusions>
     </dependency>
 
@@ -993,6 +1001,22 @@ under the License.
               <groupId>org.apache.hive</groupId>
               <artifactId>hive-shims</artifactId>
             </exclusion>
+            <exclusion>
+              <groupId>org.apache.hadoop</groupId>
+              <artifactId>*</artifactId>
+            </exclusion>
+          </exclusions>
+        </dependency>
+        <!-- needed for JobConf, which HiveConf inherits from -->
+        <dependency>
+          <groupId>org.apache.hadoop</groupId>
+          <artifactId>hadoop-mapreduce-client-core</artifactId>
+          <version>${hadoop.version}</version>
+          <exclusions>
+            <exclusion>
+              <groupId>*</groupId>
+              <artifactId>*</artifactId>
+            </exclusion>
           </exclusions>
         </dependency>
       </dependencies>
diff --git a/fe/src/main/java/org/apache/impala/analysis/CopyTestCaseStmt.java b/fe/src/main/java/org/apache/impala/analysis/CopyTestCaseStmt.java
index 5023963..d961f95 100644
--- a/fe/src/main/java/org/apache/impala/analysis/CopyTestCaseStmt.java
+++ b/fe/src/main/java/org/apache/impala/analysis/CopyTestCaseStmt.java
@@ -17,7 +17,7 @@
 
 package org.apache.impala.analysis;
 
-import avro.shaded.com.google.common.collect.Sets;
+import com.google.common.collect.Sets;
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Preconditions;
 import org.apache.hadoop.fs.FSDataOutputStream;
diff --git a/fe/src/test/resources/hive-site.xml.py b/fe/src/test/resources/hive-site.xml.py
index 18e0011..66e62a7 100644
--- a/fe/src/test/resources/hive-site.xml.py
+++ b/fe/src/test/resources/hive-site.xml.py
@@ -81,7 +81,10 @@ if hive_major_version >= 3:
   CONFIG.update({
    'hive.tez.container.size': '512',
    'hive.txn.manager': 'org.apache.hadoop.hive.ql.lockmgr.DbTxnManager',
-   'tez.local.mode': 'true'})
+   # We run YARN with Tez on the classpath directly
+   'tez.ignore.lib.uris': 'true',
+   'tez.use.cluster.hadoop-libs': 'true',
+  })
 else:
   CONFIG.update({
    # TODO(vihang) Disabled for HMS3.
diff --git a/shaded-deps/pom.xml b/shaded-deps/pom.xml
index 6aad3c5..579758e 100644
--- a/shaded-deps/pom.xml
+++ b/shaded-deps/pom.xml
@@ -51,7 +51,6 @@ the same dependencies
           <artifactSet>
             <includes>
               <include>org.apache.hive:hive-exec</include>
-              <include>org.apache.hadoop:hadoop-mapreduce-client</include>
             </includes>
           </artifactSet>
           <relocations>
diff --git a/testdata/cluster/admin b/testdata/cluster/admin
index acc44a5..9eafd8c 100755
--- a/testdata/cluster/admin
+++ b/testdata/cluster/admin
@@ -34,6 +34,11 @@ setup_report_build_error
 : ${IMPALA_KERBERIZE=}
 : ${INCLUDE_YARN=}
 
+# For Hive 3, we require Yarn for Tez support.
+if [[ $USE_CDP_HIVE ]]; then
+  INCLUDE_YARN=1
+fi
+
 while getopts vky OPT; do
   case $OPT in
     v) set -x;;
@@ -54,6 +59,7 @@ NODE_PREFIX=node-
 COMMON_NODE_TEMPLATE="$DIR/node_templates/common"
 NODE_TEMPLATE="$DIR/node_templates/cdh$CDH_MAJOR_VERSION"
 TEMPLATE_SUFFIX=".tmpl"
+PY_TEMPLATE_SUFFIX=".xml.py"
 
 # Each process should be marked with this so a "pkill -f" can be done to nuke everything.
 export KILL_CLUSTER_MARKER=IBelongToTheMiniCluster
@@ -237,6 +243,9 @@ function create_cluster {
       # Remove master role scripts from slave nodes
       rm -f "$NODE_DIR/etc/init.d/"{hdfs-namenode,yarn-resourcemanager} \
             "$NODE_DIR/etc/init.d/"{kms,kudu-master}
+      # Only run one YARN nodemanager (more memory-efficient to scale up a
+      # single NM than run several)
+      rm -f "$NODE_DIR/etc/init.d/yarn-nodemanager"
     fi
     for EMPTY_NODE_DIR in $EMPTY_NODE_DIRS; do
       mkdir -p "$NODE_DIR/$EMPTY_NODE_DIR"
@@ -302,6 +311,13 @@ function create_cluster {
       fi
       rm "$TEMPLATE_PATH" "$ACTUAL_PATH.1"
     done
+    # Substitute python-templated XML files.
+    # TODO(todd): move over all the XML templates to be Python-based.
+    for TEMPLATE_PATH in $(find "$NODE_DIR" -name "*$PY_TEMPLATE_SUFFIX"); do
+      ACTUAL_PATH="${TEMPLATE_PATH%$PY_TEMPLATE_SUFFIX}".xml
+      $IMPALA_HOME/bin/generate_xml_config.py $TEMPLATE_PATH $ACTUAL_PATH
+      rm $TEMPLATE_PATH
+    done
   done
 }
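Note the suffix handling above: "${TEMPLATE_PATH%$PY_TEMPLATE_SUFFIX}.xml"
strips the ".xml.py" suffix and appends ".xml", so a node's
.../conf/yarn-site.xml.py is rendered to .../conf/yarn-site.xml and the
template is then removed. The same transformation in Python (path
hypothetical):

    template = "node-1/etc/hadoop/conf/yarn-site.xml.py"
    suffix = ".xml.py"
    actual = template[:-len(suffix)] + ".xml"
    # -> "node-1/etc/hadoop/conf/yarn-site.xml"
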
 
diff --git a/testdata/cluster/node_templates/common/etc/hadoop/conf/capacity-scheduler.xml b/testdata/cluster/node_templates/common/etc/hadoop/conf/capacity-scheduler.xml
new file mode 100644
index 0000000..80d4ed1
--- /dev/null
+++ b/testdata/cluster/node_templates/common/etc/hadoop/conf/capacity-scheduler.xml
@@ -0,0 +1,223 @@
+<!--
+  Licensed under the Apache License, Version 2.0 (the "License");
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License. See accompanying LICENSE file.
+
+  NOTE: this is the default capacity-scheduler.xml that ships with
+  YARN. No Impala-specific modifications have been made.
+-->
+<configuration>
+
+  <property>
+    <name>yarn.scheduler.capacity.maximum-applications</name>
+    <value>10000</value>
+    <description>
+      Maximum number of applications that can be pending and running.
+    </description>
+  </property>
+
+  <property>
+    <name>yarn.scheduler.capacity.maximum-am-resource-percent</name>
+    <value>0.1</value>
+    <description>
+      Maximum percent of resources in the cluster which can be used to run 
+      application masters i.e. controls number of concurrent running
+      applications.
+    </description>
+  </property>
+
+  <property>
+    <name>yarn.scheduler.capacity.resource-calculator</name>
+    <value>org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator</value>
+    <description>
+      The ResourceCalculator implementation to be used to compare 
+      Resources in the scheduler.
+      The default i.e. DefaultResourceCalculator only uses Memory while
+      DominantResourceCalculator uses dominant-resource to compare 
+      multi-dimensional resources such as Memory, CPU etc.
+    </description>
+  </property>
+
+  <property>
+    <name>yarn.scheduler.capacity.root.queues</name>
+    <value>default</value>
+    <description>
+      The queues at the this level (root is the root queue).
+    </description>
+  </property>
+
+  <property>
+    <name>yarn.scheduler.capacity.root.default.capacity</name>
+    <value>100</value>
+    <description>Default queue target capacity.</description>
+  </property>
+
+  <property>
+    <name>yarn.scheduler.capacity.root.default.user-limit-factor</name>
+    <value>1</value>
+    <description>
+      Default queue user limit a percentage from 0.0 to 1.0.
+    </description>
+  </property>
+
+  <property>
+    <name>yarn.scheduler.capacity.root.default.maximum-capacity</name>
+    <value>100</value>
+    <description>
+      The maximum capacity of the default queue. 
+    </description>
+  </property>
+
+  <property>
+    <name>yarn.scheduler.capacity.root.default.state</name>
+    <value>RUNNING</value>
+    <description>
+      The state of the default queue. State can be one of RUNNING or STOPPED.
+    </description>
+  </property>
+
+  <property>
+    <name>yarn.scheduler.capacity.root.default.acl_submit_applications</name>
+    <value>*</value>
+    <description>
+      The ACL of who can submit jobs to the default queue.
+    </description>
+  </property>
+
+  <property>
+    <name>yarn.scheduler.capacity.root.default.acl_administer_queue</name>
+    <value>*</value>
+    <description>
+      The ACL of who can administer jobs on the default queue.
+    </description>
+  </property>
+
+  <property>
+    <name>yarn.scheduler.capacity.root.default.acl_application_max_priority</name>
+    <value>*</value>
+    <description>
+      The ACL of who can submit applications with configured priority.
+      For e.g, [user={name} group={name} max_priority={priority} default_priority={priority}]
+    </description>
+  </property>
+
+   <property>
+     <name>yarn.scheduler.capacity.root.default.maximum-application-lifetime
+     </name>
+     <value>-1</value>
+     <description>
+        Maximum lifetime of an application which is submitted to a queue
+        in seconds. Any value less than or equal to zero will be considered as
+        disabled.
+        This will be a hard time limit for all applications in this
+        queue. If positive value is configured then any application submitted
+        to this queue will be killed after exceeds the configured lifetime.
+        User can also specify lifetime per application basis in
+        application submission context. But user lifetime will be
+        overridden if it exceeds queue maximum lifetime. It is point-in-time
+        configuration.
+        Note : Configuring too low value will result in killing application
+        sooner. This feature is applicable only for leaf queue.
+     </description>
+   </property>
+
+   <property>
+     <name>yarn.scheduler.capacity.root.default.default-application-lifetime
+     </name>
+     <value>-1</value>
+     <description>
+        Default lifetime of an application which is submitted to a queue
+        in seconds. Any value less than or equal to zero will be considered as
+        disabled.
+        If the user has not submitted application with lifetime value then this
+        value will be taken. It is point-in-time configuration.
+        Note : Default lifetime can't exceed maximum lifetime. This feature is
+        applicable only for leaf queue.
+     </description>
+   </property>
+
+  <property>
+    <name>yarn.scheduler.capacity.node-locality-delay</name>
+    <value>40</value>
+    <description>
+      Number of missed scheduling opportunities after which the CapacityScheduler 
+      attempts to schedule rack-local containers.
+      When setting this parameter, the size of the cluster should be taken into account.
+      We use 40 as the default value, which is approximately the number of nodes in one rack.
+      Note, if this value is -1, the locality constraint in the container request
+      will be ignored, which disables the delay scheduling.
+    </description>
+  </property>
+
+  <property>
+    <name>yarn.scheduler.capacity.rack-locality-additional-delay</name>
+    <value>-1</value>
+    <description>
+      Number of additional missed scheduling opportunities over the node-locality-delay
+      ones, after which the CapacityScheduler attempts to schedule off-switch containers,
+      instead of rack-local ones.
+      Example: with node-locality-delay=40 and rack-locality-delay=20, the scheduler will
+      attempt rack-local assignments after 40 missed opportunities, and off-switch assignments
+      after 40+20=60 missed opportunities.
+      When setting this parameter, the size of the cluster should be taken into account.
+      We use -1 as the default value, which disables this feature. In this case, the number
+      of missed opportunities for assigning off-switch containers is calculated based on
+      the number of containers and unique locations specified in the resource request,
+      as well as the size of the cluster.
+    </description>
+  </property>
+
+  <property>
+    <name>yarn.scheduler.capacity.queue-mappings</name>
+    <value></value>
+    <description>
+      A list of mappings that will be used to assign jobs to queues
+      The syntax for this list is [u|g]:[name]:[queue_name][,next mapping]*
+      Typically this list will be used to map users to queues,
+      for example, u:%user:%user maps all users to queues with the same name
+      as the user.
+    </description>
+  </property>
+
+  <property>
+    <name>yarn.scheduler.capacity.queue-mappings-override.enable</name>
+    <value>false</value>
+    <description>
+      If a queue mapping is present, will it override the value specified
+      by the user? This can be used by administrators to place jobs in queues
+      that are different than the one specified by the user.
+      The default is false.
+    </description>
+  </property>
+
+  <property>
+    <name>yarn.scheduler.capacity.per-node-heartbeat.maximum-offswitch-assignments</name>
+    <value>1</value>
+    <description>
+      Controls the number of OFF_SWITCH assignments allowed
+      during a node's heartbeat. Increasing this value can improve
+      scheduling rate for OFF_SWITCH containers. Lower values reduce
+      "clumping" of applications on particular nodes. The default is 1.
+      Legal values are 1-MAX_INT. This config is refreshable.
+    </description>
+  </property>
+
+
+  <property>
+    <name>yarn.scheduler.capacity.application.fail-fast</name>
+    <value>false</value>
+    <description>
+      Whether RM should fail during recovery if previous applications'
+      queue is no longer valid.
+    </description>
+  </property>
+
+</configuration>
diff --git a/testdata/cluster/node_templates/common/etc/hadoop/conf/yarn-site.xml.py b/testdata/cluster/node_templates/common/etc/hadoop/conf/yarn-site.xml.py
new file mode 100644
index 0000000..305feb3
--- /dev/null
+++ b/testdata/cluster/node_templates/common/etc/hadoop/conf/yarn-site.xml.py
@@ -0,0 +1,97 @@
+#!/usr/bin/env python
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import os
+import sys
+
+kerberize = os.environ.get('IMPALA_KERBERIZE') == '1'
+hive_major_version = int(os.environ['IMPALA_HIVE_VERSION'][0])
+
+
+def _get_system_ram_mb():
+  lines = file("/proc/meminfo").readlines()
+  memtotal_line = [l for l in lines if l.startswith('MemTotal')][0]
+  mem_kb = int(memtotal_line.split()[1])
+  return mem_kb / 1024
+
+
+def _get_yarn_nm_ram_mb():
+  sys_ram = _get_system_ram_mb()
+  # Fit into the following envelope:
+  # - need 4GB at a bare minimum
+  # - leave at least 24G for other services
+  # - don't need more than 48G
+  ret = min(max(sys_ram - 24 * 1024, 4096), 48 * 1024)
+  print >>sys.stderr, "Configuring Yarn NM to use {0}MB RAM".format(ret)
+  return ret
+
+
+CONFIG = {
+  # Host/port configs
+  'yarn.resourcemanager.webapp.address': '${EXTERNAL_LISTEN_HOST}:${YARN_WEBUI_PORT}',
+  'yarn.nodemanager.address': '${INTERNAL_LISTEN_HOST}:${NODEMANAGER_PORT}',
+  'yarn.nodemanager.localizer.address': '${INTERNAL_LISTEN_HOST}:${NODEMANAGER_LOCALIZER_PORT}',
+  'yarn.nodemanager.webapp.address': '${INTERNAL_LISTEN_HOST}:${NODEMANAGER_WEBUI_PORT}',
+
+  # Directories
+  'yarn.nodemanager.local-dirs': '${NODE_DIR}/var/lib/hadoop-yarn/cache/${USER}/nm-local-dir',
+  'yarn.nodemanager.log-dirs': '${NODE_DIR}/var/log/hadoop-yarn/containers',
+
+  # Enable the MR shuffle service, which is also used by Tez.
+  'yarn.nodemanager.aux-services': 'mapreduce_shuffle',
+  'yarn.nodemanager.aux-services.mapreduce_shuffle.class': 'org.apache.hadoop.mapred.ShuffleHandler',
+  # Disable vmem checking, since vmem is essentially free, and tasks
+  # fail with vmem limit errors otherwise.
+  'yarn.nodemanager.vmem-check-enabled': 'false',
+
+  # Limit memory used by the NM to 8GB.
+  # TODO(todd): auto-configure this based on the memory available on the machine
+  # to speed up data-loading.
+  'yarn.nodemanager.resource.memory-mb': _get_yarn_nm_ram_mb()
+}
+
+app_classpath = [
+  # Default classpath as provided by Hadoop: these environment variables are not
+  # expanded by our config templating, but rather evaluated and expanded by
+  # YARN itself, in a context where the various _HOMEs have been defined.
+  '$HADOOP_CONF_DIR',
+  '$HADOOP_COMMON_HOME/share/hadoop/common/*',
+  '$HADOOP_COMMON_HOME/share/hadoop/common/lib/*',
+  '$HADOOP_HDFS_HOME/share/hadoop/hdfs/*',
+  '$HADOOP_HDFS_HOME/share/hadoop/hdfs/lib/*',
+  '$HADOOP_YARN_HOME/share/hadoop/yarn/*',
+  '$HADOOP_YARN_HOME/share/hadoop/yarn/lib/*',
+  # Append the LZO jar for LZO-compressed file support.
+  '${LZO_JAR_PATH}']
+
+# Hive 3 needs Tez on the classpath.
+if hive_major_version == 3:
+  app_classpath += [
+      '${TEZ_HOME}/*',
+      '${TEZ_HOME}/lib/*']
+
+CONFIG['yarn.application.classpath'] = ",".join(app_classpath)
+
+if kerberize:
+  CONFIG.update({
+    'yarn.resourcemanager.keytab': '${KRB5_KTNAME}',
+    'yarn.resourcemanager.principal': '${MINIKDC_PRINC_USER}',
+    'yarn.nodemanager.keytab': '${KRB5_KTNAME}',
+    'yarn.nodemanager.principal': '${MINIKDC_PRINC_USER}',
+  })
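To make the NodeManager memory envelope concrete, here is the arithmetic
_get_yarn_nm_ram_mb() performs for two hypothetical hosts:

    # 64GB host: min(max(65536 - 24576, 4096), 49152) = 40960 MB (~40GB)
    # 16GB host: min(max(16384 - 24576, 4096), 49152) =  4096 MB (the 4GB floor)
    for sys_ram_mb in (64 * 1024, 16 * 1024):
        print(min(max(sys_ram_mb - 24 * 1024, 4096), 48 * 1024))
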
diff --git a/testdata/cluster/node_templates/common/etc/hadoop/conf/yarn-site.xml.tmpl b/testdata/cluster/node_templates/common/etc/hadoop/conf/yarn-site.xml.tmpl
deleted file mode 100644
index 036a21c..0000000
--- a/testdata/cluster/node_templates/common/etc/hadoop/conf/yarn-site.xml.tmpl
+++ /dev/null
@@ -1,154 +0,0 @@
-<?xml version="1.0"?>
-<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
-
-<!-- TODO: Remove any Llama-related configuration. Can this file be removed entirely? -->
-<configuration>
-  <property>
-    <name>yarn.resourcemanager.webapp.address</name>
-    <value>${EXTERNAL_LISTEN_HOST}:${YARN_WEBUI_PORT}</value>
-  </property>
-
-  <property>
-    <name>yarn.nodemanager.address</name>
-    <value>${INTERNAL_LISTEN_HOST}:${NODEMANAGER_PORT}</value>
-  </property>
-
-  <property>
-    <name>yarn.nodemanager.delete.debug-delay-sec</name>
-    <value>600</value>
-  </property>
-
-  <property>
-    <name>yarn.nodemanager.resource.memory-mb</name>
-    <value>16384</value>
-  </property>
-
-  <property>
-    <name>yarn.nodemanager.resource.cpu-vcores</name>
-    <value>16</value>
-  </property>
-
-  <property>
-    <name>yarn.resourcemanager.nodemanagers.heartbeat-interval-ms</name>
-    <value>100</value>
-  </property>
-
-  <property>
-    <name>yarn.scheduler.fair.continuous-scheduling-enabled</name>
-    <value>true</value>
-  </property>
-
-  <property>
-    <name>yarn.scheduler.fair.assignmultiple</name>
-    <value>true</value>
-  </property>
-
-  <property>
-    <name>yarn.resourcemanager.scheduler.class</name>
-    <value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler</value>
-  </property>
-
-  <property>
-    <name>yarn.nodemanager.localizer.address</name>
-    <value>${INTERNAL_LISTEN_HOST}:${NODEMANAGER_LOCALIZER_PORT}</value>
-  </property>
-
-  <property>
-    <name>yarn.nodemanager.webapp.address</name>
-    <value>${INTERNAL_LISTEN_HOST}:${NODEMANAGER_WEBUI_PORT}</value>
-  </property>
-
-  <property>
-    <name>yarn.nodemanager.local-dirs</name>
-    <value>${NODE_DIR}/data/yarn/local</value>
-  </property>
-
-  <property>
-    <name>yarn.nodemanager.log-dirs</name>
-    <value>${NODE_DIR}/data/yarn/logs</value>
-  </property>
-
-  <property>
-    <name>yarn.nodemanager.aux-services</name>
-    <value>mapreduce_shuffle</value>
-  </property>
-
-  <property>
-    <name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
-    <value>org.apache.hadoop.mapred.ShuffleHandler</value>
-  </property>
-
-  <property>
-    <name>yarn.log-aggregation-enable</name>
-    <value>true</value>
-  </property>
-
-  <property>
-    <description>List of directories to store localized files in.</description>
-    <name>yarn.nodemanager.local-dirs</name>
-    <value>${NODE_DIR}/var/lib/hadoop-yarn/cache/${user.name}/nm-local-dir</value>
-  </property>
-
-  <property>
-    <description>Where to store container logs.</description>
-    <name>yarn.nodemanager.log-dirs</name>
-    <value>${NODE_DIR}/var/log/hadoop-yarn/containers</value>
-  </property>
-
-  <property>
-    <description>Where to aggregate logs to.</description>
-    <name>yarn.nodemanager.remote-app-log-dir</name>
-    <value>${NODE_DIR}/var/log/hadoop-yarn/apps</value>
-  </property>
-
-  <property>
-    <description>Classpath for typical applications.</description>
-     <name>yarn.application.classpath</name>
-     <value>
-        ${HADOOP_CONF_DIR},
-        ${HADOOP_HOME}/share/hadoop/tools/lib/*,
-        ${HADOOP_HOME}/share/hadoop/common/*,
-        ${HADOOP_HOME}/share/hadoop/common/lib/*,
-        ${HADOOP_HOME}/share/hadoop/hdfs/*,
-        ${HADOOP_HOME}/share/hdfs/common/lib/*,
-        ${HADOOP_HOME}/share/hadoop/mapreduce/*,
-        ${HADOOP_HOME}/share/hadoop/mapreduce/lib/*,
-        ${HADOOP_HOME}/share/hadoop/yarn/*,
-        ${HADOOP_HOME}/share/hadoop/yarn/lib/*,
-        ${LZO_JAR_PATH}
-     </value>
-  </property>
-
-  <!-- BEGIN Kerberos settings -->
-
-  <!-- KERBEROS TODO: Add these to yarn.application.classpath.
-       ${IMPALA_FE_DIR}/target/*,${HADOOP_LZO}/build/*,
-       ${IMPALA_FE_DIR}/target/dependency/* -->
-
-  <!-- ResourceManager security configs -->
-  <property>
-    <name>yarn.resourcemanager.keytab</name>
-    <value>${KRB5_KTNAME}</value>
-  </property>
-
-  <property>
-    <name>yarn.resourcemanager.principal</name>
-    <value>${MINIKDC_PRINC_USER}</value>
-    <!-- Sort of horrible: instead of the yarn principle, we'll use ${USER}
-         so that we don't have a problem with file system permissions. -->
-  </property>
-
-  <!-- NodeManager security configs -->
-  <property>
-    <name>yarn.nodemanager.keytab</name>
-    <value>${KRB5_KTNAME}</value>
-  </property>
-
-  <property>
-    <name>yarn.nodemanager.principal</name>
-    <value>${MINIKDC_PRINC_USER}</value>
-    <!-- Also sort of horrible as per above -->
-  </property>
-  <!-- END Kerberos settings -->
-
-</configuration>


[impala] 01/03: IMPALA-8121: part 2: use local catalog in containers

Posted by ta...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tarmstrong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit a2c5d953b0fa6d69aa34eccaa13fa9aacd31ad10
Author: Tim Armstrong <ta...@cloudera.com>
AuthorDate: Thu Apr 25 11:13:16 2019 -0700

    IMPALA-8121: part 2: use local catalog in containers
    
    This enables "modern" catalog features including the
    local catalog and HMS notification support in the
    dockerised minicluster by default.
    
    The flags can be overridden if needed.
    
    Skip tests affected by these bugs:
    * IMPALA-8486 (LibCache invalidations)
    * IMPALA-8458 (alter column stats)
    * IMPALA-7131 (data sources not supported)
    * IMPALA-7538 (HDFS caching DDL not supported)
    * IMPALA-8489 TestRecoverPartitions.test_post_invalidate fails with
      IllegalStateException
    * IMPALA-8459 (cannot drop Kudu table)
    * IMPALA-7539 (insert permission checks)
    
    Fix handling of table properties in _get_properties()
    to avoid including properties from unrelated sections.
    This caused problems because of additional properties
    added by metastore event processing.
    
    Rewrite test_partition_ddl_predicates() to change file formats rather
    than use HDFS caching DDL.
    
    Update the various test_kudu_col* tests to not expect staleness of
    Kudu metadata for catalog V2.
    
    Fix IMPALA-8464 so that testMetaDataGetColumnComments() allows the
    table comment to be present, which is the new behaviour. Add a
    new end-to-end test test_get_tables() that tests the precise
    behaviour for different catalog versions so as to not lose
    coverage.
    
    Change-Id: I900d4b718cca98bcf86d36a2e64c0b6a424a5b7c
    Reviewed-on: http://gerrit.cloudera.org:8080/13226
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 docker/catalogd/Dockerfile                         |  3 +-
 docker/coord_exec/Dockerfile                       |  3 +-
 docker/coordinator/Dockerfile                      |  2 +-
 .../java/org/apache/impala/service/JdbcTest.java   | 12 +++--
 .../partition-ddl-predicates-hdfs-only.test        | 12 ++---
 tests/common/impala_connection.py                  |  1 -
 tests/common/impala_test_suite.py                  | 20 +++++++-
 tests/common/skip.py                               | 50 +++++++++++++++++++
 tests/hs2/test_hs2.py                              | 57 ++++++++++++++++++++++
 tests/metadata/test_ddl.py                         | 18 ++++---
 tests/metadata/test_ddl_base.py                    | 14 ++++--
 tests/metadata/test_hdfs_permissions.py            |  4 +-
 tests/metadata/test_hms_integration.py             | 10 ++--
 tests/metadata/test_metadata_query_statements.py   |  4 +-
 tests/metadata/test_recover_partitions.py          |  3 +-
 tests/query_test/test_hdfs_caching.py              |  3 +-
 tests/query_test/test_insert_behaviour.py          |  8 ++-
 tests/query_test/test_kudu.py                      | 36 ++++++++++----
 tests/query_test/test_queries.py                   |  3 +-
 tests/query_test/test_udfs.py                      |  6 ++-
 20 files changed, 222 insertions(+), 47 deletions(-)

diff --git a/docker/catalogd/Dockerfile b/docker/catalogd/Dockerfile
index 8699fb3..05e555d 100644
--- a/docker/catalogd/Dockerfile
+++ b/docker/catalogd/Dockerfile
@@ -22,4 +22,5 @@ FROM impala_base
 EXPOSE 25020
 
 ENTRYPOINT ["/opt/impala/bin/daemon_entrypoint.sh", "/opt/impala/bin/catalogd",\
-     "-abort_on_config_error=false", "-state_store_host=statestored"]
+     "-abort_on_config_error=false", "-state_store_host=statestored",\
+     "-catalog_topic_mode=minimal", "-hms_event_polling_interval_s=1"]
diff --git a/docker/coord_exec/Dockerfile b/docker/coord_exec/Dockerfile
index 6e6fa09..11356bc 100644
--- a/docker/coord_exec/Dockerfile
+++ b/docker/coord_exec/Dockerfile
@@ -27,4 +27,5 @@ EXPOSE 25000
 
 ENTRYPOINT ["/opt/impala/bin/daemon_entrypoint.sh", "/opt/impala/bin/impalad",\
      "-abort_on_config_error=false", "-state_store_host=statestored",\
-     "-catalog_service_host=catalogd", "-mem_limit_includes_jvm=true"]
+     "-catalog_service_host=catalogd", "-mem_limit_includes_jvm=true",\
+     "-use_local_catalog=true"]
diff --git a/docker/coordinator/Dockerfile b/docker/coordinator/Dockerfile
index def6da3..cf86f2e 100644
--- a/docker/coordinator/Dockerfile
+++ b/docker/coordinator/Dockerfile
@@ -28,4 +28,4 @@ EXPOSE 25000
 ENTRYPOINT ["/opt/impala/bin/daemon_entrypoint.sh", "/opt/impala/bin/impalad",\
      "-abort_on_config_error=false", "-state_store_host=statestored",\
      "-catalog_service_host=catalogd", "-is_executor=false", \
-     "-mem_limit_includes_jvm=true"]
+     "-mem_limit_includes_jvm=true", "-use_local_catalog=true"]
diff --git a/fe/src/test/java/org/apache/impala/service/JdbcTest.java b/fe/src/test/java/org/apache/impala/service/JdbcTest.java
index 820373d..1749fbe 100644
--- a/fe/src/test/java/org/apache/impala/service/JdbcTest.java
+++ b/fe/src/test/java/org/apache/impala/service/JdbcTest.java
@@ -473,15 +473,19 @@ public class JdbcTest {
     addTestTable("create table default.jdbc_column_comments_test (" +
          "a int comment 'column comment') comment 'table comment'");
 
-    // If a table is not yet loaded before getTables(), then the 'remarks' field
-    // is left empty. getColumns() loads the table metadata, so later getTables()
-    // calls will return 'remarks' correctly.
     ResultSet rs = con_.getMetaData().getTables(
         null, "default", "jdbc_column_comments_test", null);
     assertTrue(rs.next());
     assertEquals("Incorrect table name", "jdbc_column_comments_test",
         rs.getString("TABLE_NAME"));
-    assertEquals("Incorrect table comment", "", rs.getString("REMARKS"));
+
+    String remarks = rs.getString("REMARKS");
+    // IMPALA-7587: with catalog V2, if a table is not yet loaded before
+    // getTables(), then the 'remarks' field is left empty. getColumns()
+    // loads the table metadata, so later getTables() calls will return
+    // 'remarks' correctly.
+    assertTrue("Incorrect table comment: " + remarks,
+        remarks.equals("") || remarks.equals("table comment"));
 
     rs = con_.getMetaData().getColumns(
         null, "default", "jdbc_column_comments_test", null);
diff --git a/testdata/workloads/functional-query/queries/QueryTest/partition-ddl-predicates-hdfs-only.test b/testdata/workloads/functional-query/queries/QueryTest/partition-ddl-predicates-hdfs-only.test
index 5ca2a06..1a166b7 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/partition-ddl-predicates-hdfs-only.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/partition-ddl-predicates-hdfs-only.test
@@ -16,23 +16,23 @@ alter table p1_hdfs add partition (j=NULL,k=NULL);
 insert into p1_hdfs partition (j, k) values (100, 1, "a"), (200, 1, "b"), (300, 1, "c");
 ====
 ---- QUERY
-alter table p1_hdfs partition (j<2, k in ("b", "c")) set cached in 'testPool'
+alter table p1_hdfs partition (j<2, k in ("b", "c")) set fileformat parquet
 ---- RESULTS
-'Cached 2 partition(s).'
+'Updated 2 partition(s).'
 ---- TYPES
 STRING
 ====
 ---- QUERY
-alter table p1_hdfs partition (j<2, j>0, k<>"d") set uncached
+alter table p1_hdfs partition (j<2, j>0, k<>"d") set fileformat avro
 ---- RESULTS
-'Uncached 2 partition(s).'
+'Updated 3 partition(s).'
 ---- TYPES
 STRING
 ====
 ---- QUERY
-alter table p1_hdfs partition (j=3 or j=2, k like "%") set uncached
+alter table p1_hdfs partition (j=3 or j=2, k like "%") set fileformat parquet
 ---- RESULTS
-'Uncached 0 partition(s).'
+'Updated 3 partition(s).'
 ---- TYPES
 STRING
 ====
diff --git a/tests/common/impala_connection.py b/tests/common/impala_connection.py
index 6f0c0fc..2f6f60c 100644
--- a/tests/common/impala_connection.py
+++ b/tests/common/impala_connection.py
@@ -262,7 +262,6 @@ class ImpylaHS2Connection(ImpalaConnection):
     LOG.info("-- connecting to {0} with impyla".format(self.__host_port))
     host, port = self.__host_port.split(":")
     self.__impyla_conn = impyla.connect(host=host, port=int(port))
-    LOG.info("Conn {0}".format(self.__impyla_conn))
     # Get the default query options for the session before any modifications are made.
     self.__cursor = self.__impyla_conn.cursor()
     self.__cursor.execute("set all")
diff --git a/tests/common/impala_test_suite.py b/tests/common/impala_test_suite.py
index 0be8c95..6630b3f 100644
--- a/tests/common/impala_test_suite.py
+++ b/tests/common/impala_test_suite.py
@@ -919,7 +919,25 @@ class ImpalaTestSuite(BaseTestSuite):
       except Exception:
         time.sleep(0.2)
         continue
-    raise Exception("Table {0} didn't show up after {1}s", db_name, timeout_s)
+    raise Exception("DB {0} didn't show up after {1}s", db_name, timeout_s)
+
+  def wait_for_table_to_appear(self, db_name, table_name, timeout_s):
+    """Wait until the table with 'table_name' in 'db_name' is present in the
+    impalad's local catalog. Fail after timeout_s if the table doesn't appear."""
+    start_time = time.time()
+    while time.time() - start_time < timeout_s:
+      try:
+        # This will throw an exception if the table is not present.
+        self.client.execute("describe `{db_name}`.`{table_name}`".format(
+                            db_name=db_name, table_name=table_name))
+        return
+      except Exception, ex:
+        print str(ex)
+        time.sleep(0.2)
+        continue
+    raise Exception("Table {0}.{1} didn't show up after {2}s", db_name, table_name,
+                    timeout_s)
+
 
   def assert_impalad_log_contains(self, level, line_regex, expected_count=1):
     """
diff --git a/tests/common/skip.py b/tests/common/skip.py
index 3720378..8c2bdfb 100644
--- a/tests/common/skip.py
+++ b/tests/common/skip.py
@@ -213,3 +213,53 @@ class SkipIfCatalogV2:
     return pytest.mark.skipif(
       IMPALA_TEST_CLUSTER_PROPERTIES.is_catalog_v2_cluster(),
       reason="Test is specific to old implementation of catalog.")
+
+  # TODO: IMPALA-8486: fix invalidation or update tests to reflect expected behaviour.
+  @classmethod
+  def lib_cache_invalidation_broken(self):
+    return pytest.mark.skipif(
+      IMPALA_TEST_CLUSTER_PROPERTIES.is_catalog_v2_cluster(),
+      reason="IMPALA-8486: LibCache isn't invalidated by function DDL.")
+
+  # TODO: IMPALA-8458: fix bug or update tests to reflect expected behaviour.
+  @classmethod
+  def alter_column_stats_broken(self):
+    return pytest.mark.skipif(
+      IMPALA_TEST_CLUSTER_PROPERTIES.is_catalog_v2_cluster(),
+      reason="IMPALA-8458: setting column stats without setting NDV is no-op.")
+
+  # TODO: IMPALA-7131: add support or update tests to reflect expected behaviour.
+  @classmethod
+  def data_sources_unsupported(self):
+    return pytest.mark.skipif(
+      IMPALA_TEST_CLUSTER_PROPERTIES.is_catalog_v2_cluster(),
+      reason="IMPALA-7131: data sources not supported.")
+
+  # TODO: IMPALA-7538: add support or update tests to reflect expected behaviour.
+  @classmethod
+  def hdfs_caching_ddl_unsupported(self):
+    return pytest.mark.skipif(
+      IMPALA_TEST_CLUSTER_PROPERTIES.is_catalog_v2_cluster(),
+      reason="IMPALA-7538: HDFS caching DDL not supported.")
+
+  # TODO: IMPALA-8489: fix this bug.
+  @classmethod
+  def impala_8489(self):
+    return pytest.mark.skipif(
+      IMPALA_TEST_CLUSTER_PROPERTIES.is_catalog_v2_cluster(),
+      reason="IMPALA-8489: TestRecoverPartitions.test_post_invalidate "
+             "IllegalStateException.")
+
+  # TODO: IMPALA-8459: fix this bug.
+  @classmethod
+  def impala_8459(self):
+    return pytest.mark.skipif(
+      IMPALA_TEST_CLUSTER_PROPERTIES.is_catalog_v2_cluster(),
+      reason="IMPALA-8459: some kudu DDL is broken for local catalog")
+
+  # TODO: IMPALA-7539: fix this bug.
+  @classmethod
+  def impala_7539(self):
+    return pytest.mark.skipif(
+      IMPALA_TEST_CLUSTER_PROPERTIES.is_catalog_v2_cluster(),
+      reason="IMPALA-7539: support HDFS permission checks for LocalCatalog")
diff --git a/tests/hs2/test_hs2.py b/tests/hs2/test_hs2.py
index 16fc156..cec1d9e 100644
--- a/tests/hs2/test_hs2.py
+++ b/tests/hs2/test_hs2.py
@@ -24,6 +24,7 @@ import time
 from urllib2 import urlopen
 
 from ImpalaService import ImpalaHiveServer2Service
+from tests.common.environ import IMPALA_TEST_CLUSTER_PROPERTIES
 from tests.common.skip import SkipIfDockerizedCluster
 from tests.hs2.hs2_test_suite import HS2TestSuite, needs_session, operation_id_to_query_id
 from TCLIService import TCLIService
@@ -402,6 +403,52 @@ class TestHS2(HS2TestSuite):
     assert "Sql Statement: GET_SCHEMAS" in profile_page
     assert "Query Type: DDL" in profile_page
 
+  @pytest.mark.execute_serially
+  @needs_session()
+  def test_get_tables(self):
+    """Basic test for the GetTables() HS2 method. Needs to execute serially because
+    the test depends on controlling whether a table is loaded or not and other
+    concurrent tests loading or invalidating tables could interfere with it."""
+    # TODO: unique_database would be better, but it doesn't work with @needs_session
+    # at the moment.
+    table = "__hs2_column_comments_test"
+    self.execute_query("drop table if exists {0}".format(table))
+    self.execute_query("""
+        create table {0} (a int comment 'column comment')
+        comment 'table comment'""".format(table))
+    try:
+      req = TCLIService.TGetTablesReq()
+      req.sessionHandle = self.session_handle
+      req.schemaName = "default"
+      req.tableName = table
+
+      # Execute the request twice, the first time with the table unloaded and the second
+      # with it loaded.
+      self.execute_query("invalidate metadata {0}".format(table))
+      for i in range(2):
+        get_tables_resp = self.hs2_client.GetTables(req)
+        TestHS2.check_response(get_tables_resp)
+
+        fetch_results_resp = self._fetch_results(get_tables_resp.operationHandle, 100)
+        results = fetch_results_resp.results
+        table_cat = results.columns[0].stringVal.values[0]
+        table_schema = results.columns[1].stringVal.values[0]
+        table_name = results.columns[2].stringVal.values[0]
+        table_type = results.columns[3].stringVal.values[0]
+        table_remarks = results.columns[4].stringVal.values[0]
+        assert table_cat == ''
+        assert table_schema == "default"
+        assert table_name == table
+        assert table_type == "TABLE"
+        if i == 0 and not IMPALA_TEST_CLUSTER_PROPERTIES.is_catalog_v2_cluster():
+          # IMPALA-7587: comments not returned for non-loaded tables with legacy catalog.
+          assert table_remarks == ""
+        else:
+          assert table_remarks == "table comment"
+        # Ensure the table is loaded for the second iteration.
+        self.execute_query("describe {0}".format(table))
+    finally:
+      self.execute_query("drop table {0}".format(table))
 
   @needs_session(conf_overlay={"idle_session_timeout": "5"})
   def test_get_operation_status_session_timeout(self):
@@ -559,3 +606,13 @@ class TestHS2(HS2TestSuite):
     typed_col = getattr(results.columns[0], 'stringVal')
     for colType in types:
       assert typed_col.values.count(colType) == 1
+
+  def _fetch_results(self, operation_handle, max_rows):
+    """Fetch results from 'operation_handle' with up to 'max_rows' rows using
+    self.hs2_client, returning the TFetchResultsResp object."""
+    fetch_results_req = TCLIService.TFetchResultsReq()
+    fetch_results_req.operationHandle = operation_handle
+    fetch_results_req.maxRows = max_rows
+    fetch_results_resp = self.hs2_client.FetchResults(fetch_results_req)
+    TestHS2.check_response(fetch_results_resp)
+    return fetch_results_resp
diff --git a/tests/metadata/test_ddl.py b/tests/metadata/test_ddl.py
index 05d8c01..765b06a 100644
--- a/tests/metadata/test_ddl.py
+++ b/tests/metadata/test_ddl.py
@@ -25,7 +25,8 @@ from test_ddl_base import TestDdlBase
 from tests.beeswax.impala_beeswax import ImpalaBeeswaxException
 from tests.common.impala_test_suite import LOG
 from tests.common.parametrize import UniqueDatabase
-from tests.common.skip import SkipIf, SkipIfABFS, SkipIfADLS, SkipIfKudu, SkipIfLocal
+from tests.common.skip import (SkipIf, SkipIfABFS, SkipIfADLS, SkipIfKudu, SkipIfLocal,
+                               SkipIfCatalogV2)
 from tests.common.test_dimensions import create_single_exec_option_dimension
 from tests.util.filesystem_utils import (
     WAREHOUSE,
@@ -420,12 +421,16 @@ class TestDdlStatements(TestDdlBase):
         file_data='1984')
     self.run_test_case('QueryTest/alter-table', vector, use_db=unique_database,
         multiple_impalad=self._use_multiple_impalad(vector))
-    # The following tests require HDFS caching which is supported only in the HDFS
-    # filesystem.
-    if IS_HDFS:
-      self.run_test_case('QueryTest/alter-table-hdfs-caching', vector,
-          use_db=unique_database, multiple_impalad=self._use_multiple_impalad(vector))
 
+  @SkipIf.not_hdfs
+  @SkipIfLocal.hdfs_client
+  @SkipIfCatalogV2.hdfs_caching_ddl_unsupported()
+  @UniqueDatabase.parametrize(sync_ddl=True, num_dbs=2)
+  def test_alter_table_hdfs_caching(self, vector, unique_database):
+    self.run_test_case('QueryTest/alter-table-hdfs-caching', vector,
+        use_db=unique_database, multiple_impalad=self._use_multiple_impalad(vector))
+
+  @SkipIfCatalogV2.alter_column_stats_broken()
   @UniqueDatabase.parametrize(sync_ddl=True)
   def test_alter_set_column_stats(self, vector, unique_database):
     self.run_test_case('QueryTest/alter-table-set-column-stats', vector,
@@ -797,6 +802,7 @@ class TestLibCache(TestDdlBase):
   # Run serially because this test inspects global impalad metrics.
   # TODO: The metrics checks could be relaxed to enable running this test in
   # parallel, but that might need a more general wait_for_metric_value().
+  @SkipIfCatalogV2.data_sources_unsupported()
   @pytest.mark.execute_serially
   def test_create_drop_data_src(self, vector, unique_database):
     """This will create, run, and drop the same data source repeatedly, exercising
diff --git a/tests/metadata/test_ddl_base.py b/tests/metadata/test_ddl_base.py
index 83399b0..63409ea 100644
--- a/tests/metadata/test_ddl_base.py
+++ b/tests/metadata/test_ddl_base.py
@@ -84,13 +84,17 @@ class TestDdlBase(ImpalaTestSuite):
     match = False
     properties = dict()
     for row in result.data:
-      if section_name in row:
-        match = True
+      fields = row.split("\t")
+      if fields[0] != '':
+        # Start of new section.
+        if match:
+          # Finished processing the matching section.
+          break
+        match = section_name in fields[0]
       elif match:
-        row = row.split('\t')
-        if row[1] == 'NULL':
+        if fields[1] == 'NULL':
           break
-        properties[row[1].rstrip()] = row[2].rstrip()
+        properties[fields[1].rstrip()] = fields[2].rstrip()
     return properties
 
   def _get_property(self, property_name, name, is_db=False):
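
Note: the reworked loop above keys on the first tab-separated field: a non-empty
first field starts a new section (ending a matching one), while an empty first
field is a property row within the current section. A self-contained sketch of the
same logic on rows shaped like DESCRIBE FORMATTED output:

    def parse_section(rows, section_name):
      match = False
      properties = dict()
      for row in rows:
        fields = row.split("\t")
        if fields[0] != '':
          if match:
            break  # Finished the matching section.
          match = section_name in fields[0]
        elif match:
          if fields[1] == 'NULL':
            break
          properties[fields[1].rstrip()] = fields[2].rstrip()
      return properties

    rows = ["Table Parameters:\tNULL\tNULL",
            "\ttotalSize           \t1024      ",
            "\tnumRows             \t42        ",
            "# Storage Information\tNULL\tNULL"]
    assert parse_section(rows, "Table Parameters") == \
        {"totalSize": "1024", "numRows": "42"}
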
diff --git a/tests/metadata/test_hdfs_permissions.py b/tests/metadata/test_hdfs_permissions.py
index d495fc4..192920c 100644
--- a/tests/metadata/test_hdfs_permissions.py
+++ b/tests/metadata/test_hdfs_permissions.py
@@ -16,7 +16,8 @@
 # under the License.
 
 from tests.common.impala_test_suite import ImpalaTestSuite
-from tests.common.skip import SkipIfS3, SkipIfABFS, SkipIfADLS, SkipIfLocal
+from tests.common.skip import (SkipIfS3, SkipIfABFS, SkipIfADLS, SkipIfLocal,
+                               SkipIfCatalogV2)
 from tests.common.test_dimensions import (
     create_single_exec_option_dimension,
     create_uncompressed_text_dimension)
@@ -53,6 +54,7 @@ class TestHdfsPermissions(ImpalaTestSuite):
     self.client.execute('drop table if exists %s' % TEST_TBL)
     self.hdfs_client.delete_file_dir('test-warehouse/%s' % TEST_TBL, recursive=True)
 
+  @SkipIfCatalogV2.impala_7539()
   def test_insert_into_read_only_table(self, vector):
     permission = 444
     if IS_ISILON:
diff --git a/tests/metadata/test_hms_integration.py b/tests/metadata/test_hms_integration.py
index 71a0879..c1a1734 100644
--- a/tests/metadata/test_hms_integration.py
+++ b/tests/metadata/test_hms_integration.py
@@ -84,9 +84,13 @@ class TestHmsIntegrationSanity(ImpalaTestSuite):
     # Creating a table with the same name using 'IF NOT EXISTS' in Impala should
     # not fail
     self.client.execute("create table if not exists hms_sanity_db.test_tbl (a int)")
-    # The table should not appear in the catalog unless invalidate metadata is
-    # executed
-    assert 'test_tbl' not in self.client.execute("show tables in hms_sanity_db").data
+    # With catalog V1, the table should not appear in the catalog until invalidate
+    # metadata is executed.
+    if IMPALA_TEST_CLUSTER_PROPERTIES.is_catalog_v2_cluster():
+      self.wait_for_table_to_appear("hms_sanity_db", "test_tbl", 10)
+      assert 'test_tbl' in self.client.execute("show tables in hms_sanity_db").data
+    else:
+      assert 'test_tbl' not in self.client.execute("show tables in hms_sanity_db").data
     self.client.execute("invalidate metadata hms_sanity_db.test_tbl")
     assert 'test_tbl' in self.client.execute("show tables in hms_sanity_db").data
 
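
Note: wait_for_table_to_appear is not shown in this section. With the local
catalog the HMS-created table becomes visible asynchronously, hence the poll
instead of an immediate assert. A plausible shape for such a helper, assuming a
client whose execute() returns an object with a .data list:

    import time

    def wait_for_table_to_appear(client, db, table, timeout_s):
      # Poll SHOW TABLES until the table shows up or the timeout expires.
      deadline = time.time() + timeout_s
      while time.time() < deadline:
        if table in client.execute("show tables in %s" % db).data:
          return
        time.sleep(0.2)
      raise AssertionError(
          "Table %s.%s did not appear within %ss" % (db, table, timeout_s))
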
diff --git a/tests/metadata/test_metadata_query_statements.py b/tests/metadata/test_metadata_query_statements.py
index 5633ba0..3921e87 100644
--- a/tests/metadata/test_metadata_query_statements.py
+++ b/tests/metadata/test_metadata_query_statements.py
@@ -23,7 +23,8 @@ import re
 from tests.beeswax.impala_beeswax import ImpalaBeeswaxException
 from tests.common.environ import IMPALA_TEST_CLUSTER_PROPERTIES
 from tests.common.impala_test_suite import ImpalaTestSuite
-from tests.common.skip import SkipIfIsilon, SkipIfS3, SkipIfABFS, SkipIfADLS, SkipIfLocal
+from tests.common.skip import (SkipIfIsilon, SkipIfS3, SkipIfABFS, SkipIfADLS,
+                               SkipIfLocal, SkipIfCatalogV2)
 from tests.common.test_dimensions import ALL_NODES_ONLY
 from tests.common.test_dimensions import create_exec_option_dimension
 from tests.common.test_dimensions import create_uncompressed_text_dimension
@@ -134,6 +135,7 @@ class TestMetadataQueryStatements(ImpalaTestSuite):
         compare=compare_describe_formatted)
 
   @pytest.mark.execute_serially # due to data src setup/teardown
+  @SkipIfCatalogV2.data_sources_unsupported()
   def test_show_data_sources(self, vector):
     try:
       self.__create_data_sources()
diff --git a/tests/metadata/test_recover_partitions.py b/tests/metadata/test_recover_partitions.py
index 9ba4164..36a23ea 100644
--- a/tests/metadata/test_recover_partitions.py
+++ b/tests/metadata/test_recover_partitions.py
@@ -19,7 +19,7 @@
 
 import os
 from tests.common.impala_test_suite import ImpalaTestSuite
-from tests.common.skip import SkipIfLocal, SkipIfS3
+from tests.common.skip import SkipIfLocal, SkipIfS3, SkipIfCatalogV2
 from tests.common.test_dimensions import ALL_NODES_ONLY
 from tests.common.test_dimensions import create_exec_option_dimension
 from tests.util.filesystem_utils import WAREHOUSE, IS_S3
@@ -246,6 +246,7 @@ class TestRecoverPartitions(ImpalaTestSuite):
         "duplicate partition key values." % FQ_TBL_NAME
 
   @SkipIfLocal.hdfs_client
+  @SkipIfCatalogV2.impala_8489()
   def test_post_invalidate(self, vector, unique_database):
     """Test that RECOVER PARTITIONS works correctly after invalidate."""
     TBL_NAME = "test_recover_partitions"
diff --git a/tests/query_test/test_hdfs_caching.py b/tests/query_test/test_hdfs_caching.py
index d683383..cab08d8 100644
--- a/tests/query_test/test_hdfs_caching.py
+++ b/tests/query_test/test_hdfs_caching.py
@@ -26,7 +26,7 @@ from tests.common.environ import build_flavor_timeout, IS_DOCKERIZED_TEST_CLUSTE
 from tests.common.impala_cluster import ImpalaCluster
 from tests.common.impala_test_suite import ImpalaTestSuite, LOG
 from tests.common.skip import (SkipIfS3, SkipIfABFS, SkipIfADLS, SkipIfIsilon,
-    SkipIfLocal, SkipIfEC, SkipIfDockerizedCluster)
+    SkipIfLocal, SkipIfEC, SkipIfDockerizedCluster, SkipIfCatalogV2)
 from tests.common.test_dimensions import create_single_exec_option_dimension
 from tests.util.filesystem_utils import get_fs_path
 from tests.util.shell_util import exec_process
@@ -179,6 +179,7 @@ class TestHdfsCachingFallbackPath(ImpalaTestSuite):
 @SkipIfADLS.caching
 @SkipIfIsilon.caching
 @SkipIfLocal.caching
+@SkipIfCatalogV2.hdfs_caching_ddl_unsupported()
 class TestHdfsCachingDdl(ImpalaTestSuite):
   @classmethod
   def get_workload(self):
diff --git a/tests/query_test/test_insert_behaviour.py b/tests/query_test/test_insert_behaviour.py
index 3bcb0c1..fc622b1 100644
--- a/tests/query_test/test_insert_behaviour.py
+++ b/tests/query_test/test_insert_behaviour.py
@@ -24,7 +24,7 @@ import re
 from tests.common.impala_test_suite import ImpalaTestSuite
 from tests.common.parametrize import UniqueDatabase
 from tests.common.skip import (SkipIfS3, SkipIfABFS, SkipIfADLS, SkipIfIsilon,
-    SkipIfLocal, SkipIfDockerizedCluster)
+    SkipIfLocal, SkipIfDockerizedCluster, SkipIfCatalogV2)
 from tests.util.filesystem_utils import WAREHOUSE, get_fs_path, IS_S3
 
 @SkipIfLocal.hdfs_client
@@ -198,6 +198,7 @@ class TestInsertBehaviour(ImpalaTestSuite):
   @SkipIfABFS.hdfs_acls
   @SkipIfADLS.hdfs_acls
   @SkipIfIsilon.hdfs_acls
+  @SkipIfCatalogV2.impala_7539()
   def test_insert_file_permissions(self, unique_database):
     """Test that INSERT correctly respects file permission (minimum ACLs)"""
     table = "`{0}`.`insert_acl_permissions`".format(unique_database)
@@ -250,6 +251,7 @@ class TestInsertBehaviour(ImpalaTestSuite):
   @SkipIfABFS.hdfs_acls
   @SkipIfADLS.hdfs_acls
   @SkipIfIsilon.hdfs_acls
+  @SkipIfCatalogV2.impala_7539()
   def test_mixed_partition_permissions(self, unique_database):
     """
     Test that INSERT and LOAD DATA into explicit partitions are allowed even
@@ -331,6 +333,7 @@ class TestInsertBehaviour(ImpalaTestSuite):
   @SkipIfABFS.hdfs_acls
   @SkipIfADLS.hdfs_acls
   @SkipIfIsilon.hdfs_acls
+  @SkipIfCatalogV2.impala_7539()
   def test_readonly_table_dir(self, unique_database):
     """
     Test that, if a partitioned table has a read-only base directory,
@@ -364,6 +367,7 @@ class TestInsertBehaviour(ImpalaTestSuite):
   @SkipIfADLS.hdfs_acls
   @SkipIfIsilon.hdfs_acls
   @SkipIfDockerizedCluster.insert_acls
+  @SkipIfCatalogV2.impala_7539()
   def test_insert_acl_permissions(self, unique_database):
     """Test that INSERT correctly respects ACLs"""
     table = "`{0}`.`insert_acl_permissions`".format(unique_database)
@@ -443,6 +447,7 @@ class TestInsertBehaviour(ImpalaTestSuite):
   @SkipIfABFS.hdfs_acls
   @SkipIfADLS.hdfs_acls
   @SkipIfIsilon.hdfs_acls
+  @SkipIfCatalogV2.impala_7539()
   def test_load_permissions(self, unique_database):
     # We rely on test_insert_acl_permissions() to exhaustively check that ACL semantics
     # are correct. Here we just validate that LOADs can't be done when we cannot read from
@@ -569,6 +574,7 @@ class TestInsertBehaviour(ImpalaTestSuite):
   @SkipIfADLS.hdfs_acls
   @SkipIfIsilon.hdfs_acls
   @SkipIfDockerizedCluster.insert_acls
+  @SkipIfCatalogV2.impala_7539()
   def test_multiple_group_acls(self, unique_database):
     """Test that INSERT correctly respects multiple group ACLs"""
     table = "`{0}`.`insert_group_acl_permissions`".format(unique_database)
diff --git a/tests/query_test/test_kudu.py b/tests/query_test/test_kudu.py
index 776486c..216a41a 100644
--- a/tests/query_test/test_kudu.py
+++ b/tests/query_test/test_kudu.py
@@ -37,9 +37,10 @@ import time
 from datetime import datetime
 from pytz import utc
 
+from tests.common.environ import IMPALA_TEST_CLUSTER_PROPERTIES
 from tests.common.kudu_test_suite import KuduTestSuite
 from tests.common.impala_cluster import ImpalaCluster
-from tests.common.skip import SkipIfNotHdfsMinicluster, SkipIfKudu
+from tests.common.skip import SkipIfNotHdfsMinicluster, SkipIfKudu, SkipIfCatalogV2
 from tests.common.test_dimensions import add_exec_option_dimension
 from tests.verifiers.metric_verifier import MetricVerifier
 
@@ -185,11 +186,14 @@ class TestKuduOperations(KuduTestSuite):
       session.apply(op)
     session.flush()
 
-    # Scanning should result in an error
+    # Scanning should result in an error with Catalog V1, since the metadata is cached.
     try:
       cursor.execute("SELECT * FROM %s.foo" % (unique_database))
-      assert False
+      assert IMPALA_TEST_CLUSTER_PROPERTIES.is_catalog_v2_cluster(),\
+          "Should fail with Catalog V1, which caches metadata"
     except Exception as e:
+      assert not IMPALA_TEST_CLUSTER_PROPERTIES.is_catalog_v2_cluster(),\
+          "Should succeed with Catalog V2, which does not cache metadata"
       expected_error = "Column 's' is type INT but Impala expected STRING. The table "\
           "metadata in Impala may be outdated and need to be refreshed."
       assert expected_error in str(e)
@@ -229,8 +233,11 @@ class TestKuduOperations(KuduTestSuite):
     # Scanning should result in an error with Catalog V1, since the metadata is cached.
     try:
       cursor.execute("SELECT * FROM %s.foo" % (unique_database))
-      assert False
+      assert IMPALA_TEST_CLUSTER_PROPERTIES.is_catalog_v2_cluster(),\
+          "Should fail with Catalog V1, which caches metadata"
     except Exception as e:
+      assert not IMPALA_TEST_CLUSTER_PROPERTIES.is_catalog_v2_cluster(),\
+          "Should succeed with Catalog V2, which does not cache metadata"
       expected_error = "Column 's' is nullable but Impala expected it to be "\
           "not nullable. The table metadata in Impala may be outdated and need to be "\
           "refreshed."
@@ -271,8 +278,11 @@ class TestKuduOperations(KuduTestSuite):
     # Scanning should result in an error with Catalog V1, since the metadata is cached.
     try:
       cursor.execute("SELECT * FROM %s.foo" % (unique_database))
-      assert False
+      assert IMPALA_TEST_CLUSTER_PROPERTIES.is_catalog_v2_cluster(),\
+          "Should fail with Catalog V1, which caches metadata"
     except Exception as e:
+      assert not IMPALA_TEST_CLUSTER_PROPERTIES.is_catalog_v2_cluster(),\
+          "Should succeed with Catalog V2, which does not cache metadata"
       expected_error = "Column 's' is not nullable but Impala expected it to be "\
           "nullable. The table metadata in Impala may be outdated and need to be "\
           "refreshed."
@@ -305,12 +315,16 @@ class TestKuduOperations(KuduTestSuite):
     session.apply(op)
     session.flush()
 
-    # Only the first col is visible to Impala. Impala will not know about the missing
-    # column, so '*' is expanded to known columns. This doesn't have a separate check
-    # because the query can proceed and checking would need to fetch metadata from the
-    # Kudu master, which is what REFRESH is for.
     cursor.execute("SELECT * FROM %s.foo" % (unique_database))
-    assert cursor.fetchall() == [(0, )]
+    if IMPALA_TEST_CLUSTER_PROPERTIES.is_catalog_v2_cluster():
+      # Changes in Kudu should be immediately visible to Impala with Catalog V2.
+      assert cursor.fetchall() == [(0, 0)]
+    else:
+      # Only the first col is visible to Impala. Impala will not know about the missing
+      # column, so '*' is expanded to known columns. This doesn't have a separate check
+      # because the query can proceed and checking would need to fetch metadata from the
+      # Kudu master, which is what REFRESH is for.
+      assert cursor.fetchall() == [(0, )]
 
     # After a REFRESH both cols should be visible
     cursor.execute("REFRESH %s.foo" % (unique_database))
@@ -1062,6 +1076,7 @@ class TestImpalaKuduIntegration(KuduTestSuite):
              ("c", "string", "", "false", "true", "", "AUTO_ENCODING",
               "DEFAULT_COMPRESSION", "0")]
 
+  @SkipIfCatalogV2.impala_8459()
   def test_delete_external_kudu_table(self, cursor, kudu_client):
     """Check that Impala can recover from the case where the underlying Kudu table of
         an external table is dropped using the Kudu client.
@@ -1088,6 +1103,7 @@ class TestImpalaKuduIntegration(KuduTestSuite):
       cursor.execute("SHOW TABLES")
       assert (impala_table_name,) not in cursor.fetchall()
 
+  @SkipIfCatalogV2.impala_8459()
   def test_delete_managed_kudu_table(self, cursor, kudu_client, unique_database):
     """Check that dropping a managed Kudu table works even if the underlying Kudu table
         has been dropped externally."""
diff --git a/tests/query_test/test_queries.py b/tests/query_test/test_queries.py
index d156212..8e2ad17 100644
--- a/tests/query_test/test_queries.py
+++ b/tests/query_test/test_queries.py
@@ -22,7 +22,7 @@ import re
 from copy import deepcopy
 
 from tests.common.impala_test_suite import ImpalaTestSuite
-from tests.common.skip import SkipIfEC, SkipIfDockerizedCluster
+from tests.common.skip import SkipIfEC, SkipIfCatalogV2
 from tests.common.test_dimensions import (
     create_uncompressed_text_dimension, extend_exec_option_dimension,
     create_beeswax_hs2_dimension, hs2_parquet_constraint)
@@ -170,6 +170,7 @@ class TestQueriesTextTables(ImpalaTestSuite):
     vector.get_value('exec_option')['abort_on_error'] = 1
     self.run_test_case('QueryTest/strict-mode-abort', vector)
 
+  @SkipIfCatalogV2.data_sources_unsupported()
   def test_data_source_tables(self, vector):
     self.run_test_case('QueryTest/data-source-tables', vector)
 
diff --git a/tests/query_test/test_udfs.py b/tests/query_test/test_udfs.py
index 771430f..5823ad3 100644
--- a/tests/query_test/test_udfs.py
+++ b/tests/query_test/test_udfs.py
@@ -24,7 +24,7 @@ from subprocess import call, check_call
 from tests.beeswax.impala_beeswax import ImpalaBeeswaxException
 from tests.common.impala_cluster import ImpalaCluster
 from tests.common.impala_test_suite import ImpalaTestSuite
-from tests.common.skip import SkipIfLocal
+from tests.common.skip import SkipIfLocal, SkipIfCatalogV2
 from tests.common.test_dimensions import (
     create_exec_option_dimension,
     create_exec_option_dimension_from_dict,
@@ -49,7 +49,7 @@ class TestUdfBase(ImpalaTestSuite):
     for impalad in impala_cluster.impalads:
       client = impalad.service.create_beeswax_client()
       result = self.execute_query_expect_success(client, query, exec_options)
-      assert result.data == expected
+      assert result.data == expected, impalad
 
   def _load_functions(self, template, vector, database, location):
     queries = template.format(database=database, location=location)
@@ -507,6 +507,7 @@ class TestUdfTargeted(TestUdfBase):
   def test_libs_with_same_filenames(self, vector, unique_database):
     self.run_test_case('QueryTest/libs_with_same_filenames', vector, use_db=unique_database)
 
+  @SkipIfCatalogV2.lib_cache_invalidation_broken()
   def test_udf_update_via_drop(self, vector, unique_database):
     """Test updating the UDF binary without restarting Impala. Dropping
     the function should remove the binary from the local cache."""
@@ -540,6 +541,7 @@ class TestUdfTargeted(TestUdfBase):
     self.execute_query_expect_success(self.client, create_fn_stmt, exec_options)
     self._run_query_all_impalads(exec_options, query_stmt, ["New UDF"])
 
+  @SkipIfCatalogV2.lib_cache_invalidation_broken()
   def test_udf_update_via_create(self, vector, unique_database):
     """Test updating the UDF binary without restarting Impala. Creating a new function
     from the library should refresh the cache."""