Posted to commits@impala.apache.org by ta...@apache.org on 2019/04/13 02:53:11 UTC

[impala] 03/03: IMPALA-7995: part 1: fixes for e2e dockerised impala tests

This is an automated email from the ASF dual-hosted git repository.

tarmstrong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 2ca7f8e7c0781a1914275b3506cf8a7748c44c85
Author: Tim Armstrong <ta...@cloudera.com>
AuthorDate: Mon Feb 11 17:00:54 2019 -0800

    IMPALA-7995: part 1: fixes for e2e dockerised impala tests
    
    This makes all core e2e tests pass against my local dockerised
    minicluster build. I do not yet have a CI job or script running,
    but I wanted to get feedback on these changes sooner. The second
    part of the change will include the CI script and any follow-on
    fixes required for the exhaustive tests.
    
    The following fixes were required:
    * Detect docker_network from TEST_START_CLUSTER_ARGS
    * get_webserver_port() no longer depends on the caller passing in
      the default webserver port. It previously failed because it
      relied on start-impala-cluster.py setting -webserver_port
      for *all* processes.
    * Add SkipIf markers for tests that don't make sense or are
      non-trivial to fix for containerised Impala (a usage sketch
      follows this list).
    * Support loading Impala-lzo plugin from host for tests that depend on
      it.
    * Fix some tests that had 'localhost' hardcoded; they should instead
      use $INTERNAL_LISTEN_HOST, which defaults to localhost.
    * Fix a bug where impala daemons were sorted by their backend port,
      which is the same for all dockerised impalads.
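    
    As a usage sketch of the new skip markers and cluster accessor (the test
    class and test name below are purely illustrative, not part of this
    change):
    
      from tests.common.impala_cluster import ImpalaCluster
      from tests.common.impala_test_suite import ImpalaTestSuite
      from tests.common.skip import SkipIfDockerizedCluster
    
      class TestExampleOnly(ImpalaTestSuite):
        @SkipIfDockerizedCluster.daemon_logs_not_exposed
        def test_that_greps_daemon_logs(self):
          # Resolves the dockerised cluster when --docker_network is present
          # in TEST_START_CLUSTER_ARGS, otherwise the local minicluster.
          cluster = ImpalaCluster.get_e2e_test_cluster()
          assert len(cluster.impalads) > 0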
    
    Testing:
    I ran tests locally as follows, after setting up a docker network and
    starting the other services:
    
      ./buildall.sh -noclean -notests -ninja
      ninja -j $IMPALA_BUILD_THREADS docker_images
      export TEST_START_CLUSTER_ARGS="--docker_network=impala-cluster"
      export FE_TEST=false
      export BE_TEST=false
      export JDBC_TEST=false
      export CLUSTER_TEST=false
      ./bin/run-all-tests.sh
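    
    The docker network itself can be created beforehand with something like
    the following (shown as a plain bridge network; exact options may vary
    per setup):
    
      docker network create -d bridge impala-cluster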
    
    Change-Id: Iee86cbd2c4631a014af1e8cef8e1cd523a812755
    Reviewed-on: http://gerrit.cloudera.org:8080/12639
    Reviewed-by: Joe McDonnell <jo...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 bin/start-impala-cluster.py                        |  8 +++
 docker/daemon_entrypoint.sh                        |  3 +
 docker/impala_base/Dockerfile                      |  5 +-
 .../queries/QueryTest/kudu_alter.test              |  7 +-
 .../queries/QueryTest/kudu_create.test             |  2 +-
 .../catalog_service/test_catalog_service_client.py |  5 +-
 tests/common/custom_cluster_test_suite.py          |  2 +-
 tests/common/environ.py                            |  7 ++
 tests/common/impala_cluster.py                     | 84 +++++++++++++---------
 tests/common/impala_test_suite.py                  | 21 ++++--
 tests/common/skip.py                               | 20 +++++-
 tests/conftest.py                                  |  4 +-
 tests/custom_cluster/test_jvm_mem_tracking.py      |  2 +-
 tests/custom_cluster/test_krpc_mem_usage.py        |  3 +-
 tests/custom_cluster/test_rpc_timeout.py           |  9 ++-
 tests/custom_cluster/test_udf_concurrency.py       |  2 +-
 tests/hs2/test_fetch_first.py                      |  2 +-
 tests/hs2/test_hs2.py                              |  3 +
 tests/hs2/test_json_endpoints.py                   |  6 +-
 tests/metadata/test_compute_stats.py               |  5 +-
 tests/metadata/test_ddl.py                         |  4 +-
 tests/observability/test_log_fragments.py          |  2 +
 tests/query_test/test_hash_join_timer.py           |  2 +-
 tests/query_test/test_hdfs_caching.py              |  8 ++-
 tests/query_test/test_insert.py                    |  6 +-
 tests/query_test/test_insert_behaviour.py          |  5 +-
 tests/query_test/test_kudu.py                      |  3 +-
 tests/query_test/test_lifecycle.py                 |  6 +-
 tests/query_test/test_local_fs.py                  |  2 +
 tests/query_test/test_mem_usage_scaling.py         |  2 +-
 tests/query_test/test_queries.py                   |  3 +-
 tests/query_test/test_udfs.py                      |  6 +-
 tests/run-tests.py                                 |  4 +-
 tests/statestore/test_statestore.py                |  4 +-
 tests/stress/test_mini_stress.py                   |  2 +-
 tests/webserver/test_web_pages.py                  |  2 +-
 36 files changed, 181 insertions(+), 80 deletions(-)

diff --git a/bin/start-impala-cluster.py b/bin/start-impala-cluster.py
index b5a5a66..7b22df5 100755
--- a/bin/start-impala-cluster.py
+++ b/bin/start-impala-cluster.py
@@ -128,6 +128,7 @@ options, args = parser.parse_args()
 IMPALA_HOME = os.environ["IMPALA_HOME"]
 CORE_SITE_PATH = os.path.join(IMPALA_HOME, "fe/src/test/resources/core-site.xml")
 KNOWN_BUILD_TYPES = ["debug", "release", "latest"]
+IMPALA_LZO = os.environ["IMPALA_LZO"]
 
 # Kills have a timeout to prevent automated scripts from hanging indefinitely.
 # It is set to a high value to avoid failing if processes are slow to shut down.
@@ -539,6 +540,13 @@ class DockerMiniClusterOperations(object):
     # for config changes to take effect.
     conf_dir = os.path.join(IMPALA_HOME, "fe/target/test-classes")
     mount_args = ["--mount", "type=bind,src={0},dst=/opt/impala/conf".format(conf_dir)]
+
+    # Allow loading LZO plugin, if built.
+    lzo_lib_dir = os.path.join(IMPALA_LZO, "build")
+    if os.path.isdir(lzo_lib_dir):
+      mount_args += ["--mount",
+                     "type=bind,src={0},dst=/opt/impala/lib/plugins".format(lzo_lib_dir)]
+
     mem_limit_args = []
     if mem_limit is not None:
       mem_limit_args = ["--memory", str(mem_limit)]
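
When Impala-lzo has been built, the extra flag generated above takes roughly
this form (the source path is illustrative and depends on $IMPALA_LZO):

  --mount type=bind,src=/home/user/Impala-lzo/build,dst=/opt/impala/lib/plugins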
diff --git a/docker/daemon_entrypoint.sh b/docker/daemon_entrypoint.sh
index ac1efe0..e745745 100755
--- a/docker/daemon_entrypoint.sh
+++ b/docker/daemon_entrypoint.sh
@@ -31,6 +31,9 @@ export LD_LIBRARY_PATH=/usr/lib/jvm/java-8-openjdk-amd64/jre/lib/amd64/
 LD_LIBRARY_PATH+=:/usr/lib/jvm/java-8-openjdk-amd64/jre/lib/amd64/server/
 LD_LIBRARY_PATH+=:/opt/kudu/release/lib
 
+# Add directory with optional plugins that can be mounted for the container.
+LD_LIBRARY_PATH+=:/opt/impala/lib/plugins
+
 # Configs should be first on classpath
 export CLASSPATH=/opt/impala/conf
 # Append all of the jars in /opt/impala/lib to the classpath.
diff --git a/docker/impala_base/Dockerfile b/docker/impala_base/Dockerfile
index 914b957..656bf7f 100644
--- a/docker/impala_base/Dockerfile
+++ b/docker/impala_base/Dockerfile
@@ -17,10 +17,13 @@
 
 FROM ubuntu:16.04
 
+# Install minimal dependencies required for Impala services to run.
+# liblzo2-2 may be needed by the Impala-lzo plugin, which is used in tests.
+# We install it in the base image for convenience.
 RUN apt-get update && \
   apt-get install -y openjdk-8-jre-headless \
   libsasl2-2 libsasl2-modules libsasl2-modules-gssapi-mit \
-  tzdata
+  tzdata liblzo2-2
 
 # Copy build artifacts required for the daemon processes.
 # Need to have multiple copy commands to preserve directory structure.
diff --git a/testdata/workloads/functional-query/queries/QueryTest/kudu_alter.test b/testdata/workloads/functional-query/queries/QueryTest/kudu_alter.test
index 801bad8..91289fd 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/kudu_alter.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/kudu_alter.test
@@ -14,7 +14,7 @@ AnalysisException: SHOW RANGE PARTITIONS requested but table does not have range
 ---- QUERY
 # Alter master address to a different location
 alter table simple set tblproperties (
-  'kudu.master_addresses' = 'localhost'
+  'kudu.master_addresses' = '$INTERNAL_LISTEN_HOST'
 )
 ---- RESULTS
 'Updated table.'
@@ -23,14 +23,15 @@ STRING
 ====
 ---- QUERY
 # Show that new address is picked up
+# Note that "describe formatted" pads the hostname string so we need to use a regex.
 describe formatted simple
 ---- RESULTS: VERIFY_IS_SUBSET
-'','kudu.master_addresses','localhost           '
+row_regex:'','kudu.master_addresses','$INTERNAL_LISTEN_HOST *'
 ---- TYPES
 STRING,STRING,STRING
 ====
 ---- QUERY
-alter table simple set tblproperties ('kudu.master_addresses' = '127.0.0.1')
+alter table simple set tblproperties ('kudu.master_addresses' = '$INTERNAL_LISTEN_IP')
 ---- RESULTS
 'Updated table.'
 ---- TYPES
diff --git a/testdata/workloads/functional-query/queries/QueryTest/kudu_create.test b/testdata/workloads/functional-query/queries/QueryTest/kudu_create.test
index 5260af2..039af92 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/kudu_create.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/kudu_create.test
@@ -112,7 +112,7 @@ Couldn't resolve this master's address bogus.host.name:7051
 ---- QUERY
 # Valid host with whitespace
 create table tdata_master_addresses_whitespace (id int primary key) stored as kudu
-  tblproperties('kudu.master_addresses' = '  localhost  ')
+  tblproperties('kudu.master_addresses' = '  $INTERNAL_LISTEN_HOST  ')
 ---- RESULTS
 'Table has been created.'
 ====
diff --git a/tests/catalog_service/test_catalog_service_client.py b/tests/catalog_service/test_catalog_service_client.py
index 05a4f24..939c4a2 100644
--- a/tests/catalog_service/test_catalog_service_client.py
+++ b/tests/catalog_service/test_catalog_service_client.py
@@ -27,6 +27,7 @@ from thrift.protocol import TBinaryProtocol
 
 from tests.common.impala_cluster import ImpalaCluster
 from tests.common.impala_test_suite import ImpalaTestSuite
+from tests.common.skip import SkipIfDockerizedCluster
 from tests.common.test_dimensions import create_single_exec_option_dimension
 from tests.util.filesystem_utils import WAREHOUSE
 from tests.util.thrift_util import create_transport
@@ -53,9 +54,9 @@ class TestCatalogServiceClient(ImpalaTestSuite):
         v.get_value('table_format').file_format == 'parquet' and\
         v.get_value('table_format').compression_codec == 'none')
 
-
+  @SkipIfDockerizedCluster.catalog_service_not_exposed
   def test_get_functions(self, vector, unique_database):
-    impala_cluster = ImpalaCluster()
+    impala_cluster = ImpalaCluster.get_e2e_test_cluster()
     catalogd = impala_cluster.catalogd.service
     trans_type = 'buffered'
     if pytest.config.option.use_kerberos:
diff --git a/tests/common/custom_cluster_test_suite.py b/tests/common/custom_cluster_test_suite.py
index 841f61b..acba2c4 100644
--- a/tests/common/custom_cluster_test_suite.py
+++ b/tests/common/custom_cluster_test_suite.py
@@ -230,7 +230,7 @@ class CustomClusterTestSuite(ImpalaTestSuite):
       check_call(cmd + options, close_fds=True)
     finally:
       # Failure tests expect cluster to be initialised even if start-impala-cluster fails.
-      cls.cluster = ImpalaCluster()
+      cls.cluster = ImpalaCluster.get_e2e_test_cluster()
     statestored = cls.cluster.statestored
     if statestored is None:
       raise Exception("statestored was not found")
diff --git a/tests/common/environ.py b/tests/common/environ.py
index e20aa98..17b2329 100644
--- a/tests/common/environ.py
+++ b/tests/common/environ.py
@@ -35,6 +35,13 @@ if build_type_arg_search_result is not None:
 else:
   build_type_dir = 'latest'
 
+docker_network = None
+docker_network_regex = re.compile(r'--docker_network=(\S+)', re.I)
+docker_network_search_result = re.search(docker_network_regex, test_start_cluster_args)
+if docker_network_search_result is not None:
+  docker_network = docker_network_search_result.groups()[0]
+IS_DOCKERIZED_TEST_CLUSTER = docker_network is not None
+
 # Resolve any symlinks in the path.
 impalad_basedir = \
     os.path.realpath(os.path.join(IMPALA_HOME, 'be/build', build_type_dir)).rstrip('/')
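
The docker_network detection added above is a plain regex match against
TEST_START_CLUSTER_ARGS. As a quick worked example (the argument string is
illustrative only):

  >>> import re
  >>> args = "--cluster_size=3 --docker_network=impala-cluster"
  >>> re.search(r'--docker_network=(\S+)', args, re.I).groups()[0]
  'impala-cluster'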
diff --git a/tests/common/impala_cluster.py b/tests/common/impala_cluster.py
index 996b640..4b45979 100644
--- a/tests/common/impala_cluster.py
+++ b/tests/common/impala_cluster.py
@@ -31,16 +31,17 @@ from signal import SIGKILL
 from subprocess import check_call
 from time import sleep
 
-if sys.version_info >= (2, 7):
-  # We use some functions in the docker code that don't exist in Python 2.6.
-  from subprocess import check_output
-
+import tests.common.environ
 from tests.common.impala_service import (
     CatalogdService,
     ImpaladService,
     StateStoredService)
 from tests.util.shell_util import exec_process, exec_process_async
 
+if sys.version_info >= (2, 7):
+  # We use some functions in the docker code that don't exist in Python 2.6.
+  from subprocess import check_output
+
 LOG = logging.getLogger('impala_cluster')
 LOG.setLevel(level=logging.DEBUG)
 
@@ -77,6 +78,12 @@ class ImpalaCluster(object):
     self.docker_network = docker_network
     self.refresh()
 
+  @classmethod
+  def get_e2e_test_cluster(cls):
+    """Within end-to-end tests, get the cluster under test with settings detected from
+    the environment."""
+    return ImpalaCluster(docker_network=tests.common.environ.docker_network)
+
   def refresh(self):
     """ Re-loads the impalad/statestored/catalogd processes if they exist.
 
@@ -222,11 +229,7 @@ class ImpalaCluster(object):
       elif process.name == 'catalogd':
         catalogd = CatalogdProcess(cmdline)
 
-    # If the operating system PIDs wrap around during startup of the local minicluster,
-    # the order of the impalads is incorrect. We order them by their HS2 port, so that
-    # get_first_impalad() always returns the first one. We need to use a port that is
-    # exposed and mapped to a host port for the containerised cluster.
-    impalads.sort(key=lambda i: i.service.hs2_port)
+    self.__sort_impalads(impalads)
     return impalads, statestored, catalogd
 
   def __find_docker_containers(self):
@@ -239,7 +242,7 @@ class ImpalaCluster(object):
     output = check_output(["docker", "network", "inspect", self.docker_network])
     # Only one network should be present in the top level array.
     for container_id in json.loads(output)[0]["Containers"]:
-      container_info = self._get_container_info(container_id)
+      container_info = get_container_info(container_id)
       if container_info["State"]["Status"] != "running":
         # Skip over stopped containers.
         continue
@@ -262,16 +265,15 @@ class ImpalaCluster(object):
         assert catalogd is None
         catalogd = CatalogdProcess(args, container_id=container_id,
                                    port_map=port_map)
-    impalads.sort(key=lambda i: i.service.be_port)
+    self.__sort_impalads(impalads)
     return impalads, statestoreds, catalogd
 
-  def _get_container_info(self, container_id):
-    """Get the output of "docker container inspect" as a python data structure."""
-    containers = json.loads(
-        check_output(["docker", "container", "inspect", container_id]))
-    # Only one container should be present in the top level array.
-    assert len(containers) == 1, json.dumps(containers, indent=4)
-    return containers[0]
+  def __sort_impalads(self, impalads):
+    """Does an in-place sort of a list of ImpaladProcess objects into a canonical order.
+    We order them by their HS2 port, so that get_first_impalad() always returns the
+    first one. We need to use a port that is exposed and mapped to a host port for
+    the containerised cluster."""
+    impalads.sort(key=lambda i: i.service.hs2_port)
 
 
 # Represents a process running on a machine and common actions that can be performed
@@ -332,10 +334,10 @@ class Process(object):
 
   def __get_pids(self):
     if self.container_id is not None:
-      container_info = self._get_container_info(self.container_id)
+      container_info = get_container_info(self.container_id)
       if container_info["State"]["Status"] != "running":
         return []
-      return [container_info["State"]["Status"]["Pid"]]
+      return [container_info["State"]["Pid"]]
 
     # In non-containerised case, search for process based on matching command lines.
     pids = []
@@ -394,8 +396,13 @@ class BaseImpalaProcess(Process):
     super(BaseImpalaProcess, self).__init__(cmd, container_id, port_map)
     self.hostname = self._get_hostname()
 
-  def _get_webserver_port(self, default=None):
-    return int(self._get_port('webserver_port', default))
+  def get_webserver_port(self):
+    """Return the port for the webserver of this process."""
+    return int(self._get_port('webserver_port', self._get_default_webserver_port()))
+
+  def _get_default_webserver_port(self):
+    """Different daemons have different defaults. Subclasses must override."""
+    raise NotImplementedError()
 
   def _get_webserver_certificate_file(self):
     # TODO: if this is containerised, the path will likely not be the same on the host.
@@ -410,7 +417,7 @@ class BaseImpalaProcess(Process):
       if ('%s=' % arg_name) in arg.strip().lstrip('-'):
         return arg.split('=')[1]
     if default is None:
-      assert 0, "No command line argument '%s' found." % arg_name
+      assert 0, "Argument '{0}' not found in cmd '{1}'.".format(arg_name, self.cmd)
     return default
 
   def _get_port(self, arg_name, default):
@@ -426,14 +433,14 @@ class BaseImpalaProcess(Process):
 class ImpaladProcess(BaseImpalaProcess):
   def __init__(self, cmd, container_id=None, port_map=None):
     super(ImpaladProcess, self).__init__(cmd, container_id, port_map)
-    self.service = ImpaladService(self.hostname,
-                                  self._get_webserver_port(
-                                      default=DEFAULT_IMPALAD_WEBSERVER_PORT),
-                                  self.__get_beeswax_port(),
-                                  self.__get_be_port(),
+    self.service = ImpaladService(self.hostname, self.get_webserver_port(),
+                                  self.__get_beeswax_port(), self.__get_be_port(),
                                   self.__get_hs2_port(),
                                   self._get_webserver_certificate_file())
 
+  def _get_default_webserver_port(self):
+    return DEFAULT_IMPALAD_WEBSERVER_PORT
+
   def __get_beeswax_port(self):
     return int(self._get_port('beeswax_port', DEFAULT_BEESWAX_PORT))
 
@@ -485,18 +492,22 @@ class StateStoreProcess(BaseImpalaProcess):
   def __init__(self, cmd, container_id=None, port_map=None):
     super(StateStoreProcess, self).__init__(cmd, container_id, port_map)
     self.service = StateStoredService(self.hostname,
-        self._get_webserver_port(default=DEFAULT_STATESTORED_WEBSERVER_PORT),
-        self._get_webserver_certificate_file())
+        self.get_webserver_port(), self._get_webserver_certificate_file())
+
+  def _get_default_webserver_port(self):
+    return DEFAULT_STATESTORED_WEBSERVER_PORT
 
 
 # Represents a catalogd process
 class CatalogdProcess(BaseImpalaProcess):
   def __init__(self, cmd, container_id=None, port_map=None):
     super(CatalogdProcess, self).__init__(cmd, container_id, port_map)
-    self.service = CatalogdService(self.hostname,
-        self._get_webserver_port(default=DEFAULT_CATALOGD_WEBSERVER_PORT),
+    self.service = CatalogdService(self.hostname, self.get_webserver_port(),
         self._get_webserver_certificate_file(), self.__get_port())
 
+  def _get_default_webserver_port(self):
+    return DEFAULT_CATALOGD_WEBSERVER_PORT
+
   def __get_port(self):
     return int(self._get_port('catalog_service_port', DEFAULT_CATALOG_SERVICE_PORT))
 
@@ -548,3 +559,12 @@ def run_daemon(daemon_binary, args, build_type="latest", env_vars={}, output_fil
       cmd=' '.join([pipes.quote(tok) for tok in cmd]),
       redirect=redirect))
   os.system(sys_cmd)
+
+
+def get_container_info(container_id):
+  """Get the output of "docker container inspect" as a python data structure."""
+  containers = json.loads(
+      check_output(["docker", "container", "inspect", container_id]))
+  # Only one container should be present in the top level array.
+  assert len(containers) == 1, json.dumps(containers, indent=4)
+  return containers[0]
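
The get_container_info() helper above wraps "docker container inspect", which
returns a JSON array with one entry per queried container. Abbreviated (fields
trimmed, values illustrative), the part __get_pids() relies on looks like:

  [
    {
      "State": {
        "Status": "running",
        "Pid": 12345
      }
    }
  ]

so the process id lives at State.Pid, while State.Status is just the
"running"/"exited" string; hence the corrected indexing in __get_pids().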
diff --git a/tests/common/impala_test_suite.py b/tests/common/impala_test_suite.py
index 1a2fff6..0f18144 100644
--- a/tests/common/impala_test_suite.py
+++ b/tests/common/impala_test_suite.py
@@ -25,6 +25,7 @@ import pwd
 import pytest
 import re
 import shutil
+import socket
 import subprocess
 import tempfile
 import time
@@ -102,6 +103,9 @@ WORKLOAD_DIR = os.environ['IMPALA_WORKLOAD_DIR']
 HDFS_CONF = HdfsConfig(pytest.config.option.minicluster_xml_conf)
 TARGET_FILESYSTEM = os.getenv("TARGET_FILESYSTEM") or "hdfs"
 IMPALA_HOME = os.getenv("IMPALA_HOME")
+INTERNAL_LISTEN_HOST = os.getenv("INTERNAL_LISTEN_HOST")
+# Some tests use the IP instead of the host.
+INTERNAL_LISTEN_IP = socket.gethostbyname_ex(INTERNAL_LISTEN_HOST)[2][0]
 EE_TEST_LOGS_DIR = os.getenv("IMPALA_EE_TEST_LOGS_DIR")
 # Match any SET statement. Assume that query options' names
 # only contain alphabets, underscores and digits after position 1.
@@ -311,7 +315,9 @@ class ImpalaTestSuite(BaseTestSuite):
           .replace('$FILESYSTEM_PREFIX', FILESYSTEM_PREFIX) \
           .replace('$FILESYSTEM_NAME', FILESYSTEM_NAME) \
           .replace('$NAMENODE', NAMENODE) \
-          .replace('$IMPALA_HOME', IMPALA_HOME)
+          .replace('$IMPALA_HOME', IMPALA_HOME) \
+          .replace('$INTERNAL_LISTEN_HOST', INTERNAL_LISTEN_HOST)\
+          .replace('$INTERNAL_LISTEN_IP', INTERNAL_LISTEN_IP)
       if use_db: expected_str = expected_str.replace('$DATABASE', use_db)
       # Strip newlines so we can split error message into multiple lines
       expected_str = expected_str.replace('\n', '')
@@ -339,7 +345,10 @@ class ImpalaTestSuite(BaseTestSuite):
                                      .replace('$NAMENODE', NAMENODE) \
                                      .replace('$IMPALA_HOME', IMPALA_HOME) \
                                      .replace('$USER', getuser()) \
-                                     .replace('$FILESYSTEM_NAME', FILESYSTEM_NAME)
+                                     .replace('$FILESYSTEM_NAME', FILESYSTEM_NAME) \
+                                     .replace('$INTERNAL_LISTEN_HOST',
+                                              INTERNAL_LISTEN_HOST) \
+                                     .replace('$INTERNAL_LISTEN_IP', INTERNAL_LISTEN_IP)
         if use_db:
           test_section[section_name] = test_section[section_name].replace('$DATABASE', use_db)
     result_section, type_section = 'RESULTS', 'TYPES'
@@ -436,7 +445,9 @@ class ImpalaTestSuite(BaseTestSuite):
           .replace('$FILESYSTEM_PREFIX', FILESYSTEM_PREFIX)
           .replace('$FILESYSTEM_NAME', FILESYSTEM_NAME)
           .replace('$SECONDARY_FILESYSTEM', os.getenv("SECONDARY_FILESYSTEM") or str())
-          .replace('$USER', getuser()))
+          .replace('$USER', getuser())
+          .replace('$INTERNAL_LISTEN_HOST', INTERNAL_LISTEN_HOST)
+          .replace('$INTERNAL_LISTEN_IP', INTERNAL_LISTEN_IP))
       if use_db: query = query.replace('$DATABASE', use_db)
 
       reserved_keywords = ["$DATABASE", "$FILESYSTEM_PREFIX", "$FILESYSTEM_NAME",
@@ -512,7 +523,9 @@ class ImpalaTestSuite(BaseTestSuite):
       if pytest.config.option.update_results and 'RESULTS' in test_section:
         test_section['RESULTS'] = test_section['RESULTS'] \
             .replace(NAMENODE, '$NAMENODE') \
-            .replace('$IMPALA_HOME', IMPALA_HOME)
+            .replace('$IMPALA_HOME', IMPALA_HOME) \
+            .replace(INTERNAL_LISTEN_HOST, '$INTERNAL_LISTEN_HOST') \
+            .replace(INTERNAL_LISTEN_IP, '$INTERNAL_LISTEN_IP')
       rt_profile_info = None
       if 'RUNTIME_PROFILE_%s' % table_format_info.file_format in test_section:
         # If this table format has a RUNTIME_PROFILE section specifically for it, evaluate
diff --git a/tests/common/skip.py b/tests/common/skip.py
index 709c446..95a62da 100644
--- a/tests/common/skip.py
+++ b/tests/common/skip.py
@@ -24,7 +24,8 @@ import os
 import pytest
 from functools import partial
 
-from tests.common.environ import IMPALA_TEST_CLUSTER_PROPERTIES
+from tests.common.environ import (IMPALA_TEST_CLUSTER_PROPERTIES,
+    IS_DOCKERIZED_TEST_CLUSTER)
 from tests.common.kudu_test_suite import get_kudu_master_flag
 from tests.util.filesystem_utils import (
     IS_ABFS,
@@ -178,3 +179,20 @@ class SkipIfEC:
   oom = pytest.mark.skipif(IS_EC, reason="Probably broken by HDFS-13540.")
   fix_later = pytest.mark.skipif(IS_EC, reason="It should work but doesn't.")
   scheduling = pytest.mark.skipif(IS_EC, reason="Scheduling is different on EC")
+
+
+class SkipIfDockerizedCluster:
+  catalog_service_not_exposed = pytest.mark.skipif(
+      IS_DOCKERIZED_TEST_CLUSTER, reason="Catalog service not exposed.")
+  statestore_not_exposed = pytest.mark.skipif(
+      IS_DOCKERIZED_TEST_CLUSTER, reason="Statestore service not exposed.")
+  internal_hostname = pytest.mark.skipif(
+      IS_DOCKERIZED_TEST_CLUSTER, reason="Internal hostname is used, not local hostname.")
+  daemon_logs_not_exposed = pytest.mark.skipif(
+      IS_DOCKERIZED_TEST_CLUSTER, reason="Daemon logs not exposed in host.")
+  accesses_host_filesystem = pytest.mark.skipif(
+      IS_DOCKERIZED_TEST_CLUSTER, reason="Daemon would need to access host filesystem.")
+  jvm_oom_large_string = pytest.mark.skipif(IS_DOCKERIZED_TEST_CLUSTER,
+      reason="IMPALA-4865: JVM hits OOM for large string. Heap is smaller in docker.")
+  insert_acls = pytest.mark.skipif(IS_DOCKERIZED_TEST_CLUSTER,
+      reason="IMPALA-8384: insert ACL tests are broken on dockerised minicluster.")
diff --git a/tests/conftest.py b/tests/conftest.py
index 5a66900..e5904ba 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -174,7 +174,9 @@ def pytest_assertrepr_compare(op, left, right):
   if isinstance(left, set) and isinstance(right, set) and op == '<=':
     # If expected is not a subset of actual, print out the set difference.
     result = ['Items in expected results not found in actual results:']
-    result.append(('').join(list(left - right)))
+    result.extend(list(left - right))
+    result.append('Items in actual results:')
+    result.extend(list(right))
     LOG.error('\n'.join(result))
     return result
 
diff --git a/tests/custom_cluster/test_jvm_mem_tracking.py b/tests/custom_cluster/test_jvm_mem_tracking.py
index e0981cf..c93af86 100644
--- a/tests/custom_cluster/test_jvm_mem_tracking.py
+++ b/tests/custom_cluster/test_jvm_mem_tracking.py
@@ -40,7 +40,7 @@ class TestJvmMemTracker(CustomClusterTestSuite):
   @CustomClusterTestSuite.with_args(impalad_args="--mem_limit_includes_jvm=true",
                                     start_args="--jvm_args=-Xmx1g", cluster_size=1)
   def test_jvm_mem_tracking(self, vector):
-    service = ImpalaCluster().impalads[0].service
+    service = ImpalaCluster.get_e2e_test_cluster().impalads[0].service
     verifier = MemUsageVerifier(service)
     proc_values = verifier.get_mem_usage_values('Process')
     proc_total = proc_values['total']
diff --git a/tests/custom_cluster/test_krpc_mem_usage.py b/tests/custom_cluster/test_krpc_mem_usage.py
index 6b0fe6a..0d76ca8 100644
--- a/tests/custom_cluster/test_krpc_mem_usage.py
+++ b/tests/custom_cluster/test_krpc_mem_usage.py
@@ -53,7 +53,8 @@ class TestKrpcMemUsage(CustomClusterTestSuite):
     """Verifies that the memory used by KRPC is returned to the memtrackers and that
     metrics in 'non_zero_peak_metrics' have a peak value > 0.
     """
-    verifiers = [ MemUsageVerifier(i.service) for i in ImpalaCluster().impalads ]
+    verifiers = [MemUsageVerifier(i.service)
+                 for i in ImpalaCluster.get_e2e_test_cluster().impalads]
     for verifier in verifiers:
       for metric_name in ALL_METRICS:
         usage = verifier.get_mem_usage_values(metric_name)
diff --git a/tests/custom_cluster/test_rpc_timeout.py b/tests/custom_cluster/test_rpc_timeout.py
index 6ff2bf8..63aca11 100644
--- a/tests/custom_cluster/test_rpc_timeout.py
+++ b/tests/custom_cluster/test_rpc_timeout.py
@@ -45,7 +45,8 @@ class TestRPCTimeout(CustomClusterTestSuite):
         self.execute_query(query, query_options)
       except ImpalaBeeswaxException:
         pass
-    verifiers = [ MetricVerifier(i.service) for i in ImpalaCluster().impalads ]
+    verifiers = [MetricVerifier(i.service)
+                 for i in ImpalaCluster.get_e2e_test_cluster().impalads]
 
     for v in verifiers:
       v.wait_for_metric("impala-server.num-fragments-in-flight", 0)
@@ -61,7 +62,8 @@ class TestRPCTimeout(CustomClusterTestSuite):
         pass
       finally:
         self.client.close_query(handle)
-    verifiers = [ MetricVerifier(i.service) for i in ImpalaCluster().impalads ]
+    verifiers = [MetricVerifier(i.service)
+                 for i in ImpalaCluster.get_e2e_test_cluster().impalads]
 
     for v in verifiers:
       v.wait_for_metric("impala-server.num-fragments-in-flight", 0)
@@ -93,7 +95,8 @@ class TestRPCTimeout(CustomClusterTestSuite):
     for i in range(3):
       ex= self.execute_query_expect_failure(self.client, self.TEST_QUERY)
       assert "RPC recv timed out" in str(ex)
-    verifiers = [ MetricVerifier(i.service) for i in ImpalaCluster().impalads ]
+    verifiers = [MetricVerifier(i.service) for i in
+                 ImpalaCluster.get_e2e_test_cluster().impalads]
 
     for v in verifiers:
       v.wait_for_metric("impala-server.num-fragments-in-flight", 0)
diff --git a/tests/custom_cluster/test_udf_concurrency.py b/tests/custom_cluster/test_udf_concurrency.py
index f0fac27..35a41c4 100644
--- a/tests/custom_cluster/test_udf_concurrency.py
+++ b/tests/custom_cluster/test_udf_concurrency.py
@@ -68,7 +68,7 @@ class TestUdfConcurrency(CustomClusterTestSuite):
 
     # Tracks number of impalads prior to tests to check that none have crashed.
     # All impalads are assumed to be coordinators.
-    cluster = ImpalaCluster()
+    cluster = ImpalaCluster.get_e2e_test_cluster()
     exp_num_coordinators = cluster.num_responsive_coordinators()
 
     setup_client = self.create_impala_client()
diff --git a/tests/hs2/test_fetch_first.py b/tests/hs2/test_fetch_first.py
index 4466f91..12451bb 100644
--- a/tests/hs2/test_fetch_first.py
+++ b/tests/hs2/test_fetch_first.py
@@ -32,7 +32,7 @@ class TestFetchFirst(HS2TestSuite):
   def __test_invalid_result_caching(self, sql_stmt):
     """ Tests that invalid requests for query-result caching fail
     using the given sql_stmt."""
-    impala_cluster = ImpalaCluster()
+    impala_cluster = ImpalaCluster.get_e2e_test_cluster()
     impalad = impala_cluster.impalads[0].service
 
     execute_statement_req = TCLIService.TExecuteStatementReq()
diff --git a/tests/hs2/test_hs2.py b/tests/hs2/test_hs2.py
index 795f45c..3408b53 100644
--- a/tests/hs2/test_hs2.py
+++ b/tests/hs2/test_hs2.py
@@ -24,9 +24,11 @@ import time
 from urllib2 import urlopen
 
 from ImpalaService import ImpalaHiveServer2Service
+from tests.common.skip import SkipIfDockerizedCluster
 from tests.hs2.hs2_test_suite import HS2TestSuite, needs_session, operation_id_to_query_id
 from TCLIService import TCLIService
 
+
 SQLSTATE_GENERAL_ERROR = "HY000"
 
 class TestHS2(HS2TestSuite):
@@ -117,6 +119,7 @@ class TestHS2(HS2TestSuite):
     # Removed options should not be returned.
     assert "MAX_IO_BUFFERS" not in vals
 
+  @SkipIfDockerizedCluster.internal_hostname
   def test_open_session_http_addr(self):
     """Check that OpenSession returns the coordinator's http address."""
     open_session_req = TCLIService.TOpenSessionReq()
diff --git a/tests/hs2/test_json_endpoints.py b/tests/hs2/test_json_endpoints.py
index a5e73da..a6b1d2c 100644
--- a/tests/hs2/test_json_endpoints.py
+++ b/tests/hs2/test_json_endpoints.py
@@ -22,15 +22,15 @@ import pytest
 
 from urllib2 import urlopen
 
+from tests.common.impala_cluster import ImpalaCluster
 from tests.hs2.hs2_test_suite import HS2TestSuite
 from TCLIService import TCLIService
 
 class TestJsonEndpoints(HS2TestSuite):
   def _get_json_queries(self, http_addr):
     """Get the json output of the /queries page from the impalad web UI at http_addr."""
-    resp = urlopen("http://%s/queries?json" % http_addr)
-    assert resp.msg == 'OK'
-    return json.loads(resp.read())
+    cluster = ImpalaCluster.get_e2e_test_cluster()
+    return cluster.impalads[0].service.get_debug_webpage_json("/queries")
 
   @pytest.mark.execute_serially
   def test_waiting_in_flight_queries(self):
diff --git a/tests/metadata/test_compute_stats.py b/tests/metadata/test_compute_stats.py
index e92f978..a9138b1 100644
--- a/tests/metadata/test_compute_stats.py
+++ b/tests/metadata/test_compute_stats.py
@@ -145,7 +145,8 @@ class TestComputeStats(ImpalaTestSuite):
        incremental statistics.
     """
     try:
-      client = ImpalaCluster().impalads[0].service.create_beeswax_client()
+      impalad = ImpalaCluster.get_e2e_test_cluster().impalads[0]
+      client = impalad.service.create_beeswax_client()
       create = "create table test like functional.alltypes"
       load = "insert into test partition(year, month) select * from functional.alltypes"
       insert = """insert into test partition(year=2009, month=1) values
@@ -156,7 +157,7 @@ class TestComputeStats(ImpalaTestSuite):
 
       # Checks that profile does not have metrics for incremental stats when
       # the operation is not 'compute incremental stats'.
-      self.execute_query_expect_success(client, "use %s" % unique_database)
+      self.execute_query_expect_success(client, "use `%s`" % unique_database)
       profile = self.execute_query_expect_success(client, create).runtime_profile
       assert profile.count("StatsFetch") == 0
       # Checks that incremental stats metrics are present when 'compute incremental
diff --git a/tests/metadata/test_ddl.py b/tests/metadata/test_ddl.py
index af078d2..b696d95 100644
--- a/tests/metadata/test_ddl.py
+++ b/tests/metadata/test_ddl.py
@@ -532,7 +532,7 @@ class TestDdlStatements(TestDdlBase):
       num_attempts = 1
     else:
       num_attempts = 60
-    for impalad in ImpalaCluster().impalads:
+    for impalad in ImpalaCluster.get_e2e_test_cluster().impalads:
       client = impalad.service.create_beeswax_client()
       try:
         for attempt in itertools.count(1):
@@ -554,7 +554,7 @@ class TestDdlStatements(TestDdlBase):
 
   def test_views_describe(self, vector, unique_database):
     # IMPALA-6896: Tests that altered views can be described by all impalads.
-    impala_cluster = ImpalaCluster()
+    impala_cluster = ImpalaCluster.get_e2e_test_cluster()
     impalads = impala_cluster.impalads
     view_name = "%s.test_describe_view" % unique_database
     query_opts = vector.get_value('exec_option')
diff --git a/tests/observability/test_log_fragments.py b/tests/observability/test_log_fragments.py
index 44acf3a..b3d6afc 100644
--- a/tests/observability/test_log_fragments.py
+++ b/tests/observability/test_log_fragments.py
@@ -16,6 +16,7 @@
 # under the License.
 
 from tests.common.impala_test_suite import ImpalaTestSuite
+from tests.common.skip import SkipIfDockerizedCluster
 
 import re
 import time
@@ -24,6 +25,7 @@ import time
 class TestLogFragments(ImpalaTestSuite):
   """Checks that logging includes query/fragment ids when available."""
 
+  @SkipIfDockerizedCluster.daemon_logs_not_exposed
   def test_log_fragments(self):
     """Tests that fragment ids percolate through to the logs.
 
diff --git a/tests/query_test/test_hash_join_timer.py b/tests/query_test/test_hash_join_timer.py
index 59dd0a6..24b4cc0 100644
--- a/tests/query_test/test_hash_join_timer.py
+++ b/tests/query_test/test_hash_join_timer.py
@@ -103,7 +103,7 @@ class TestHashJoinTimer(ImpalaTestSuite):
     join_type = test_case[1]
 
     # Ensure that the cluster is idle before starting the test query.
-    for impalad in ImpalaCluster().impalads:
+    for impalad in ImpalaCluster.get_e2e_test_cluster().impalads:
       verifier = MetricVerifier(impalad.service)
       verifier.wait_for_metric("impala-server.num-fragments-in-flight", 0)
 
diff --git a/tests/query_test/test_hdfs_caching.py b/tests/query_test/test_hdfs_caching.py
index 64b36da..e3f55e8 100644
--- a/tests/query_test/test_hdfs_caching.py
+++ b/tests/query_test/test_hdfs_caching.py
@@ -22,7 +22,7 @@ import re
 import time
 from subprocess import check_call
 
-from tests.common.environ import build_flavor_timeout
+from tests.common.environ import build_flavor_timeout, IS_DOCKERIZED_TEST_CLUSTER
 from tests.common.impala_cluster import ImpalaCluster
 from tests.common.impala_test_suite import ImpalaTestSuite, LOG
 from tests.common.skip import SkipIfS3, SkipIfABFS, SkipIfADLS, SkipIfIsilon, \
@@ -58,7 +58,7 @@ class TestHdfsCaching(ImpalaTestSuite):
     cached_read_metric = "impala-server.io-mgr.cached-bytes-read"
     query_string = "select count(*) from tpch.nation"
     expected_bytes_delta = 2199
-    impala_cluster = ImpalaCluster()
+    impala_cluster = ImpalaCluster.get_e2e_test_cluster()
 
     # Collect the cached read metric on all the impalads before running the query
     cached_bytes_before = list()
@@ -85,7 +85,9 @@ class TestHdfsCaching(ImpalaTestSuite):
       if cached_bytes_after[i] > cached_bytes_before[i]:
         num_metrics_increased = num_metrics_increased + 1
 
-    if num_metrics_increased != 1:
+    if IS_DOCKERIZED_TEST_CLUSTER:
+      assert num_metrics_increased == 0, "HDFS caching is disabled in dockerised cluster."
+    elif num_metrics_increased != 1:
       # Test failed, print the metrics
       for i in range(0, len(cached_bytes_before)):
         print "%d %d" % (cached_bytes_before[i], cached_bytes_after[i])
diff --git a/tests/query_test/test_insert.py b/tests/query_test/test_insert.py
index e850e24..a55fdbe 100644
--- a/tests/query_test/test_insert.py
+++ b/tests/query_test/test_insert.py
@@ -25,7 +25,7 @@ from tests.common.impala_cluster import ImpalaCluster
 from tests.common.impala_test_suite import ImpalaTestSuite
 from tests.common.parametrize import UniqueDatabase
 from tests.common.skip import SkipIfABFS, SkipIfEC, SkipIfLocal, \
-    SkipIfNotHdfsMinicluster, SkipIfS3
+    SkipIfNotHdfsMinicluster, SkipIfS3, SkipIfDockerizedCluster
 from tests.common.test_dimensions import (
     create_exec_option_dimension,
     create_uncompressed_text_dimension)
@@ -80,6 +80,7 @@ class TestInsertQueries(ImpalaTestSuite):
             v.get_value('compression_codec') == 'none'))
 
   @pytest.mark.execute_serially
+  @SkipIfDockerizedCluster.jvm_oom_large_string
   def test_insert_large_string(self, vector, unique_database):
     """Test handling of large strings in inserter and scanner."""
     if "-Xcheck:jni" in os.environ.get("LIBHDFS_OPTS", ""):
@@ -147,7 +148,8 @@ class TestInsertQueries(ImpalaTestSuite):
     # IMPALA-7023: These queries can linger and use up memory, causing subsequent
     # tests to hit memory limits. Wait for some time to allow the query to
     # be reclaimed.
-    verifiers = [ MetricVerifier(i.service) for i in ImpalaCluster().impalads ]
+    verifiers = [MetricVerifier(i.service)
+                 for i in ImpalaCluster.get_e2e_test_cluster().impalads]
     for v in verifiers:
       v.wait_for_metric("impala-server.num-fragments-in-flight", 0, timeout=60)
 
diff --git a/tests/query_test/test_insert_behaviour.py b/tests/query_test/test_insert_behaviour.py
index bd0dd4e..3bcb0c1 100644
--- a/tests/query_test/test_insert_behaviour.py
+++ b/tests/query_test/test_insert_behaviour.py
@@ -23,7 +23,8 @@ import re
 
 from tests.common.impala_test_suite import ImpalaTestSuite
 from tests.common.parametrize import UniqueDatabase
-from tests.common.skip import SkipIfS3, SkipIfABFS, SkipIfADLS, SkipIfIsilon, SkipIfLocal
+from tests.common.skip import (SkipIfS3, SkipIfABFS, SkipIfADLS, SkipIfIsilon,
+    SkipIfLocal, SkipIfDockerizedCluster)
 from tests.util.filesystem_utils import WAREHOUSE, get_fs_path, IS_S3
 
 @SkipIfLocal.hdfs_client
@@ -362,6 +363,7 @@ class TestInsertBehaviour(ImpalaTestSuite):
   @SkipIfABFS.hdfs_acls
   @SkipIfADLS.hdfs_acls
   @SkipIfIsilon.hdfs_acls
+  @SkipIfDockerizedCluster.insert_acls
   def test_insert_acl_permissions(self, unique_database):
     """Test that INSERT correctly respects ACLs"""
     table = "`{0}`.`insert_acl_permissions`".format(unique_database)
@@ -566,6 +568,7 @@ class TestInsertBehaviour(ImpalaTestSuite):
   @SkipIfABFS.hdfs_acls
   @SkipIfADLS.hdfs_acls
   @SkipIfIsilon.hdfs_acls
+  @SkipIfDockerizedCluster.insert_acls
   def test_multiple_group_acls(self, unique_database):
     """Test that INSERT correctly respects multiple group ACLs"""
     table = "`{0}`.`insert_group_acl_permissions`".format(unique_database)
diff --git a/tests/query_test/test_kudu.py b/tests/query_test/test_kudu.py
index f5edac4..67ba8e2 100644
--- a/tests/query_test/test_kudu.py
+++ b/tests/query_test/test_kudu.py
@@ -1124,6 +1124,7 @@ class TestKuduMemLimits(KuduTestSuite):
     # additional minute. This ensures that the num fragments 'in flight' reaches 0 in
     # less time than IMPALA-4654 was reproducing (~60sec) but yet still enough time that
     # this test won't be flaky.
-    verifiers = [ MetricVerifier(i.service) for i in ImpalaCluster().impalads ]
+    verifiers = [MetricVerifier(i.service)
+                 for i in ImpalaCluster.get_e2e_test_cluster().impalads]
     for v in verifiers:
       v.wait_for_metric("impala-server.num-fragments-in-flight", 0, timeout=30)
diff --git a/tests/query_test/test_lifecycle.py b/tests/query_test/test_lifecycle.py
index 7ed8f90..30b12c0 100644
--- a/tests/query_test/test_lifecycle.py
+++ b/tests/query_test/test_lifecycle.py
@@ -36,7 +36,8 @@ class TestFragmentLifecycleWithDebugActions(ImpalaTestSuite):
   @pytest.mark.execute_serially
   def test_failure_in_prepare(self):
     # Fail the scan node
-    verifiers = [ MetricVerifier(i.service) for i in ImpalaCluster().impalads ]
+    verifiers = [MetricVerifier(i.service)
+                 for i in ImpalaCluster.get_e2e_test_cluster().impalads]
     self.client.execute("SET DEBUG_ACTION='-1:0:PREPARE:FAIL'");
     try:
       self.client.execute("SELECT COUNT(*) FROM functional.alltypes")
@@ -51,7 +52,8 @@ class TestFragmentLifecycleWithDebugActions(ImpalaTestSuite):
   def test_failure_in_prepare_multi_fragment(self):
     # Test that if one fragment fails that the others are cleaned up during the ensuing
     # cancellation.
-    verifiers = [ MetricVerifier(i.service) for i in ImpalaCluster().impalads ]
+    verifiers = [MetricVerifier(i.service)
+                 for i in ImpalaCluster.get_e2e_test_cluster().impalads]
     # Fail the scan node
     self.client.execute("SET DEBUG_ACTION='-1:0:PREPARE:FAIL'");
 
diff --git a/tests/query_test/test_local_fs.py b/tests/query_test/test_local_fs.py
index 0eb218b..dd891db 100644
--- a/tests/query_test/test_local_fs.py
+++ b/tests/query_test/test_local_fs.py
@@ -18,6 +18,7 @@
 # Validates table stored on the LocalFileSystem.
 #
 from tests.common.impala_test_suite import ImpalaTestSuite
+from tests.common.skip import SkipIfDockerizedCluster
 from tests.common.test_dimensions import create_single_exec_option_dimension
 
 class TestLocalFileSystem(ImpalaTestSuite):
@@ -34,5 +35,6 @@ class TestLocalFileSystem(ImpalaTestSuite):
         v.get_value('table_format').file_format == 'text' and \
         v.get_value('table_format').compression_codec == 'none')
 
+  @SkipIfDockerizedCluster.accesses_host_filesystem
   def test_local_filesystem(self, vector, unique_database):
     self.run_test_case('QueryTest/local-filesystem', vector, unique_database)
diff --git a/tests/query_test/test_mem_usage_scaling.py b/tests/query_test/test_mem_usage_scaling.py
index 69b4f2b..7e36149 100644
--- a/tests/query_test/test_mem_usage_scaling.py
+++ b/tests/query_test/test_mem_usage_scaling.py
@@ -224,7 +224,7 @@ class TestTpchMemLimitError(TestLowMemoryLimits):
     self.low_memory_limit_test(vector, 'tpch-q14', self.MIN_MEM_FOR_TPCH['Q14'])
     self.low_memory_limit_test(vector, 'tpch-q18', self.MIN_MEM_FOR_TPCH['Q18'])
     self.low_memory_limit_test(vector, 'tpch-q20', self.MIN_MEM_FOR_TPCH['Q20'])
-    for impalad in ImpalaCluster().impalads:
+    for impalad in ImpalaCluster.get_e2e_test_cluster().impalads:
       verifier = MetricVerifier(impalad.service)
       verifier.wait_for_metric("impala-server.num-fragments-in-flight", 0)
 
diff --git a/tests/query_test/test_queries.py b/tests/query_test/test_queries.py
index bca2e3f..2a44fce 100644
--- a/tests/query_test/test_queries.py
+++ b/tests/query_test/test_queries.py
@@ -22,7 +22,7 @@ import re
 from copy import deepcopy
 
 from tests.common.impala_test_suite import ImpalaTestSuite
-from tests.common.skip import SkipIfEC
+from tests.common.skip import SkipIfEC, SkipIfDockerizedCluster
 from tests.common.test_dimensions import (
     create_uncompressed_text_dimension, extend_exec_option_dimension,
     create_beeswax_hs2_dimension, hs2_parquet_constraint)
@@ -204,6 +204,7 @@ class TestQueriesParquetTables(ImpalaTestSuite):
     return 'functional-query'
 
   @SkipIfEC.oom
+  @SkipIfDockerizedCluster.jvm_oom_large_string
   @pytest.mark.execute_serially
   def test_very_large_strings(self, vector):
     """Regression test for IMPALA-1619. Doesn't need to be run on all file formats.
diff --git a/tests/query_test/test_udfs.py b/tests/query_test/test_udfs.py
index ab62df3..75a1686 100644
--- a/tests/query_test/test_udfs.py
+++ b/tests/query_test/test_udfs.py
@@ -45,7 +45,7 @@ class TestUdfBase(ImpalaTestSuite):
     raise e
 
   def _run_query_all_impalads(self, exec_options, query, expected):
-    impala_cluster = ImpalaCluster()
+    impala_cluster = ImpalaCluster.get_e2e_test_cluster()
     for impalad in impala_cluster.impalads:
       client = impalad.service.create_beeswax_client()
       result = self.execute_query_expect_success(client, query, exec_options)
@@ -359,7 +359,7 @@ class TestUdfExecution(TestUdfBase):
     # It takes a long time for Impala to free up memory after this test, especially if
     # ASAN is enabled. Verify that all fragments finish executing before moving on to the
     # next test to make sure that the next test is not affected.
-    for impalad in ImpalaCluster().impalads:
+    for impalad in ImpalaCluster.get_e2e_test_cluster().impalads:
       verifier = MetricVerifier(impalad.service)
       verifier.wait_for_metric("impala-server.num-fragments-in-flight", 0)
       verifier.verify_num_unused_buffers()
@@ -467,7 +467,7 @@ class TestUdfTargeted(TestUdfBase):
         "create function `{0}`.`pi_missing_jar`() returns double location '{1}' "
         "symbol='org.apache.hadoop.hive.ql.udf.UDFPI'".format(unique_database, jar_path))
 
-    cluster = ImpalaCluster()
+    cluster = ImpalaCluster.get_e2e_test_cluster()
     impalad = cluster.get_any_impalad()
     client = impalad.service.create_beeswax_client()
     # Create and drop functions with sync_ddl to make sure they are reflected
diff --git a/tests/run-tests.py b/tests/run-tests.py
index 0de9ce9..3e7dbeb 100755
--- a/tests/run-tests.py
+++ b/tests/run-tests.py
@@ -225,9 +225,9 @@ def build_ignore_dir_arg_list(valid_dirs):
 
 def print_metrics(substring):
   """Prints metrics with the given substring in the name"""
-  for impalad in ImpalaCluster().impalads:
+  for impalad in ImpalaCluster.get_e2e_test_cluster().impalads:
     print ">" * 80
-    port = impalad._get_webserver_port()
+    port = impalad.get_webserver_port()
     print "connections metrics for impalad at port {0}:".format(port)
     debug_info = json.loads(ImpaladService(impalad.hostname, webserver_port=port)
         .read_debug_webpage('metrics?json'))
diff --git a/tests/statestore/test_statestore.py b/tests/statestore/test_statestore.py
index 85f42dc..b69b3f2 100644
--- a/tests/statestore/test_statestore.py
+++ b/tests/statestore/test_statestore.py
@@ -40,6 +40,7 @@ from ErrorCodes.ttypes import TErrorCode
 from Status.ttypes import TStatus
 
 from tests.common.environ import build_flavor_timeout
+from tests.common.skip import SkipIfDockerizedCluster
 
 LOG = logging.getLogger('test_statestore')
 
@@ -159,7 +160,6 @@ class KillableThreadedServer(TServer):
     itrans.close()
     otrans.close()
 
-
 class StatestoreSubscriber(object):
   """A bare-bones subscriber skeleton. Tests should create a new StatestoreSubscriber(),
   call start() and then register(). The subscriber will run a Thrift server on an unused
@@ -341,6 +341,8 @@ class StatestoreSubscriber(object):
             self.subscriber_id, timeout))
       time.sleep(0.2)
 
+
+@SkipIfDockerizedCluster.statestore_not_exposed
 class TestStatestore():
   def make_topic_update(self, topic_name, key_template="foo", value_template="bar",
                         num_updates=1, clear_topic_entries=False):
diff --git a/tests/stress/test_mini_stress.py b/tests/stress/test_mini_stress.py
index d91163d..9aec1f4 100644
--- a/tests/stress/test_mini_stress.py
+++ b/tests/stress/test_mini_stress.py
@@ -73,7 +73,7 @@ class TestMiniStress(ImpalaTestSuite):
     don't cause failures with other running workloads and ensures catalog versions
     are strictly increasing."""
     target_db = self.execute_scalar('select current_database()', vector=vector)
-    impala_cluster = ImpalaCluster()
+    impala_cluster = ImpalaCluster.get_e2e_test_cluster()
     impalad = impala_cluster.impalads[0].service
     catalogd = impala_cluster.catalogd.service
 
diff --git a/tests/webserver/test_web_pages.py b/tests/webserver/test_web_pages.py
index fd7fe80..463e1fe 100644
--- a/tests/webserver/test_web_pages.py
+++ b/tests/webserver/test_web_pages.py
@@ -128,7 +128,7 @@ class TestWebPage(ImpalaTestSuite):
     """Test that /query_profile_encoded error message starts with the expected line in
     case of missing query and does not contain any leading whitespace.
     """
-    cluster = ImpalaCluster()
+    cluster = ImpalaCluster.get_e2e_test_cluster()
     impalad = cluster.get_any_impalad()
     result = impalad.service.read_debug_webpage("query_profile_encoded?query_id=123")
     assert result.startswith("Could not obtain runtime profile: Query id")