You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by jo...@apache.org on 2022/06/13 17:15:26 UTC

[impala] 03/05: IMPALA-11314: Test PyPI package with system python

This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 5263d13112bc35ae69e2c2c09adc823a96fc8c13
Author: Michael Smith <mi...@cloudera.com>
AuthorDate: Thu Jun 2 10:02:23 2022 -0700

    IMPALA-11314: Test PyPI package with system python
    
    Sets up a virtualenv with system python to install the impala-shell PyPI
    package into. Using system python provides better coverage for Python
    versions likely to be used by customers. Runs impala-shell tests using
    the PyPI package to provide better coverage for the artifact customers
    will use.
    
    Includes a PyPI install in notests_independent_targets because these
    seem to be used for Python testing despite -notests.
    
    Change-Id: I384ea6a7dab51945828cca629860400a23fa0c05
    Reviewed-on: http://gerrit.cloudera.org:8080/18586
    Reviewed-by: Joe McDonnell <jo...@cloudera.com>
    Tested-by: Joe McDonnell <jo...@cloudera.com>
---
 CMakeLists.txt                          | 11 +---
 bin/impala-shell.sh                     |  3 +-
 bin/impala-virtualenv                   | 21 ++++++++
 shell/CMakeLists.txt                    | 45 ++++++++++++++++
 tests/custom_cluster/test_client_ssl.py |  3 +-
 tests/query_test/test_date_queries.py   |  3 +-
 tests/shell/test_shell_commandline.py   | 28 +++++-----
 tests/shell/test_shell_interactive.py   | 40 ++++----------
 tests/shell/util.py                     | 96 +++++++++++++++------------------
 9 files changed, 140 insertions(+), 110 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 857d295d6..30280b12e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -451,6 +451,7 @@ add_subdirectory(common/protobuf)
 add_subdirectory(be)
 add_subdirectory(docker)
 add_subdirectory(java)
+add_subdirectory(shell)
 
 # Build target for all generated files which most backend code depends on
 add_custom_target(gen-deps ALL DEPENDS thrift-deps proto-deps fb-deps
@@ -458,14 +459,6 @@ add_custom_target(gen-deps ALL DEPENDS thrift-deps proto-deps fb-deps
 
 add_custom_target(tarballs ALL DEPENDS shell_tarball)
 
-add_custom_target(shell_tarball DEPENDS gen-deps
-  COMMAND "${CMAKE_SOURCE_DIR}/shell/make_shell_tarball.sh"
-)
-
-add_custom_target(shell_pypi_package DEPENDS shell_tarball
-  COMMAND "${CMAKE_SOURCE_DIR}/shell/packaging/make_python_package.sh"
-)
-
 add_custom_target(cscope ALL DEPENDS gen-deps
   COMMAND "${CMAKE_SOURCE_DIR}/bin/gen-cscope.sh"
 )
@@ -475,7 +468,7 @@ add_custom_target(impala_python ALL
 )
 
 add_custom_target(notests_independent_targets DEPENDS
-  java cscope tarballs impala_python
+  java cscope tarballs impala_python shell_python2_install
 )
 add_custom_target(notests_regular_targets DEPENDS
   impalad statestored catalogd admissiond fesupport loggingsupport ImpalaUdf udasample udfsample impala-profile-tool
diff --git a/bin/impala-shell.sh b/bin/impala-shell.sh
index 7181d50c8..65f6b6b30 100755
--- a/bin/impala-shell.sh
+++ b/bin/impala-shell.sh
@@ -38,7 +38,8 @@ export LD_LIBRARY_PATH=":$(PYTHONPATH=${PYTHONPATH} \
 
 IMPALA_PY_DIR="$(dirname "$0")/../infra/python"
 IMPALA_PY_ENV_DIR="${IMPALA_PY_DIR}/env-gcc${IMPALA_GCC_VERSION}"
-IMPALA_PYTHON_EXECUTABLE="${IMPALA_PY_ENV_DIR}/bin/python"
+# Allow overriding the python executable
+IMPALA_PYTHON_EXECUTABLE="${IMPALA_PYTHON_EXECUTABLE:-${IMPALA_PY_ENV_DIR}/bin/python}"
 
 for PYTHON_LIB_DIR in ${THRIFT_PY_ROOT}/python/lib{64,}; do
   [[ -d ${PYTHON_LIB_DIR} ]] || continue
diff --git a/bin/impala-virtualenv b/bin/impala-virtualenv
new file mode 100755
index 000000000..8f4e58f5d
--- /dev/null
+++ b/bin/impala-virtualenv
@@ -0,0 +1,21 @@
+#!/bin/bash
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+source "$(dirname "$0")/impala-python-common.sh"
+exec "$PY_ENV_DIR/bin/python" "$PY_ENV_DIR/bin/virtualenv" "$@"
diff --git a/shell/CMakeLists.txt b/shell/CMakeLists.txt
new file mode 100644
index 000000000..2ac96a4ca
--- /dev/null
+++ b/shell/CMakeLists.txt
@@ -0,0 +1,45 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+add_custom_target(shell_tarball DEPENDS gen-deps
+  COMMAND "${CMAKE_SOURCE_DIR}/shell/make_shell_tarball.sh"
+)
+
+add_custom_target(shell_pypi_package DEPENDS shell_tarball
+  COMMAND "${CMAKE_SOURCE_DIR}/shell/packaging/make_python_package.sh"
+)
+
+# A separate package target is needed because without OFFICIAL the file name is
+# non-deterministic. Uses a custom target to synchronize for multiple dependents.
+set(SHELL_TEST_PKG
+  "${CMAKE_SOURCE_DIR}/shell/build/dist/impala_shell-install-test.tar.gz")
+get_filename_component(SHELL_TEST_PKG_DIR "${SHELL_TEST_PKG}" DIRECTORY)
+# Generates SHELL_TEST_PKG
+add_custom_target(shell_pypi_test_package DEPENDS shell_tarball
+  COMMAND env BUILD_VERSION=install-test OFFICIAL=true DIST_DIR="${SHELL_TEST_PKG_DIR}"
+    "${CMAKE_SOURCE_DIR}/shell/packaging/make_python_package.sh"
+)
+
+set(PYTHON2_VENV "${CMAKE_SOURCE_DIR}/shell/build/py2_venv")
+
+add_custom_command(OUTPUT "${PYTHON2_VENV}" DEPENDS impala_python
+  COMMAND impala-virtualenv --python python2 "${PYTHON2_VENV}"
+)
+
+add_custom_target(shell_python2_install ALL DEPENDS "${PYTHON2_VENV}" shell_pypi_test_package
+  COMMAND "${PYTHON2_VENV}/bin/pip" install "${SHELL_TEST_PKG}"
+)
diff --git a/tests/custom_cluster/test_client_ssl.py b/tests/custom_cluster/test_client_ssl.py
index 4add40501..0c56948fd 100644
--- a/tests/custom_cluster/test_client_ssl.py
+++ b/tests/custom_cluster/test_client_ssl.py
@@ -32,7 +32,7 @@ from tests.common.custom_cluster_test_suite import CustomClusterTestSuite
 from tests.common.impala_service import ImpaladService
 from tests.common.test_dimensions import create_client_protocol_dimension
 from tests.shell.util import run_impala_shell_cmd, run_impala_shell_cmd_no_expect, \
-    ImpalaShell
+    ImpalaShell, create_impala_shell_executable_dimension
 
 REQUIRED_MIN_OPENSSL_VERSION = 0x10001000L
 # Python supports TLSv1.2 from 2.7.9 officially but on Red Hat/CentOS Python2.7.5
@@ -142,6 +142,7 @@ class TestClientSsl(CustomClusterTestSuite):
   def add_test_dimensions(cls):
     super(TestClientSsl, cls).add_test_dimensions()
     cls.ImpalaTestMatrix.add_dimension(create_client_protocol_dimension())
+    cls.ImpalaTestMatrix.add_dimension(create_impala_shell_executable_dimension())
 
   @pytest.mark.execute_serially
   @CustomClusterTestSuite.with_args(impalad_args=WEBSERVER_SSL_ARGS,
diff --git a/tests/query_test/test_date_queries.py b/tests/query_test/test_date_queries.py
index a2cc1126c..34622f263 100644
--- a/tests/query_test/test_date_queries.py
+++ b/tests/query_test/test_date_queries.py
@@ -24,7 +24,7 @@ from tests.common.skip import (SkipIfS3, SkipIfABFS, SkipIfADLS, SkipIfLocal, Sk
                                SkipIfCOS)
 from tests.common.test_dimensions import (create_exec_option_dimension_from_dict,
     create_client_protocol_dimension, hs2_parquet_constraint)
-from tests.shell.util import ImpalaShell
+from tests.shell.util import create_impala_shell_executable_dimension
 
 
 class TestDateQueries(ImpalaTestSuite):
@@ -50,6 +50,7 @@ class TestDateQueries(ImpalaTestSuite):
     # via both protocols.
     cls.ImpalaTestMatrix.add_dimension(create_client_protocol_dimension())
     cls.ImpalaTestMatrix.add_constraint(hs2_parquet_constraint)
+    cls.ImpalaTestMatrix.add_dimension(create_impala_shell_executable_dimension())
 
   def test_queries(self, vector):
     if vector.get_value('table_format').file_format == 'avro':
diff --git a/tests/shell/test_shell_commandline.py b/tests/shell/test_shell_commandline.py
index 7f146855e..abfec1be5 100644
--- a/tests/shell/test_shell_commandline.py
+++ b/tests/shell/test_shell_commandline.py
@@ -39,8 +39,8 @@ from tests.common.test_dimensions import (
   create_uncompressed_text_dimension, create_single_exec_option_dimension)
 from time import sleep, time
 from util import (get_impalad_host_port, assert_var_substitution, run_impala_shell_cmd,
-                  ImpalaShell, IMPALA_SHELL_EXECUTABLE, SHELL_IS_PYTHON_2,
-                  build_shell_env, wait_for_query_state)
+                  ImpalaShell, build_shell_env, wait_for_query_state,
+                  create_impala_shell_executable_dimension, get_impala_shell_executable)
 from contextlib import closing
 
 
@@ -144,7 +144,9 @@ class TestImpalaShell(ImpalaTestSuite):
     cls.ImpalaTestMatrix.add_dimension(create_client_protocol_dimension())
     cls.ImpalaTestMatrix.add_dimension(create_client_protocol_strict_dimension())
     cls.ImpalaTestMatrix.add_constraint(lambda v:
-          v.get_value('protocol') != 'beeswax' or not v.get_value('strict_hs2_protocol'))
+        v.get_value('protocol') != 'beeswax' or not v.get_value('strict_hs2_protocol'))
+    # Test with python2 and the raw tarball
+    cls.ImpalaTestMatrix.add_dimension(create_impala_shell_executable_dimension())
 
   def test_no_args(self, vector):
     args = ['-q', DEFAULT_QUERY]
@@ -867,7 +869,8 @@ class TestImpalaShell(ImpalaTestSuite):
     # Building an one-off shell command instead of using Util::ImpalaShell since we need
     # to customize the impala daemon socket.
     protocol = vector.get_value("protocol")
-    shell_cmd = [IMPALA_SHELL_EXECUTABLE, "--protocol={0}".format(protocol)]
+    impala_shell_executable = get_impala_shell_executable(vector)
+    shell_cmd = [impala_shell_executable, "--protocol={0}".format(protocol)]
     if protocol == 'beeswax':
       expected_output = "get_default_configuration"
     else:
@@ -1103,16 +1106,13 @@ class TestImpalaShell(ImpalaTestSuite):
     assert "1\t1\t10.1" in result.stdout, result.stdout
     assert "2\t2\t20.2" in result.stdout, result.stdout
 
-    if (vector.get_value("protocol") in ('hs2', 'hs2-http')) and not SHELL_IS_PYTHON_2:
-      # The HS2 client returns binary values for float/double types, and these must
-      # be converted to strings for display. However, due to differences between the
-      # way that python2 and python3 represent floating point values, the output
-      # from the shell will differ with regard to which version of python the
-      # shell is running under.
-      assert "3\t3\t30.299999999999997" in result.stdout, result.stdout
-    else:
-      # python 2, or python 3 with beeswax protocol
-      assert "3\t3\t30.3" in result.stdout, result.stdout
+    # The HS2 client returns binary values for float/double types, and these must
+    # be converted to strings for display. However, due to differences between the
+    # way that python2 and python3 represent floating point values, the output
+    # from the shell will differ with regard to which version of python the
+    # shell is running under.
+    assert("3\t3\t30.299999999999997" in result.stdout or
+      "3\t3\t30.3" in result.stdout), result.stdout
 
     assert "4\t4\t40.4" in result.stdout, result.stdout
 
diff --git a/tests/shell/test_shell_interactive.py b/tests/shell/test_shell_interactive.py
index 300b042e6..143e3b4a1 100755
--- a/tests/shell/test_shell_interactive.py
+++ b/tests/shell/test_shell_interactive.py
@@ -43,9 +43,9 @@ from tests.common.skip import SkipIfLocal
 from tests.common.test_dimensions import (
   create_client_protocol_dimension, create_client_protocol_strict_dimension,
   create_uncompressed_text_dimension, create_single_exec_option_dimension)
-from tests.shell.util import get_unused_port
 from util import (assert_var_substitution, ImpalaShell, get_impalad_port, get_shell_cmd,
-                  get_open_sessions_metric, IMPALA_SHELL_EXECUTABLE, spawn_shell)
+                  get_open_sessions_metric, spawn_shell, get_unused_port,
+                  create_impala_shell_executable_dimension, get_impala_shell_executable)
 import SimpleHTTPServer
 import SocketServer
 
@@ -175,7 +175,9 @@ class TestImpalaShellInteractive(ImpalaTestSuite):
     cls.ImpalaTestMatrix.add_dimension(create_client_protocol_dimension())
     cls.ImpalaTestMatrix.add_dimension(create_client_protocol_strict_dimension())
     cls.ImpalaTestMatrix.add_constraint(lambda v:
-          v.get_value('protocol') != 'beeswax' or not v.get_value('strict_hs2_protocol'))
+        v.get_value('protocol') != 'beeswax' or not v.get_value('strict_hs2_protocol'))
+    # Test with python2 and the raw tarball
+    cls.ImpalaTestMatrix.add_dimension(create_impala_shell_executable_dimension())
 
   def _expect_with_cmd(self, proc, cmd, vector, expectations=(), db="default"):
     """Executes a command on the expect process instance and verifies a set of
@@ -899,7 +901,7 @@ class TestImpalaShellInteractive(ImpalaTestSuite):
   def test_line_with_leading_comment(self, vector, unique_database):
     # IMPALA-2195: A line with a comment produces incorrect command.
     if vector.get_value('strict_hs2_protocol'):
-      pytest.skip("Leading omments not supported in strict hs2 mode.")
+      pytest.skip("Leading comments not supported in strict hs2 mode.")
     table = "{0}.leading_comment".format(unique_database)
     run_impala_shell_interactive(vector, 'create table {0} (i int);'.format(table))
     result = run_impala_shell_interactive(vector, '-- comment\n'
@@ -984,30 +986,6 @@ class TestImpalaShellInteractive(ImpalaTestSuite):
   def test_fix_infinite_loop(self, vector):
     # IMPALA-6337: Fix infinite loop.
 
-    # In case of TL;DR:
-    # - see IMPALA-9362 for details
-    # - see tests/shell/util.py for explanation of IMPALA_SHELL_EXECUTABLE
-    if os.getenv("IMPALA_HOME") not in IMPALA_SHELL_EXECUTABLE:
-      # The fix for IMPALA-6337 involved patching our internal verison of
-      # sqlparse 0.1.19 in ${IMPALA_HOME}/shell/ext-py. However, when we
-      # create the the stand-alone python package of the impala-shell for PyPI,
-      # we don't include the bundled 3rd party libs -- we expect users to
-      # install 3rd upstream libraries from PyPI.
-      #
-      # We could try to bundle sqlparse with the PyPI package, but there we
-      # run into the issue that the our bundled version is not python 3
-      # compatible. The real fix for this would be to upgrade to sqlparse 0.3.0,
-      # but that's not without complications. See IMPALA-9362 for details.
-      #
-      # For the time being, what this means is that IMPALA-6337 is fixed for
-      # people who are running the shell locally from any host/node that's part
-      # of a cluster where Impala is installed, but if they are running a
-      # standalone version of the shell on a client outside of a cluster, then
-      # they will still be relying on the upstream version of sqlparse 0.1.19,
-      # and so they may still be affected by the IMPALA-6337.
-      #
-      pytest.skip("Test will fail if shell is not part of dev environment.")
-
     result = run_impala_shell_interactive(vector, "select 1 + 1; \"\n;\";")
     if vector.get_value('strict_hs2_protocol'):
       assert '| 2   |' in result.stdout
@@ -1194,9 +1172,10 @@ class TestImpalaShellInteractive(ImpalaTestSuite):
       pytest.skip()
 
     # Check that we get a message about the 503 error when we try to connect.
+    impala_shell_executable = get_impala_shell_executable(vector)
     shell_args = ["--protocol={0}".format(protocol),
                   "-i{0}:{1}".format(http_503_server.HOST, http_503_server.PORT)]
-    shell_proc = spawn_shell([IMPALA_SHELL_EXECUTABLE] + shell_args)
+    shell_proc = spawn_shell([impala_shell_executable] + shell_args)
     shell_proc.expect("HTTP code 503", timeout=10)
 
   def test_http_interactions_extra(self, vector, http_503_server_extra):
@@ -1208,10 +1187,11 @@ class TestImpalaShellInteractive(ImpalaTestSuite):
       pytest.skip()
 
     # Check that we get a message about the 503 error when we try to connect.
+    impala_shell_executable = get_impala_shell_executable(vector)
     shell_args = ["--protocol={0}".format(protocol),
                   "-i{0}:{1}".format(http_503_server_extra.HOST,
                                      http_503_server_extra.PORT)]
-    shell_proc = spawn_shell([IMPALA_SHELL_EXECUTABLE] + shell_args)
+    shell_proc = spawn_shell([impala_shell_executable] + shell_args)
     shell_proc.expect("HTTP code 503: Service Unavailable \[EXTRA\]", timeout=10)
 
 
diff --git a/tests/shell/util.py b/tests/shell/util.py
index fc0365f3b..7a4ac21dc 100755
--- a/tests/shell/util.py
+++ b/tests/shell/util.py
@@ -37,6 +37,7 @@ from tests.common.impala_service import ImpaladService
 from tests.common.impala_test_suite import (IMPALAD_BEESWAX_HOST_PORT,
     IMPALAD_HS2_HOST_PORT, IMPALAD_HS2_HTTP_HOST_PORT,
     STRICT_HS2_HOST_PORT, STRICT_HS2_HTTP_HOST_PORT)
+from tests.common.test_vector import ImpalaTestDimension
 
 LOG = logging.getLogger('tests/shell/util.py')
 LOG.addHandler(logging.StreamHandler())
@@ -44,22 +45,6 @@ LOG.addHandler(logging.StreamHandler())
 SHELL_HISTORY_FILE = os.path.expanduser("~/.impalahistory")
 IMPALA_HOME = os.environ['IMPALA_HOME']
 
-# Note that pytest.config.getoption is deprecated usage. We use this
-# in a couple of other places. Ultimately, it needs to be addressed if
-# we ever want to get off of pytest 2.9.2.
-IMPALA_SHELL_EXECUTABLE = pytest.config.getoption('shell_executable')
-
-if IMPALA_SHELL_EXECUTABLE is None:
-  if ImpalaTestClusterProperties.get_instance().is_remote_cluster():
-    # With remote cluster testing, we cannot assume that the shell was built locally.
-    IMPALA_SHELL_EXECUTABLE = os.path.join(IMPALA_HOME, "bin/impala-shell.sh")
-  else:
-    # Test the locally built shell distribution.
-    IMPALA_SHELL_EXECUTABLE = os.path.join(
-        IMPALA_HOME, "shell/build", "impala-shell-" + IMPALA_LOCAL_BUILD_VERSION,
-        "impala-shell")
-
-
 def build_shell_env(env=None):
   """ Construct the environment for the shell to run in based on 'env', or the current
   process's environment if env is None."""
@@ -74,41 +59,6 @@ def build_shell_env(env=None):
   return env
 
 
-def get_python_version_for_shell_env():
-  """
-  Return the version of python belonging to the tested IMPALA_SHELL_EXECUTABLE.
-
-  We need this because some tests behave differently based on the version of
-  python being used to execute the impala-shell. However, since the test
-  framework itself is still being run with python2.7.x, sys.version_info
-  alone can't help us to determine the python version for the environment of
-  the shell executable. Instead, we have to invoke the shell, and then parse
-  the python version from the output. This information is present even in the
-  case of a fatal shell exception, e.g., not being unable to establish a
-  connection to an impalad.
-  """
-  version_check = Popen([IMPALA_SHELL_EXECUTABLE, '-q', 'version()'],
-                        stdout=PIPE, stderr=PIPE, env=build_shell_env())
-  stdout, stderr = version_check.communicate()
-
-  # e.g. Starting Impala with Kerberos authentication using Python 3.7.6
-  start_msg_line = stderr.split('\n')[0]
-  py_version = start_msg_line.split()[-1]   # e.g. 3.7.6
-  try:
-    major_version, minor_version, micro_version = py_version.split('.')
-    ret_val = int(major_version)
-  except (ValueError, UnboundLocalError) as e:
-    LOG.error(stderr)
-    sys.exit("Could not determine python version in shell env: {}".format(str(e)))
-
-  return ret_val
-
-
-# Since both test_shell_commandline and test_shell_interactive import from
-# this file, this check will be forced before any tests are run.
-SHELL_IS_PYTHON_2 = True if (get_python_version_for_shell_env() == 2) else False
-
-
 def assert_var_substitution(result):
   assert_pattern(r'\bfoo_number=.*$', 'foo_number= 123123', result.stdout, \
     'Numeric values not replaced correctly')
@@ -222,15 +172,16 @@ def get_impalad_port(vector):
 def get_shell_cmd(vector):
   """Get the basic shell command to start the shell, given the provided test vector.
   Returns the command as a list of string arguments."""
+  impala_shell_executable = get_impala_shell_executable(vector)
   if vector.get_value_with_default("strict_hs2_protocol", False):
     protocol = vector.get_value("protocol")
-    return [IMPALA_SHELL_EXECUTABLE,
+    return [impala_shell_executable,
             "--protocol={0}".format(protocol),
             "--strict_hs2_protocol",
             "--use_ldap_test_password",
             "-i{0}".format(get_impalad_host_port(vector))]
   else:
-    return [IMPALA_SHELL_EXECUTABLE,
+    return [impala_shell_executable,
             "--protocol={0}".format(vector.get_value("protocol")),
             "-i{0}".format(get_impalad_host_port(vector))]
 
@@ -257,7 +208,7 @@ class ImpalaShellResult(object):
 
 
 class ImpalaShell(object):
-  """A single instance of the Impala shell. The proces is started when this object is
+  """A single instance of the Impala shell. The process is started when this object is
      constructed, and then users should repeatedly call send_cmd(), followed eventually by
      get_result() to retrieve the process output. This constructor will wait until
      Impala shell is connected for the specified timeout unless wait_until_connected is
@@ -349,3 +300,40 @@ def wait_for_query_state(vector, stmt, state, max_retry=15):
     retry_count += 1
     time.sleep(1.0)
   raise Exception("Query didn't reach desired state: " + state)
+
+
+# Returns a tuple: (shell executable path, whether to include PyPI variants)
+def get_dev_impala_shell_executable():
+  # Note that pytest.config.getoption is deprecated usage. We use this
+  # in a couple of other places. Ultimately, it needs to be addressed if
+  # we ever want to get off of pytest 2.9.2.
+  impala_shell_executable = pytest.config.getoption('shell_executable')
+
+  if impala_shell_executable is not None:
+    return impala_shell_executable, False
+
+  if ImpalaTestClusterProperties.get_instance().is_remote_cluster():
+    # With remote cluster testing, we cannot assume that the shell was built locally.
+    return os.path.join(IMPALA_HOME, "bin/impala-shell.sh"), False
+  else:
+    # Test the locally built shell distribution.
+    return os.path.join(IMPALA_HOME, "shell/build",
+        "impala-shell-" + IMPALA_LOCAL_BUILD_VERSION, "impala-shell"), True
+
+
+def create_impala_shell_executable_dimension():
+  _, include_pypi = get_dev_impala_shell_executable()
+  if include_pypi:
+    return ImpalaTestDimension('impala_shell', 'dev', 'python2')
+  else:
+    return ImpalaTestDimension('impala_shell', 'dev')
+
+
+def get_impala_shell_executable(vector):
+  # impala-shell is invoked in some places where adding a test vector may not make sense;
+  # use 'dev' as the default.
+  impala_shell_executable, _ = get_dev_impala_shell_executable()
+  return {
+    'dev': impala_shell_executable,
+    'python2': os.path.join(IMPALA_HOME, 'shell/build/py2_venv/bin/impala-shell')
+  }[vector.get_value_with_default('impala_shell', 'dev')]