You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by jo...@apache.org on 2023/03/09 17:22:50 UTC

[impala] 01/06: IMPALA-11959: Add Python 3 virtualenv

This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 566df808913aef6ff5eecc3849e14df8370bd651
Author: Joe McDonnell <jo...@cloudera.com>
AuthorDate: Tue Apr 6 21:36:07 2021 -0700

    IMPALA-11959: Add Python 3 virtualenv
    
    This adds a Python 3 equivalent to the impala-python
    virtualenv based on the toolchain Python 3.7.16.
    This modifies bootstrap_virtualenv.py to support
    the two different modes. This adds py2-requirements.txt
    and py3-requirements.txt to allow some differences
    between the Python 2 and Python 3 virtualenvs.
    
    Here are some specific package changes:
     - allpairs is replaced with allpairspy, as allpairs did
       not support Python 3.
     - requests is upgraded slightly, because otherwise it has issues
       with idna==2.8.
     - pylint is limited to Python 3, because we are adding it
       and don't need it on both
     - flake8 is limited to Python 2, because it will take
       some work to switch to a version that works on Python 3
     - cm_api is limited to Python 2, because it doesn't support
       Python 3
     - pytest-random does not support Python 3 and it is unused,
       so it is removed
     - Bump the version of setuptools-scm to support Python 3
    
    This adds impala-pylint, which can be used to do further
    Python 3 checks via --py3k. This also adds a bin/check-pylint-py3k.sh
    script to enforce specific py3k checks. The banned py3k warnings
    are specified in the bin/banned_py3k_warnings.txt. This is currently
    empty, but this can ratchet up the py3k strictness over time
    to avoid regressions.
    
    This pulls in a new toolchain with the fix for IMPALA-11956
    to get Python 3.7.16.
    
    Testing:
     - Hand tested that the allpairs libraries produce the
       same results
     - The python3 virtualenv has no influence on regular
       tests yet
    
    Change-Id: Ica4853f440c9a46a79bd5fb8e0a66730b0b4efc0
    Reviewed-on: http://gerrit.cloudera.org:8080/19567
    Reviewed-by: Joe McDonnell <jo...@cloudera.com>
    Tested-by: Joe McDonnell <jo...@cloudera.com>
---
 CMakeLists.txt                                     |   6 +-
 bin/banned_py3k_warnings.txt                       |   0
 bin/bootstrap_toolchain.py                         |   3 +
 bin/check-pylint-py3k.sh                           | 140 +++++++++++++++++++
 bin/impala-config.sh                               |   3 +-
 .../setuptools-requirements.txt => bin/impala-pip3 |   8 +-
 .../impala-pylint                                  |   8 +-
 .../impala-python3                                 |  10 +-
 ...t-impala-python.sh => impala-python3-common.sh} |  27 ++--
 bin/init-impala-python.sh                          |  26 +++-
 bin/rat_exclude_files.txt                          |   1 +
 infra/python/bootstrap_virtualenv.py               | 150 ++++++++++++++-------
 infra/python/deps/pip_download.py                  |   3 +-
 ...tools-requirements.txt => py2-requirements.txt} |  21 ++-
 ...tools-requirements.txt => py3-requirements.txt} |  18 ++-
 infra/python/deps/requirements.txt                 |  20 +--
 infra/python/deps/setuptools-requirements.txt      |   2 +-
 testdata/bin/generate-test-vectors.py              |   3 +-
 tests/common/test_vector.py                        |   4 +-
 tests/custom_cluster/test_hs2_fault_injection.py   |   2 -
 tests/query_test/test_decimal_casting.py           |   2 +-
 21 files changed, 349 insertions(+), 108 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index cb35b393a..2bfc50f5d 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -514,6 +514,10 @@ add_custom_target(impala_python ALL
   COMMAND "${CMAKE_SOURCE_DIR}/bin/init-impala-python.sh"
 )
 
+add_custom_target(impala_python3 ALL
+  COMMAND "${CMAKE_SOURCE_DIR}/bin/init-impala-python.sh" "-python3"
+)
+
 set(IMPALA_PYTHON_INSTALLS "")
 if (NOT $ENV{IMPALA_SYSTEM_PYTHON2} EQUAL "")
   list(APPEND IMPALA_PYTHON_INSTALLS shell_python2_install)
@@ -524,7 +528,7 @@ endif()
 add_custom_target(impala_shell_pypi ALL DEPENDS ${IMPALA_PYTHON_INSTALLS})
 
 add_custom_target(notests_independent_targets DEPENDS
-  java cscope tarballs impala_python impala_shell_pypi
+  java cscope tarballs impala_python impala_python3 impala_shell_pypi
 )
 add_custom_target(notests_regular_targets DEPENDS
   impalad statestored catalogd admissiond fesupport loggingsupport ImpalaUdf udasample udfsample impala-profile-tool
diff --git a/bin/banned_py3k_warnings.txt b/bin/banned_py3k_warnings.txt
new file mode 100644
index 000000000..e69de29bb
diff --git a/bin/bootstrap_toolchain.py b/bin/bootstrap_toolchain.py
index 98ec9d7d6..097484822 100755
--- a/bin/bootstrap_toolchain.py
+++ b/bin/bootstrap_toolchain.py
@@ -494,6 +494,9 @@ def get_toolchain_downloads():
        "crcutil", "curl", "flatbuffers", "gdb", "gflags", "glog", "gperftools", "gtest",
        "jwt-cpp", "libev", "libunwind", "lz4", "openldap", "openssl", "orc", "protobuf",
        "python", "rapidjson", "re2", "snappy", "tpc-h", "tpc-ds", "zlib", "zstd"])
+  python3_package = ToolchainPackage(
+      "python", explicit_version=os.environ.get("IMPALA_PYTHON3_VERSION"))
+  toolchain_packages += [python3_package]
   toolchain_packages += get_unique_toolchain_downloads(
       ["thrift:cpp", "thrift:java", "thrift:py"])
   protobuf_package_clang = ToolchainPackage(
diff --git a/bin/check-pylint-py3k.sh b/bin/check-pylint-py3k.sh
new file mode 100755
index 000000000..9dca19c87
--- /dev/null
+++ b/bin/check-pylint-py3k.sh
@@ -0,0 +1,140 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -euo pipefail
+
+BINDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+
+# To allow incrementally banning individual pylint checks, this uses grep
+# expressions to match banned pylint warnings. The grep expressions are stored
+# in the bin/banned_py3k_warnings.txt file.
+BANNED_PY3K_WARNINGS="${BINDIR}/banned_py3k_warnings.txt"
+
+function print_usage {
+    echo "check-pylint-py3k.sh : Checks eligible python files for pylint py3k compliance."
+    echo "Fails if the python files have py3k warnings that match the patterns in "
+    echo "bin/banned_py3k_warnings.txt."
+    echo "[--error_output_file] : (optional) Also output the errors to a file"
+    echo "[--warning_output_file] : (optional) Also output the warnings to a file"
+}
+
+ERROR_OUTPUT_FILE=""
+WARNING_OUTPUT_FILE=""
+while [ -n "$*" ]
+do
+    case "$1" in
+        --error_output_file)
+            ERROR_OUTPUT_FILE="${2-}"
+            shift;
+            ;;
+        --warning_output_file)
+            WARNING_OUTPUT_FILE="${2-}"
+            shift;
+            ;;
+        --help|*)
+            print_usage
+            exit 1
+            ;;
+    esac
+    shift
+done
+
+pushd ${IMPALA_HOME} > /dev/null 2>&1
+
+OUTPUT_TMP_DIR=$(mktemp -d)
+PYLINT_OUTPUT_FILE="${OUTPUT_TMP_DIR}/pylint_output.txt"
+ERROR_OUTPUT_TMP_FILE="${OUTPUT_TMP_DIR}/error_output_tmp.txt"
+WARNING_OUTPUT_TMP_FILE="${OUTPUT_TMP_DIR}/warning_output_tmp.txt"
+
+RETCODE=0
+for file in $(git ls-files '**/*.py'); do
+    # Skip the shell entirely (but cover tests/shell)
+    if [[ "${file}" =~ "shell/" && ! "${file}" =~ "tests/shell" ]]; then
+        continue
+    fi
+    # For the moment, the focus is on enforcing py3k checks on files that use the
+    # impala-python virtualenv. Ignore executable python files that do not
+    # use impala-python. In practice, this tends to be scripts used during the
+    # build or various scripts for developers in bin.
+    FIRST_LINE=$(head -n1 "${file}")
+    if [[ "${FIRST_LINE}" == "#!"* ]]; then
+        if [[ "${FIRST_LINE}" =~ "python3" ]]; then
+            >&2 echo "SKIPPING: ${file} is already using python3: ${FIRST_LINE}"
+            continue
+        fi
+        if [[ ! "${FIRST_LINE}" =~ "impala-python" ]]; then
+            >&2 echo "SKIPPING: ${file} is not using impala-python: ${FIRST_LINE}"
+            continue
+        fi
+    fi
+
+    >&2 echo "PROCESSING: ${file}"
+
+    # -s n (skip score for each file)
+    # --exit-zero: don't fail
+    impala-pylint -s n --exit-zero --py3k "${file}" >> "${PYLINT_OUTPUT_FILE}"
+done
+
+touch "${ERROR_OUTPUT_TMP_FILE}"
+touch "${WARNING_OUTPUT_TMP_FILE}"
+
+# Hitting a banned py3k warning will cause this to return an error
+echo ""
+echo ""
+if grep -f "${BANNED_PY3K_WARNINGS}" "${PYLINT_OUTPUT_FILE}" > /dev/null 2>&1 ; then
+    echo "ERROR: Some python files contain these banned pylint warnings:" | \
+        tee "${ERROR_OUTPUT_TMP_FILE}"
+    grep -f "${BANNED_PY3K_WARNINGS}" "${PYLINT_OUTPUT_FILE}" | \
+        tee -a "${ERROR_OUTPUT_TMP_FILE}"
+    RETCODE=1
+else
+    echo "No errors found" | tee "${ERROR_OUTPUT_TMP_FILE}"
+fi
+
+if [[ -n "${ERROR_OUTPUT_FILE}" ]]; then
+    cp "${ERROR_OUTPUT_TMP_FILE}" "${ERROR_OUTPUT_FILE}"
+fi
+
+# The remaining py3k warnings are interesting, but they are not yet enforced.
+# Pylint produces annoying lines like "************* Module X", so try to filter those out
+echo ""
+echo ""
+if grep -v -e '\*\*\*\*' -f "${BANNED_PY3K_WARNINGS}" \
+        "${PYLINT_OUTPUT_FILE}" > /dev/null 2>&1 ; then
+    echo "WARNING: Some python files contain these unenforced pylint warnings:" | \
+        tee "${WARNING_OUTPUT_TMP_FILE}"
+    grep -v -e '\*\*\*\*' -f "${BANNED_PY3K_WARNINGS}" "${PYLINT_OUTPUT_FILE}" | \
+        tee -a "${WARNING_OUTPUT_TMP_FILE}"
+
+    echo "WARNING SUMMARY table:"
+    cat "${WARNING_OUTPUT_TMP_FILE}" | grep -v "WARNING" | cut -d: -f4- | \
+        sed 's#^ ##' | sort | uniq -c
+else
+    echo "No warnings found" | tee "${WARNING_OUTPUT_TMP_FILE}"
+fi
+
+if [[ -n "${WARNING_OUTPUT_FILE}" ]]; then
+    cp "${WARNING_OUTPUT_TMP_FILE}" "${WARNING_OUTPUT_FILE}"
+fi
+
+rm -rf "${OUTPUT_TMP_DIR}"
+
+popd > /dev/null 2>&1
+
+exit ${RETCODE}
diff --git a/bin/impala-config.sh b/bin/impala-config.sh
index ae9a00a42..1e9f9ff17 100755
--- a/bin/impala-config.sh
+++ b/bin/impala-config.sh
@@ -81,7 +81,7 @@ export USE_APACHE_HIVE=${USE_APACHE_HIVE-false}
 # moving to a different build of the toolchain, e.g. when a version is bumped or a
 # compile option is changed. The build id can be found in the output of the toolchain
 # build jobs, it is constructed from the build number and toolchain git hash prefix.
-export IMPALA_TOOLCHAIN_BUILD_ID=252-b144ba77b5
+export IMPALA_TOOLCHAIN_BUILD_ID=258-821f1d91bd
 # Versions of toolchain dependencies.
 # -----------------------------------
 export IMPALA_AVRO_VERSION=1.7.4-p5
@@ -159,6 +159,7 @@ export IMPALA_POSTGRES_JDBC_DRIVER_VERSION=42.5.1
 unset IMPALA_POSTGRES_JDBC_DRIVER_URL
 export IMPALA_PYTHON_VERSION=2.7.16
 unset IMPALA_PYTHON_URL
+export IMPALA_PYTHON3_VERSION=3.7.16
 export IMPALA_RAPIDJSON_VERSION=1.1.0
 unset IMPALA_RAPIDJSON_URL
 export IMPALA_RE2_VERSION=20190301
diff --git a/infra/python/deps/setuptools-requirements.txt b/bin/impala-pip3
old mode 100644
new mode 100755
similarity index 86%
copy from infra/python/deps/setuptools-requirements.txt
copy to bin/impala-pip3
index 071f9fc54..273555feb
--- a/infra/python/deps/setuptools-requirements.txt
+++ b/bin/impala-pip3
@@ -1,3 +1,5 @@
+#!/bin/bash
+#
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
@@ -15,7 +17,5 @@
 # specific language governing permissions and limitations
 # under the License.
 
-# Newer versions of setuptools don't support Python 2.7
-setuptools == 44.1.1
-  wheel == 0.35.1
-setuptools-scm == 4.1.2
+source "$(dirname "$0")/impala-python3-common.sh"
+exec "$PY_ENV_DIR/bin/python3" "$PY_ENV_DIR/bin/pip3" "$@"
diff --git a/infra/python/deps/setuptools-requirements.txt b/bin/impala-pylint
old mode 100644
new mode 100755
similarity index 86%
copy from infra/python/deps/setuptools-requirements.txt
copy to bin/impala-pylint
index 071f9fc54..012f08bc9
--- a/infra/python/deps/setuptools-requirements.txt
+++ b/bin/impala-pylint
@@ -1,3 +1,5 @@
+#!/bin/bash
+#
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
@@ -15,7 +17,5 @@
 # specific language governing permissions and limitations
 # under the License.
 
-# Newer versions of setuptools don't support Python 2.7
-setuptools == 44.1.1
-  wheel == 0.35.1
-setuptools-scm == 4.1.2
+source "$(dirname "$0")/impala-python3-common.sh"
+exec "$PY_ENV_DIR/bin/pylint" "$@"
diff --git a/infra/python/deps/setuptools-requirements.txt b/bin/impala-python3
old mode 100644
new mode 100755
similarity index 75%
copy from infra/python/deps/setuptools-requirements.txt
copy to bin/impala-python3
index 071f9fc54..aec831d12
--- a/infra/python/deps/setuptools-requirements.txt
+++ b/bin/impala-python3
@@ -1,3 +1,6 @@
+#!/bin/bash
+#
+##############################################################################
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
@@ -14,8 +17,7 @@
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
+##############################################################################
 
-# Newer versions of setuptools don't support Python 2.7
-setuptools == 44.1.1
-  wheel == 0.35.1
-setuptools-scm == 4.1.2
+source "$(dirname "$0")/impala-python3-common.sh"
+exec "$PY_ENV_DIR/bin/python3" "$@"
diff --git a/bin/init-impala-python.sh b/bin/impala-python3-common.sh
old mode 100755
new mode 100644
similarity index 60%
copy from bin/init-impala-python.sh
copy to bin/impala-python3-common.sh
index e1e20f4a4..06bf3a87a
--- a/bin/init-impala-python.sh
+++ b/bin/impala-python3-common.sh
@@ -1,5 +1,3 @@
-#!/usr/bin/env bash
-#
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
@@ -16,16 +14,19 @@
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
-#
-# This is called during the build to initialize the impala-python
-# virtualenv (which involves installing various packages and
-# compiling things). This is not directly in CMake, because
-# this depends on knowing IMPALA_HOME and other environment
-# variables.
 
-bin=`dirname "$0"`
-bin=`cd "$bin"; pwd`
-. "$bin"/impala-config.sh
+# This file is intended to be sourced to perform common setup for
+# the Python 3 $IMPALA_HOME/bin/impala-py* executables.
+
+set -euo pipefail
+. $IMPALA_HOME/bin/report_build_error.sh
+setup_report_build_error
+
+. $IMPALA_HOME/bin/set-pythonpath.sh
+
+export LD_LIBRARY_PATH="$(python "$IMPALA_HOME/infra/python/bootstrap_virtualenv.py" \
+  --print-ld-library-path)"
 
-cd $IMPALA_HOME
-bin/impala-python -c 'print("Initialized impala-python")'
+PY_DIR="$(dirname "$0")/../infra/python"
+PY_ENV_DIR="${PY_DIR}/env-gcc${IMPALA_GCC_VERSION}-py3"
+python "$PY_DIR/bootstrap_virtualenv.py" --python3
diff --git a/bin/init-impala-python.sh b/bin/init-impala-python.sh
index e1e20f4a4..360d2df89 100755
--- a/bin/init-impala-python.sh
+++ b/bin/init-impala-python.sh
@@ -27,5 +27,29 @@ bin=`dirname "$0"`
 bin=`cd "$bin"; pwd`
 . "$bin"/impala-config.sh
 
+function print_usage {
+  echo "init-impala-python.sh - Script called from CMake to init python venvs"
+  echo "[-python3] : Init the python3 virtualenv (default is python2)"
+}
+
+IS_PYTHON3=false
+while [ -n "$*" ]
+do
+  case "$1" in
+    -python3)
+       IS_PYTHON3=true
+       ;;
+    -help|*)
+       print_usage
+       exit 1
+       ;;
+  esac
+  shift
+done
+
 cd $IMPALA_HOME
-bin/impala-python -c 'print("Initialized impala-python")'
+if $IS_PYTHON3 ; then
+    bin/impala-python3 -c 'print("Initialized impala-python3")'
+else
+    bin/impala-python -c 'print("Initialized impala-python")'
+fi
diff --git a/bin/rat_exclude_files.txt b/bin/rat_exclude_files.txt
index 19d4ecbf2..825eef9b5 100644
--- a/bin/rat_exclude_files.txt
+++ b/bin/rat_exclude_files.txt
@@ -27,6 +27,7 @@ shell/packaging/MANIFEST.in
 shell/packaging/requirements.txt
 testdata/cluster/node_templates/cdh7/etc/init.d/kms
 testdata/authentication/*
+bin/banned_py3k_warnings.txt
 
 # See $IMPALA_HOME/LICENSE.txt
 be/src/gutil/*
diff --git a/infra/python/bootstrap_virtualenv.py b/infra/python/bootstrap_virtualenv.py
index c3bc59932..bd9c08144 100644
--- a/infra/python/bootstrap_virtualenv.py
+++ b/infra/python/bootstrap_virtualenv.py
@@ -55,7 +55,10 @@ SKIP_TOOLCHAIN_BOOTSTRAP = "SKIP_TOOLCHAIN_BOOTSTRAP"
 GCC_VERSION = os.environ["IMPALA_GCC_VERSION"]
 
 DEPS_DIR = os.path.join(os.path.dirname(__file__), "deps")
-ENV_DIR = os.path.join(os.path.dirname(__file__), "env-gcc{0}".format(GCC_VERSION))
+ENV_DIR_PY2 = os.path.join(os.path.dirname(__file__),
+                           "env-gcc{0}".format(GCC_VERSION))
+ENV_DIR_PY3 = os.path.join(os.path.dirname(__file__),
+                           "env-gcc{0}-py3".format(GCC_VERSION))
 
 # Setuptools requirements file. Setuptools is required during pip install for
 # some packages. Newer setuptools dropped python 2 support, and some python
@@ -77,10 +80,16 @@ KUDU_REQS_PATH = os.path.join(DEPS_DIR, "kudu-requirements.txt")
 # Interface) being installed by the requirements step.
 ADLS_REQS_PATH = os.path.join(DEPS_DIR, "adls-requirements.txt")
 
+# Extra packages specific to python 3
+PY3_REQS_PATH = os.path.join(DEPS_DIR, "py3-requirements.txt")
 
-def delete_virtualenv_if_exist():
-  if os.path.exists(ENV_DIR):
-    shutil.rmtree(ENV_DIR)
+# Extra packages specific to python 2
+PY2_REQS_PATH = os.path.join(DEPS_DIR, "py2-requirements.txt")
+
+
+def delete_virtualenv_if_exist(venv_dir):
+  if os.path.exists(venv_dir):
+    shutil.rmtree(venv_dir)
 
 
 def detect_virtualenv_version():
@@ -99,8 +108,16 @@ def detect_virtualenv_version():
   return None
 
 
-def create_virtualenv():
-  LOG.info("Creating python virtualenv")
+def create_virtualenv(venv_dir, is_py3):
+  if is_py3:
+    # Python 3 is much simpler, because there is a builtin venv module (-m venv)
+    LOG.info("Creating python3 virtualenv")
+    python_cmd = download_toolchain_python(is_py3)
+    exec_cmd([python_cmd, "-m", "venv", venv_dir])
+    return
+
+  # Python 2
+  LOG.info("Creating python2 virtualenv")
   build_dir = tempfile.mkdtemp()
   # Try to find the virtualenv version by parsing the requirements file
   # Default to "*" if we can't figure it out.
@@ -114,9 +131,9 @@ def create_virtualenv():
   for member in file.getmembers():
     file.extract(member, build_dir)
   file.close()
-  python_cmd = download_toolchain_python()
+  python_cmd = download_toolchain_python(is_py3)
   exec_cmd([python_cmd, find_file(build_dir, "virtualenv*", "virtualenv.py"), "--quiet",
-      "--python", python_cmd, ENV_DIR])
+      "--python", python_cmd, venv_dir])
   shutil.rmtree(build_dir)
 
 
@@ -147,7 +164,7 @@ def select_cc():
   return cc
 
 
-def exec_pip_install(args, cc="no-cc-available", env=None):
+def exec_pip_install(venv_dir, is_py3, args, cc="no-cc-available", env=None):
   '''Executes "pip install" with the provided command line arguments. If 'cc' is set,
   it is used as the C compiler. Otherwise compilation of C/C++ code is disabled by
   setting the CC environment variable to a bogus value.
@@ -169,8 +186,12 @@ def exec_pip_install(args, cc="no-cc-available", env=None):
   # Don't call the virtualenv pip directly, it uses a hashbang to to call the python
   # virtualenv using an absolute path. If the path to the virtualenv is very long, the
   # hashbang won't work.
-  impala_pip_base_cmd = [os.path.join(ENV_DIR, "bin", "python"),
-                         os.path.join(ENV_DIR, "bin", "pip"), "install", "-v"]
+  if is_py3:
+    impala_pip_base_cmd = [os.path.join(venv_dir, "bin", "python3"),
+                           os.path.join(venv_dir, "bin", "pip3"), "install", "-v"]
+  else:
+    impala_pip_base_cmd = [os.path.join(venv_dir, "bin", "python"),
+                           os.path.join(venv_dir, "bin", "pip"), "install", "-v"]
 
   # Passes --no-binary for IMPALA-3767: without this, Cython (and
   # several other packages) fail download.
@@ -181,7 +202,9 @@ def exec_pip_install(args, cc="no-cc-available", env=None):
       impala_pip_base_cmd[:] + ["--no-binary", ":all:", "--no-cache-dir"]
 
   # When using a custom mirror, we also must use the index of that mirror.
-  if "PYPI_MIRROR" in os.environ:
+  # The python 3 virtualenv has trouble with using --index-url with PYPI_MIRROR,
+  # so it falls back to --no-index, which works fine.
+  if "PYPI_MIRROR" in os.environ and not is_py3:
     third_party_pkg_install_cmd.extend(["--index-url",
                                         "%s/simple" % os.environ["PYPI_MIRROR"]])
   else:
@@ -217,7 +240,7 @@ def find_file(*paths):
   return files[0]
 
 
-def download_toolchain_python():
+def download_toolchain_python(is_py3):
   '''Grabs the Python implementation from the Impala toolchain, using the machinery from
      bin/bootstrap_toolchain.py.
      Skip the download if SKIP_TOOLCHAIN_BOOTSTRAP=true in the environment. In that case
@@ -229,27 +252,35 @@ def download_toolchain_python():
     raise Exception("Impala environment not set up correctly, make sure "
         "$IMPALA_TOOLCHAIN_PACKAGES_HOME is set.")
 
-  package = ToolchainPackage("python")
+  if is_py3:
+    package = ToolchainPackage("python",
+                               explicit_version=os.environ["IMPALA_PYTHON3_VERSION"])
+  else:
+    package = ToolchainPackage("python")
   if package.needs_download() and \
      not (os.environ.get(SKIP_TOOLCHAIN_BOOTSTRAP) == 'true'):
     package.download()
-  python_cmd = os.path.join(package.pkg_directory(), "bin/python")
+  if is_py3:
+    python_cmd = os.path.join(package.pkg_directory(), "bin/python3")
+  else:
+    python_cmd = os.path.join(package.pkg_directory(), "bin/python")
   if not os.path.exists(python_cmd):
     raise Exception("Unexpected error bootstrapping python from toolchain: {0} does not "
                     "exist".format(python_cmd))
   return python_cmd
 
 
-def install_deps():
-  LOG.info("Installing setuptools into the virtualenv")
-  exec_pip_install(["-r", SETUPTOOLS_REQS_PATH])
+def install_deps(venv_dir, is_py3):
+  py_str = "3" if is_py3 else "2"
+  LOG.info("Installing setuptools into the python{0} virtualenv".format(py_str))
+  exec_pip_install(venv_dir, is_py3, ["-r", SETUPTOOLS_REQS_PATH])
   cc = select_cc()
   if cc is None:
     raise Exception("CC not available")
   env = dict(os.environ)
-  LOG.info("Installing packages into the virtualenv")
-  exec_pip_install(["-r", REQS_PATH], cc=cc, env=env)
-  mark_reqs_installed(REQS_PATH)
+  LOG.info("Installing packages into the python{0} virtualenv".format(py_str))
+  exec_pip_install(venv_dir, is_py3, ["-r", REQS_PATH], cc=cc, env=env)
+  mark_reqs_installed(venv_dir, REQS_PATH)
 
 
 def have_toolchain():
@@ -264,26 +295,44 @@ def toolchain_pkg_dir(pkg_name):
       pkg_name + "-" + pkg_version)
 
 
-def install_adls_deps():
+def install_adls_deps(venv_dir, is_py3):
   # The ADLS dependencies require that the OS is at least CentOS 6.7 or above,
   # which is why we break this into a seperate step. If the target filesystem is
   # ADLS, the expectation is that the dev environment is running at least CentOS 6.7.
   if os.environ.get('TARGET_FILESYSTEM') == "adls":
-    if reqs_are_installed(ADLS_REQS_PATH):
+    if reqs_are_installed(venv_dir, ADLS_REQS_PATH):
       LOG.debug("Skipping ADLS deps: matching adls-installed-requirements.txt found")
       return True
     cc = select_cc()
     assert cc is not None
-    LOG.info("Installing ADLS packages into the virtualenv")
-    exec_pip_install(["-r", ADLS_REQS_PATH], cc=cc)
-    mark_reqs_installed(ADLS_REQS_PATH)
+    py_str = "3" if is_py3 else "2"
+    LOG.info("Installing ADLS packages into the python{0} virtualenv".format(py_str))
+    exec_pip_install(venv_dir, is_py3, ["-r", ADLS_REQS_PATH], cc=cc)
+    mark_reqs_installed(venv_dir, ADLS_REQS_PATH)
 
 
-def install_kudu_client_if_possible():
+def install_py_version_deps(venv_dir, is_py3):
+  '''Installs the packages that only apply to one Python major version, i.e.
+  py3-requirements.txt when 'is_py3' is true and py2-requirements.txt otherwise.
+  Nothing is done if a matching copy of that file is already in 'venv_dir'.'''
+  py_str = "3" if is_py3 else "2"
+  reqs_path = PY3_REQS_PATH if is_py3 else PY2_REQS_PATH
+  if reqs_are_installed(venv_dir, reqs_path):
+    return
+  # Only look for a compiler when something actually needs to be installed.
+  cc = select_cc()
+  if cc is None:
+    raise Exception("CC not available")
+  LOG.info("Installing python{0} packages into the virtualenv".format(py_str))
+  exec_pip_install(venv_dir, is_py3, ["-r", reqs_path], cc=cc)
+  mark_reqs_installed(venv_dir, reqs_path)
+
+
+def install_kudu_client_if_possible(venv_dir, is_py3):
   '''Installs the Kudu python module if possible, which depends on the toolchain and
   the compiled requirements in requirements.txt. If the toolchain isn't
   available, nothing will be done.'''
-  if reqs_are_installed(KUDU_REQS_PATH):
+  if reqs_are_installed(venv_dir, KUDU_REQS_PATH):
     LOG.debug("Skipping Kudu: matching kudu-installed-requirements.txt found")
     return
   kudu_base_dir = os.environ["IMPALA_KUDU_HOME"]
@@ -291,11 +340,13 @@ def install_kudu_client_if_possible():
     LOG.debug("Skipping Kudu: %s doesn't exist" % kudu_base_dir)
     return
 
-  LOG.info("Installing Kudu into the virtualenv")
+  py_str = "3" if is_py3 else "2"
+  LOG.info("Installing Kudu into the python{0} virtualenv".format(py_str))
   # The installation requires that KUDU_HOME/build/latest exists. An empty directory
   # structure will be made to satisfy that. The Kudu client headers and lib will be made
   # available through GCC environment variables.
-  fake_kudu_build_dir = os.path.join(tempfile.gettempdir(), "virtualenv-kudu")
+  fake_kudu_build_dir = os.path.join(tempfile.gettempdir(),
+                                     "virtualenv-kudu{0}".format(py_str))
   try:
     artifact_dir = os.path.join(fake_kudu_build_dir, "build", "latest")
     if not os.path.exists(artifact_dir):
@@ -312,8 +363,8 @@ def install_kudu_client_if_possible():
     env["CPLUS_INCLUDE_PATH"] = os.path.join(kudu_client_dir, "include")
     env["LIBRARY_PATH"] = os.path.pathsep.join([os.path.join(kudu_client_dir, 'lib'),
                                                 os.path.join(kudu_client_dir, 'lib64')])
-    exec_pip_install(["-r", KUDU_REQS_PATH], cc=cc, env=env)
-    mark_reqs_installed(KUDU_REQS_PATH)
+    exec_pip_install(venv_dir, is_py3, ["-r", KUDU_REQS_PATH], cc=cc, env=env)
+    mark_reqs_installed(venv_dir, KUDU_REQS_PATH)
   finally:
     try:
       shutil.rmtree(fake_kudu_build_dir)
@@ -353,17 +404,17 @@ def error_if_kudu_client_not_found(install_dir):
   raise Exception("%s not found at %s" % (kudu_client_lib, lib_dir))
 
 
-def mark_reqs_installed(reqs_path):
+def mark_reqs_installed(venv_dir, reqs_path):
   '''Mark that the requirements from the given file are installed by copying it into
   the root directory of the virtualenv.'''
-  installed_reqs_path = os.path.join(ENV_DIR, os.path.basename(reqs_path))
+  installed_reqs_path = os.path.join(venv_dir, os.path.basename(reqs_path))
   shutil.copyfile(reqs_path, installed_reqs_path)
 
 
-def reqs_are_installed(reqs_path):
+def reqs_are_installed(venv_dir, reqs_path):
   '''Check if the requirements from the given file are installed in the virtualenv by
   looking for a matching requirements file in the root directory of the virtualenv.'''
-  installed_reqs_path = os.path.join(ENV_DIR, os.path.basename(reqs_path))
+  installed_reqs_path = os.path.join(venv_dir, os.path.basename(reqs_path))
   if not os.path.exists(installed_reqs_path):
     return False
   installed_reqs_file = open(installed_reqs_path)
@@ -381,11 +432,11 @@ def reqs_are_installed(reqs_path):
     installed_reqs_file.close()
 
 
-def setup_virtualenv_if_not_exists():
-  if not (reqs_are_installed(REQS_PATH)):
-    delete_virtualenv_if_exist()
-    create_virtualenv()
-    install_deps()
+def setup_virtualenv_if_not_exists(venv_dir, is_py3):
+  if not (reqs_are_installed(venv_dir, REQS_PATH)):
+    delete_virtualenv_if_exist(venv_dir)
+    create_virtualenv(venv_dir, is_py3)
+    install_deps(venv_dir, is_py3)
     LOG.debug("Virtualenv setup complete")
 
 
@@ -397,6 +448,8 @@ if __name__ == "__main__":
       " the virtualenv even if it exists and appears to be completely up-to-date.")
   parser.add_option("--print-ld-library-path", action="store_true", help="Print the"
       " LD_LIBRARY_PATH that should be used when running python from the virtualenv.")
+  parser.add_option("--python3", action="store_true", help="Generate the python3"
+      " virtualenv")
   options, args = parser.parse_args()
 
   if options.print_ld_library_path:
@@ -411,10 +464,17 @@ if __name__ == "__main__":
     sys.exit()
 
   logging.basicConfig(level=getattr(logging, options.log_level))
+
+  if options.python3:
+    venv_dir = ENV_DIR_PY3
+  else:
+    venv_dir = ENV_DIR_PY2
+
   if options.rebuild:
-    delete_virtualenv_if_exist()
+    delete_virtualenv_if_exist(venv_dir)
 
   # Complete as many bootstrap steps as possible (see file comment for the steps).
-  setup_virtualenv_if_not_exists()
-  install_kudu_client_if_possible()
-  install_adls_deps()
+  setup_virtualenv_if_not_exists(venv_dir, options.python3)
+  install_kudu_client_if_possible(venv_dir, options.python3)
+  install_adls_deps(venv_dir, options.python3)
+  install_py_version_deps(venv_dir, options.python3)
diff --git a/infra/python/deps/pip_download.py b/infra/python/deps/pip_download.py
index 9c41135d8..03713f927 100755
--- a/infra/python/deps/pip_download.py
+++ b/infra/python/deps/pip_download.py
@@ -38,7 +38,8 @@ PYPI_MIRROR = os.environ.get('PYPI_MIRROR', 'https://pypi.python.org')
 
 # The requirement files that list all of the required packages and versions.
 REQUIREMENTS_FILES = ['requirements.txt', 'setuptools-requirements.txt',
-                      'kudu-requirements.txt', 'adls-requirements.txt']
+                      'kudu-requirements.txt', 'adls-requirements.txt',
+                      'py2-requirements.txt', 'py3-requirements.txt']
 
 
 def check_digest(filename, algorithm, expected_digest):
diff --git a/infra/python/deps/setuptools-requirements.txt b/infra/python/deps/py2-requirements.txt
similarity index 65%
copy from infra/python/deps/setuptools-requirements.txt
copy to infra/python/deps/py2-requirements.txt
index 071f9fc54..122b5ab68 100644
--- a/infra/python/deps/setuptools-requirements.txt
+++ b/infra/python/deps/py2-requirements.txt
@@ -15,7 +15,20 @@
 # specific language governing permissions and limitations
 # under the License.
 
-# Newer versions of setuptools don't support Python 2.7
-setuptools == 44.1.1
-  wheel == 0.35.1
-setuptools-scm == 4.1.2
+# Python2-only requirements
+
+cm-api == 10.0.0
+  # Already available as part of python on Linux.
+  readline == 6.2.4.1; sys_platform == 'darwin'
+flake8 == 3.9.2
+  mccabe == 0.6.1
+  pycodestyle == 2.7.0
+  pyflakes == 2.3.1
+  enum34 == 1.1.10
+  typing == 3.10.0.0
+  configparser == 4.0.2
+  functools32 == 3.2.3-2
+  importlib-metadata == 2.1.3
+    contextlib2 == 0.6.0
+    pathlib2 == 2.3.7.post1
+    zipp == 1.2.0
diff --git a/infra/python/deps/setuptools-requirements.txt b/infra/python/deps/py3-requirements.txt
similarity index 70%
copy from infra/python/deps/setuptools-requirements.txt
copy to infra/python/deps/py3-requirements.txt
index 071f9fc54..d6195a1e8 100644
--- a/infra/python/deps/setuptools-requirements.txt
+++ b/infra/python/deps/py3-requirements.txt
@@ -15,7 +15,17 @@
 # specific language governing permissions and limitations
 # under the License.
 
-# Newer versions of setuptools don't support Python 2.7
-setuptools == 44.1.1
-  wheel == 0.35.1
-setuptools-scm == 4.1.2
+# Python3-only requirements
+
+pylint == 2.10.2
+  astroid == 2.7.3
+    lazy-object-proxy == 1.6.0
+    wrapt == 1.12.1
+    typed-ast == 1.4.3
+  configparser == 4.0.2
+  isort == 4.3.21
+    futures == 3.3.0; python_version == "2.7"
+  singledispatch == 3.6.1
+  toml == 0.10.2
+  platformdirs == 2.4.1
+  typing-extensions == 3.10.0.2
diff --git a/infra/python/deps/requirements.txt b/infra/python/deps/requirements.txt
index 271ddeadc..fe61de14b 100644
--- a/infra/python/deps/requirements.txt
+++ b/infra/python/deps/requirements.txt
@@ -20,23 +20,8 @@
 # Dependents are indented. Dependents that have multiple parents are not listed
 # multiple times (though maybe they could be).
 
-allpairs == 2.0.1
+allpairspy == 2.5.0
 argparse == 1.4.0
-cm-api == 10.0.0
-  # Already available as part of python on Linux.
-  readline == 6.2.4.1; sys_platform == 'darwin'
-flake8 == 3.9.2
-  mccabe == 0.6.1
-  pycodestyle == 2.7.0
-  pyflakes == 2.3.1
-  enum34 == 1.1.10
-  typing == 3.10.0.0
-  configparser == 4.0.2
-  functools32 == 3.2.3-2
-  importlib-metadata == 2.1.3
-    contextlib2 == 0.6.0
-    pathlib2 == 2.3.7.post1
-    zipp == 1.2.0
 future == 0.18.3
 gcovr == 4.2
   Jinja2 == 2.11.3
@@ -61,14 +46,13 @@ pyparsing == 2.0.3
 pytest == 2.9.2
   py == 1.4.32
   pytest-forked == 0.2
-  pytest-random == 0.02
   pytest-runner == 4.2
   pytest-xdist == 1.17.1
   pytest-timeout == 1.2.1
 python-magic == 0.4.11
 pywebhdfs == 0.3.2
   pbr == 3.1.1
-requests == 2.20.0
+requests == 2.21.0
   chardet == 3.0.4
   idna == 2.8
   urllib3 == 1.24.2
diff --git a/infra/python/deps/setuptools-requirements.txt b/infra/python/deps/setuptools-requirements.txt
index 071f9fc54..713bfa0af 100644
--- a/infra/python/deps/setuptools-requirements.txt
+++ b/infra/python/deps/setuptools-requirements.txt
@@ -18,4 +18,4 @@
 # Newer versions of setuptools don't support Python 2.7
 setuptools == 44.1.1
   wheel == 0.35.1
-setuptools-scm == 4.1.2
+setuptools-scm == 5.0.2
diff --git a/testdata/bin/generate-test-vectors.py b/testdata/bin/generate-test-vectors.py
index 4998a8caa..c7d288bd8 100755
--- a/testdata/bin/generate-test-vectors.py
+++ b/testdata/bin/generate-test-vectors.py
@@ -48,8 +48,7 @@ import os
 import sys
 from itertools import product
 from optparse import OptionParser
-import metacomm.combinatorics.all_pairs2
-all_pairs = metacomm.combinatorics.all_pairs2.all_pairs2
+from allpairspy import AllPairs as all_pairs
 
 parser = OptionParser()
 parser.add_option("-w", "--workload", dest="workload",
diff --git a/tests/common/test_vector.py b/tests/common/test_vector.py
index 005c35adb..8fcac5a79 100644
--- a/tests/common/test_vector.py
+++ b/tests/common/test_vector.py
@@ -136,8 +136,8 @@ class ImpalaTestMatrix(object):
               if self.is_valid(vec)]
 
   def __generate_pairwise_combinations(self):
-    import metacomm.combinatorics.all_pairs2
-    all_pairs = metacomm.combinatorics.all_pairs2.all_pairs2
+    from allpairspy import AllPairs
+    all_pairs = AllPairs
 
     # Pairwise fails if the number of inputs == 1. Use exhaustive in this case the
     # results will be the same.
diff --git a/tests/custom_cluster/test_hs2_fault_injection.py b/tests/custom_cluster/test_hs2_fault_injection.py
index 0b3f8e7b4..27e536fdd 100644
--- a/tests/custom_cluster/test_hs2_fault_injection.py
+++ b/tests/custom_cluster/test_hs2_fault_injection.py
@@ -1,5 +1,3 @@
-#!/usr/bin/env python
-#
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
diff --git a/tests/query_test/test_decimal_casting.py b/tests/query_test/test_decimal_casting.py
index 752e3ad64..f487e8bc3 100644
--- a/tests/query_test/test_decimal_casting.py
+++ b/tests/query_test/test_decimal_casting.py
@@ -19,7 +19,7 @@
 #
 import pytest
 from decimal import Decimal, getcontext, ROUND_DOWN, ROUND_HALF_UP
-from metacomm.combinatorics.all_pairs2 import all_pairs2 as all_pairs
+from allpairspy import AllPairs as all_pairs
 from random import randint
 
 from tests.common.impala_test_suite import ImpalaTestSuite