You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by jo...@apache.org on 2023/03/09 17:22:50 UTC
[impala] 01/06: IMPALA-11959: Add Python 3 virtualenv
This is an automated email from the ASF dual-hosted git repository.
joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
commit 566df808913aef6ff5eecc3849e14df8370bd651
Author: Joe McDonnell <jo...@cloudera.com>
AuthorDate: Tue Apr 6 21:36:07 2021 -0700
IMPALA-11959: Add Python 3 virtualenv
This adds a Python 3 equivalent to the impala-python
virtualenv based on the toolchain Python 3.7.16.
This modifies bootstrap_virtualenv.py to support
the two different modes. This adds py2-requirements.txt
and py3-requirements.txt to allow some differences
between the Python 2 and Python 3 virtualenvs.
Here are some specific package changes:
- allpairs is replaced with allpairspy, as allpairs did
not support Python 3.
- requests is upgraded slightly, because otherwise it has issues
with idna==2.8.
- pylint is limited to Python 3, because we are adding it
and don't need it in both virtualenvs
- flake8 is limited to Python 2, because it will take
some work to switch to a version that works on Python 3
- cm_api is limited to Python 2, because it doesn't support
Python 3
- pytest-random does not support Python 3 and it is unused,
so it is removed
- Bump the version of setuptools-scm to support Python 3
This adds impala-pylint, which can be used to do further
Python 3 checks via --py3k. This also adds a bin/check-pylint-py3k.sh
script to enforce specific py3k checks. The banned py3k warnings
are specified in bin/banned_py3k_warnings.txt. That file is currently
empty, but entries can be added to ratchet up the py3k strictness over time
to avoid regressions.
This pulls in a new toolchain with the fix for IMPALA-11956
to get Python 3.7.16.
Testing:
- Hand tested that the allpairs libraries produce the
same results
- The python3 virtualenv has no influence on regular
tests yet
Change-Id: Ica4853f440c9a46a79bd5fb8e0a66730b0b4efc0
Reviewed-on: http://gerrit.cloudera.org:8080/19567
Reviewed-by: Joe McDonnell <jo...@cloudera.com>
Tested-by: Joe McDonnell <jo...@cloudera.com>
---
CMakeLists.txt | 6 +-
bin/banned_py3k_warnings.txt | 0
bin/bootstrap_toolchain.py | 3 +
bin/check-pylint-py3k.sh | 140 +++++++++++++++++++
bin/impala-config.sh | 3 +-
.../setuptools-requirements.txt => bin/impala-pip3 | 8 +-
.../impala-pylint | 8 +-
.../impala-python3 | 10 +-
...t-impala-python.sh => impala-python3-common.sh} | 27 ++--
bin/init-impala-python.sh | 26 +++-
bin/rat_exclude_files.txt | 1 +
infra/python/bootstrap_virtualenv.py | 150 ++++++++++++++-------
infra/python/deps/pip_download.py | 3 +-
...tools-requirements.txt => py2-requirements.txt} | 21 ++-
...tools-requirements.txt => py3-requirements.txt} | 18 ++-
infra/python/deps/requirements.txt | 20 +--
infra/python/deps/setuptools-requirements.txt | 2 +-
testdata/bin/generate-test-vectors.py | 3 +-
tests/common/test_vector.py | 4 +-
tests/custom_cluster/test_hs2_fault_injection.py | 2 -
tests/query_test/test_decimal_casting.py | 2 +-
21 files changed, 349 insertions(+), 108 deletions(-)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index cb35b393a..2bfc50f5d 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -514,6 +514,10 @@ add_custom_target(impala_python ALL
COMMAND "${CMAKE_SOURCE_DIR}/bin/init-impala-python.sh"
)
+add_custom_target(impala_python3 ALL
+ COMMAND "${CMAKE_SOURCE_DIR}/bin/init-impala-python.sh" "-python3"
+)
+
set(IMPALA_PYTHON_INSTALLS "")
if (NOT $ENV{IMPALA_SYSTEM_PYTHON2} EQUAL "")
list(APPEND IMPALA_PYTHON_INSTALLS shell_python2_install)
@@ -524,7 +528,7 @@ endif()
add_custom_target(impala_shell_pypi ALL DEPENDS ${IMPALA_PYTHON_INSTALLS})
add_custom_target(notests_independent_targets DEPENDS
- java cscope tarballs impala_python impala_shell_pypi
+ java cscope tarballs impala_python impala_python3 impala_shell_pypi
)
add_custom_target(notests_regular_targets DEPENDS
impalad statestored catalogd admissiond fesupport loggingsupport ImpalaUdf udasample udfsample impala-profile-tool
diff --git a/bin/banned_py3k_warnings.txt b/bin/banned_py3k_warnings.txt
new file mode 100644
index 000000000..e69de29bb
diff --git a/bin/bootstrap_toolchain.py b/bin/bootstrap_toolchain.py
index 98ec9d7d6..097484822 100755
--- a/bin/bootstrap_toolchain.py
+++ b/bin/bootstrap_toolchain.py
@@ -494,6 +494,9 @@ def get_toolchain_downloads():
"crcutil", "curl", "flatbuffers", "gdb", "gflags", "glog", "gperftools", "gtest",
"jwt-cpp", "libev", "libunwind", "lz4", "openldap", "openssl", "orc", "protobuf",
"python", "rapidjson", "re2", "snappy", "tpc-h", "tpc-ds", "zlib", "zstd"])
+ python3_package = ToolchainPackage(
+ "python", explicit_version=os.environ.get("IMPALA_PYTHON3_VERSION"))
+ toolchain_packages += [python3_package]
toolchain_packages += get_unique_toolchain_downloads(
["thrift:cpp", "thrift:java", "thrift:py"])
protobuf_package_clang = ToolchainPackage(
diff --git a/bin/check-pylint-py3k.sh b/bin/check-pylint-py3k.sh
new file mode 100755
index 000000000..9dca19c87
--- /dev/null
+++ b/bin/check-pylint-py3k.sh
@@ -0,0 +1,140 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -euo pipefail
+
+BINDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+
+# To allow incrementally banning individual pylint checks, this uses grep
+# expressions to match banned pylint warnings. The grep expressions are stored
+# in the bin/banned_py3k_warnings.txt file.
+BANNED_PY3K_WARNINGS="${BINDIR}/banned_py3k_warnings.txt"
+
+function print_usage {
+ echo "check-pylink-py3k.sh : Checks eligible python files for pylint py3k compliance."
+ echo "Fails if the python files have py3k warnings that match the patterns in "
+ echo "bin/banned_py3k_warnings.txt."
+ echo "[--error_output_file] : (optional) Also output the errors to a file"
+ echo "[--warning_output_file] : (optional) Also output the warnings to a file"
+}
+
+ERROR_OUTPUT_FILE=""
+WARNING_OUTPUT_FILE=""
+while [ -n "$*" ]
+do
+ case "$1" in
+ --error_output_file)
+ ERROR_OUTPUT_FILE="${2-}"
+ shift;
+ ;;
+ --warning_output_file)
+ WARNING_OUTPUT_FILE="${2-}"
+ shift;
+ ;;
+ --help|*)
+ print_usage
+ exit 1
+ ;;
+ esac
+ shift
+done
+
+pushd ${IMPALA_HOME} > /dev/null 2>&1
+
+OUTPUT_TMP_DIR=$(mktemp -d)
+PYLINT_OUTPUT_FILE="${OUTPUT_TMP_DIR}/pylint_output.txt"
+ERROR_OUTPUT_TMP_FILE="${OUTPUT_TMP_DIR}/error_output_tmp.txt"
+WARNING_OUTPUT_TMP_FILE="${OUTPUT_TMP_DIR}/warning_output_tmp.txt"
+
+RETCODE=0
+for file in $(git ls-files '**/*.py'); do
+ # Skip the shell entirely (but cover tests/shell)
+ if [[ "${file}" =~ "shell/" && ! "${file}" =~ "tests/shell" ]]; then
+ continue
+ fi
+ # For the moment, the focus is on enforcing py3k checks on files that use the
+ # impala-python virtualenv. Ignore executable python files that do not
+ # use impala-python. In practice, this tends to be scripts used during the
+ # build or various scripts for developers in bin.
+ FIRST_LINE=$(head -n1 ${file})
+ if [[ "${file}: ${FIRST_LINE}" =~ "#!" ]]; then
+ if [[ "${FIRST_LINE}" =~ "python3" ]]; then
+ >&2 echo "SKIPPING: ${file} is already using python3: ${FIRST_LINE}"
+ continue
+ fi
+ if [[ ! "${FIRST_LINE}" =~ "impala-python" ]]; then
+ >&2 echo "SKIPPING: ${file} is not using impala-python: ${FIRST_LINE}"
+ continue
+ fi
+ fi
+
+ >&2 echo "PROCESSING: ${file}"
+
+ # -s n (skip score for each file)
+ # --exit-zero: don't fail
+ impala-pylint -s n --exit-zero --py3k ${file} >> ${PYLINT_OUTPUT_FILE}
+done
+
+touch "${ERROR_OUTPUT_TMP_FILE}"
+touch "${WARNING_OUTPUT_TMP_FILE}"
+
+# Hitting a banned py3k warning will cause this to return an error
+echo ""
+echo ""
+if grep -f "${BANNED_PY3K_WARNINGS}" "${PYLINT_OUTPUT_FILE}" > /dev/null 2>&1 ; then
+ echo "ERROR: Some python files contain these banned pylint warnings:" | \
+ tee "${ERROR_OUTPUT_TMP_FILE}"
+ grep -f "${BANNED_PY3K_WARNINGS}" "${PYLINT_OUTPUT_FILE}" | \
+ tee -a "${ERROR_OUTPUT_TMP_FILE}"
+ RETCODE=1
+else
+ echo "No errors found" | tee "${ERROR_OUTPUT_TMP_FILE}"
+fi
+
+if [[ -n "${ERROR_OUTPUT_FILE}" ]]; then
+ cp "${ERROR_OUTPUT_TMP_FILE}" "${ERROR_OUTPUT_FILE}"
+fi
+
+# The remaining py3k warnings are interesting, but they are not yet enforced.
+# Pylint produces annoying lines like "************* Module X", so try to filter those out
+echo ""
+echo ""
+if grep -v -e '\*\*\*\*' -f "${BANNED_PY3K_WARNINGS}" \
+ "${PYLINT_OUTPUT_FILE}" > /dev/null 2>&1 ; then
+ echo "WARNING: Some python files contain these unenforced pylint warnings:" | \
+ tee "${WARNING_OUTPUT_TMP_FILE}"
+ grep -v -e '\*\*\*\*' -f "${BANNED_PY3K_WARNINGS}" "${PYLINT_OUTPUT_FILE}" | \
+ tee -a "${WARNING_OUTPUT_TMP_FILE}"
+
+ echo "WARNING SUMMARY table:"
+ cat "${WARNING_OUTPUT_TMP_FILE}" | grep -v "WARNING" | cut -d: -f4- | \
+ sed 's#^ ##' | sort | uniq -c
+else
+ echo "No warnings found" | tee "${WARNING_OUTPUT_TMP_FILE}"
+fi
+
+if [[ -n "${WARNING_OUTPUT_FILE}" ]]; then
+ cp "${WARNING_OUTPUT_TMP_FILE}" "${WARNING_OUTPUT_FILE}"
+fi
+
+rm -rf "${OUTPUT_TMP_DIR}"
+
+popd > /dev/null 2>&1
+
+exit ${RETCODE}
diff --git a/bin/impala-config.sh b/bin/impala-config.sh
index ae9a00a42..1e9f9ff17 100755
--- a/bin/impala-config.sh
+++ b/bin/impala-config.sh
@@ -81,7 +81,7 @@ export USE_APACHE_HIVE=${USE_APACHE_HIVE-false}
# moving to a different build of the toolchain, e.g. when a version is bumped or a
# compile option is changed. The build id can be found in the output of the toolchain
# build jobs, it is constructed from the build number and toolchain git hash prefix.
-export IMPALA_TOOLCHAIN_BUILD_ID=252-b144ba77b5
+export IMPALA_TOOLCHAIN_BUILD_ID=258-821f1d91bd
# Versions of toolchain dependencies.
# -----------------------------------
export IMPALA_AVRO_VERSION=1.7.4-p5
@@ -159,6 +159,7 @@ export IMPALA_POSTGRES_JDBC_DRIVER_VERSION=42.5.1
unset IMPALA_POSTGRES_JDBC_DRIVER_URL
export IMPALA_PYTHON_VERSION=2.7.16
unset IMPALA_PYTHON_URL
+export IMPALA_PYTHON3_VERSION=3.7.16
export IMPALA_RAPIDJSON_VERSION=1.1.0
unset IMPALA_RAPIDJSON_URL
export IMPALA_RE2_VERSION=20190301
diff --git a/infra/python/deps/setuptools-requirements.txt b/bin/impala-pip3
old mode 100644
new mode 100755
similarity index 86%
copy from infra/python/deps/setuptools-requirements.txt
copy to bin/impala-pip3
index 071f9fc54..273555feb
--- a/infra/python/deps/setuptools-requirements.txt
+++ b/bin/impala-pip3
@@ -1,3 +1,5 @@
+#!/bin/bash
+#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
@@ -15,7 +17,5 @@
# specific language governing permissions and limitations
# under the License.
-# Newer versions of setuptools don't support Python 2.7
-setuptools == 44.1.1
- wheel == 0.35.1
-setuptools-scm == 4.1.2
+source "$(dirname "$0")/impala-python3-common.sh"
+exec "$PY_ENV_DIR/bin/python3" "$PY_ENV_DIR/bin/pip3" "$@"
diff --git a/infra/python/deps/setuptools-requirements.txt b/bin/impala-pylint
old mode 100644
new mode 100755
similarity index 86%
copy from infra/python/deps/setuptools-requirements.txt
copy to bin/impala-pylint
index 071f9fc54..012f08bc9
--- a/infra/python/deps/setuptools-requirements.txt
+++ b/bin/impala-pylint
@@ -1,3 +1,5 @@
+#!/bin/bash
+#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
@@ -15,7 +17,5 @@
# specific language governing permissions and limitations
# under the License.
-# Newer versions of setuptools don't support Python 2.7
-setuptools == 44.1.1
- wheel == 0.35.1
-setuptools-scm == 4.1.2
+source "$(dirname "$0")/impala-python3-common.sh"
+exec "$PY_ENV_DIR/bin/pylint" "$@"
diff --git a/infra/python/deps/setuptools-requirements.txt b/bin/impala-python3
old mode 100644
new mode 100755
similarity index 75%
copy from infra/python/deps/setuptools-requirements.txt
copy to bin/impala-python3
index 071f9fc54..aec831d12
--- a/infra/python/deps/setuptools-requirements.txt
+++ b/bin/impala-python3
@@ -1,3 +1,6 @@
+#!/bin/bash
+#
+##############################################################################
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
@@ -14,8 +17,7 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
+##############################################################################
-# Newer versions of setuptools don't support Python 2.7
-setuptools == 44.1.1
- wheel == 0.35.1
-setuptools-scm == 4.1.2
+source "$(dirname "$0")/impala-python3-common.sh"
+exec "$PY_ENV_DIR/bin/python3" "$@"
diff --git a/bin/init-impala-python.sh b/bin/impala-python3-common.sh
old mode 100755
new mode 100644
similarity index 60%
copy from bin/init-impala-python.sh
copy to bin/impala-python3-common.sh
index e1e20f4a4..06bf3a87a
--- a/bin/init-impala-python.sh
+++ b/bin/impala-python3-common.sh
@@ -1,5 +1,3 @@
-#!/usr/bin/env bash
-#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
@@ -16,16 +14,19 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
-#
-# This is called during the build to initialize the impala-python
-# virtualenv (which involves installing various packages and
-# compiling things). This is not directly in CMake, because
-# this depends on knowing IMPALA_HOME and other environment
-# variables.
-bin=`dirname "$0"`
-bin=`cd "$bin"; pwd`
-. "$bin"/impala-config.sh
+# This file is intended to be sourced to perform common setup for
+# the Python 3 $IMPALA_HOME/bin/impala-py* executables.
+
+set -euo pipefail
+. $IMPALA_HOME/bin/report_build_error.sh
+setup_report_build_error
+
+. $IMPALA_HOME/bin/set-pythonpath.sh
+
+export LD_LIBRARY_PATH="$(python "$IMPALA_HOME/infra/python/bootstrap_virtualenv.py" \
+ --print-ld-library-path)"
-cd $IMPALA_HOME
-bin/impala-python -c 'print("Initialized impala-python")'
+PY_DIR="$(dirname "$0")/../infra/python"
+PY_ENV_DIR="${PY_DIR}/env-gcc${IMPALA_GCC_VERSION}-py3"
+python "$PY_DIR/bootstrap_virtualenv.py" --python3
diff --git a/bin/init-impala-python.sh b/bin/init-impala-python.sh
index e1e20f4a4..360d2df89 100755
--- a/bin/init-impala-python.sh
+++ b/bin/init-impala-python.sh
@@ -27,5 +27,29 @@ bin=`dirname "$0"`
bin=`cd "$bin"; pwd`
. "$bin"/impala-config.sh
+function print_usage {
+ echo "init-impala-python.sh - Script called from CMake to init python venvs"
+ echo "[-python3] : Init the python3 virtualenv (default is python2)"
+}
+
+IS_PYTHON3=false
+while [ -n "$*" ]
+do
+ case "$1" in
+ -python3)
+ IS_PYTHON3=true
+ ;;
+ -help|*)
+ print_usage
+ exit 1
+ ;;
+ esac
+ shift
+done
+
cd $IMPALA_HOME
-bin/impala-python -c 'print("Initialized impala-python")'
+if $IS_PYTHON3 ; then
+ bin/impala-python3 -c 'print("Initialized impala-python3")'
+else
+ bin/impala-python -c 'print("Initialized impala-python")'
+fi
diff --git a/bin/rat_exclude_files.txt b/bin/rat_exclude_files.txt
index 19d4ecbf2..825eef9b5 100644
--- a/bin/rat_exclude_files.txt
+++ b/bin/rat_exclude_files.txt
@@ -27,6 +27,7 @@ shell/packaging/MANIFEST.in
shell/packaging/requirements.txt
testdata/cluster/node_templates/cdh7/etc/init.d/kms
testdata/authentication/*
+bin/banned_py3k_warnings.txt
# See $IMPALA_HOME/LICENSE.txt
be/src/gutil/*
diff --git a/infra/python/bootstrap_virtualenv.py b/infra/python/bootstrap_virtualenv.py
index c3bc59932..bd9c08144 100644
--- a/infra/python/bootstrap_virtualenv.py
+++ b/infra/python/bootstrap_virtualenv.py
@@ -55,7 +55,10 @@ SKIP_TOOLCHAIN_BOOTSTRAP = "SKIP_TOOLCHAIN_BOOTSTRAP"
GCC_VERSION = os.environ["IMPALA_GCC_VERSION"]
DEPS_DIR = os.path.join(os.path.dirname(__file__), "deps")
-ENV_DIR = os.path.join(os.path.dirname(__file__), "env-gcc{0}".format(GCC_VERSION))
+ENV_DIR_PY2 = os.path.join(os.path.dirname(__file__),
+ "env-gcc{0}".format(GCC_VERSION))
+ENV_DIR_PY3 = os.path.join(os.path.dirname(__file__),
+ "env-gcc{0}-py3".format(GCC_VERSION))
# Setuptools requirements file. Setuptools is required during pip install for
# some packages. Newer setuptools dropped python 2 support, and some python
@@ -77,10 +80,16 @@ KUDU_REQS_PATH = os.path.join(DEPS_DIR, "kudu-requirements.txt")
# Interface) being installed by the requirements step.
ADLS_REQS_PATH = os.path.join(DEPS_DIR, "adls-requirements.txt")
+# Extra packages specific to python 3
+PY3_REQS_PATH = os.path.join(DEPS_DIR, "py3-requirements.txt")
-def delete_virtualenv_if_exist():
- if os.path.exists(ENV_DIR):
- shutil.rmtree(ENV_DIR)
+# Extra packages specific to python 2
+PY2_REQS_PATH = os.path.join(DEPS_DIR, "py2-requirements.txt")
+
+
+def delete_virtualenv_if_exist(venv_dir):
+ if os.path.exists(venv_dir):
+ shutil.rmtree(venv_dir)
def detect_virtualenv_version():
@@ -99,8 +108,16 @@ def detect_virtualenv_version():
return None
-def create_virtualenv():
- LOG.info("Creating python virtualenv")
+def create_virtualenv(venv_dir, is_py3):
+ if is_py3:
+ # Python 3 is much simpler, because there is a builtin venv command
+ LOG.info("Creating python3 virtualenv")
+ python_cmd = download_toolchain_python(is_py3)
+ exec_cmd([python_cmd, "-m" "venv", venv_dir])
+ return
+
+ # Python 2
+ LOG.info("Creating python2 virtualenv")
build_dir = tempfile.mkdtemp()
# Try to find the virtualenv version by parsing the requirements file
# Default to "*" if we can't figure it out.
@@ -114,9 +131,9 @@ def create_virtualenv():
for member in file.getmembers():
file.extract(member, build_dir)
file.close()
- python_cmd = download_toolchain_python()
+ python_cmd = download_toolchain_python(is_py3)
exec_cmd([python_cmd, find_file(build_dir, "virtualenv*", "virtualenv.py"), "--quiet",
- "--python", python_cmd, ENV_DIR])
+ "--python", python_cmd, venv_dir])
shutil.rmtree(build_dir)
@@ -147,7 +164,7 @@ def select_cc():
return cc
-def exec_pip_install(args, cc="no-cc-available", env=None):
+def exec_pip_install(venv_dir, is_py3, args, cc="no-cc-available", env=None):
'''Executes "pip install" with the provided command line arguments. If 'cc' is set,
it is used as the C compiler. Otherwise compilation of C/C++ code is disabled by
setting the CC environment variable to a bogus value.
@@ -169,8 +186,12 @@ def exec_pip_install(args, cc="no-cc-available", env=None):
# Don't call the virtualenv pip directly, it uses a hashbang to to call the python
# virtualenv using an absolute path. If the path to the virtualenv is very long, the
# hashbang won't work.
- impala_pip_base_cmd = [os.path.join(ENV_DIR, "bin", "python"),
- os.path.join(ENV_DIR, "bin", "pip"), "install", "-v"]
+ if is_py3:
+ impala_pip_base_cmd = [os.path.join(venv_dir, "bin", "python3"),
+ os.path.join(venv_dir, "bin", "pip3"), "install", "-v"]
+ else:
+ impala_pip_base_cmd = [os.path.join(venv_dir, "bin", "python"),
+ os.path.join(venv_dir, "bin", "pip"), "install", "-v"]
# Passes --no-binary for IMPALA-3767: without this, Cython (and
# several other packages) fail download.
@@ -181,7 +202,9 @@ def exec_pip_install(args, cc="no-cc-available", env=None):
impala_pip_base_cmd[:] + ["--no-binary", ":all:", "--no-cache-dir"]
# When using a custom mirror, we also must use the index of that mirror.
- if "PYPI_MIRROR" in os.environ:
+ # The python 3 virtualenv has trouble with using --index-url with PYPI_MIRROR,
+ # so it falls back to --no-index, which works fine.
+ if "PYPI_MIRROR" in os.environ and not is_py3:
third_party_pkg_install_cmd.extend(["--index-url",
"%s/simple" % os.environ["PYPI_MIRROR"]])
else:
@@ -217,7 +240,7 @@ def find_file(*paths):
return files[0]
-def download_toolchain_python():
+def download_toolchain_python(is_py3):
'''Grabs the Python implementation from the Impala toolchain, using the machinery from
bin/bootstrap_toolchain.py.
Skip the download if SKIP_TOOLCHAIN_BOOTSTRAP=true in the environment. In that case
@@ -229,27 +252,35 @@ def download_toolchain_python():
raise Exception("Impala environment not set up correctly, make sure "
"$IMPALA_TOOLCHAIN_PACKAGES_HOME is set.")
- package = ToolchainPackage("python")
+ if is_py3:
+ package = ToolchainPackage("python",
+ explicit_version=os.environ["IMPALA_PYTHON3_VERSION"])
+ else:
+ package = ToolchainPackage("python")
if package.needs_download() and \
not (os.environ.get(SKIP_TOOLCHAIN_BOOTSTRAP) == 'true'):
package.download()
- python_cmd = os.path.join(package.pkg_directory(), "bin/python")
+ if is_py3:
+ python_cmd = os.path.join(package.pkg_directory(), "bin/python3")
+ else:
+ python_cmd = os.path.join(package.pkg_directory(), "bin/python")
if not os.path.exists(python_cmd):
raise Exception("Unexpected error bootstrapping python from toolchain: {0} does not "
"exist".format(python_cmd))
return python_cmd
-def install_deps():
- LOG.info("Installing setuptools into the virtualenv")
- exec_pip_install(["-r", SETUPTOOLS_REQS_PATH])
+def install_deps(venv_dir, is_py3):
+ py_str = "3" if is_py3 else "2"
+ LOG.info("Installing setuptools into the python{0} virtualenv".format(py_str))
+ exec_pip_install(venv_dir, is_py3, ["-r", SETUPTOOLS_REQS_PATH])
cc = select_cc()
if cc is None:
raise Exception("CC not available")
env = dict(os.environ)
- LOG.info("Installing packages into the virtualenv")
- exec_pip_install(["-r", REQS_PATH], cc=cc, env=env)
- mark_reqs_installed(REQS_PATH)
+ LOG.info("Installing packages into the python{0} virtualenv".format(py_str))
+ exec_pip_install(venv_dir, is_py3, ["-r", REQS_PATH], cc=cc, env=env)
+ mark_reqs_installed(venv_dir, REQS_PATH)
def have_toolchain():
@@ -264,26 +295,44 @@ def toolchain_pkg_dir(pkg_name):
pkg_name + "-" + pkg_version)
-def install_adls_deps():
+def install_adls_deps(venv_dir, is_py3):
# The ADLS dependencies require that the OS is at least CentOS 6.7 or above,
# which is why we break this into a seperate step. If the target filesystem is
# ADLS, the expectation is that the dev environment is running at least CentOS 6.7.
if os.environ.get('TARGET_FILESYSTEM') == "adls":
- if reqs_are_installed(ADLS_REQS_PATH):
+ if reqs_are_installed(venv_dir, ADLS_REQS_PATH):
LOG.debug("Skipping ADLS deps: matching adls-installed-requirements.txt found")
return True
cc = select_cc()
assert cc is not None
- LOG.info("Installing ADLS packages into the virtualenv")
- exec_pip_install(["-r", ADLS_REQS_PATH], cc=cc)
- mark_reqs_installed(ADLS_REQS_PATH)
+ py_str = "3" if is_py3 else "2"
+ LOG.info("Installing ADLS packages into the python{0} virtualenv".format(py_str))
+ exec_pip_install(venv_dir, is_py3, ["-r", ADLS_REQS_PATH], cc=cc)
+ mark_reqs_installed(venv_dir, ADLS_REQS_PATH)
-def install_kudu_client_if_possible():
+def install_py_version_deps(venv_dir, is_py3):
+ cc = select_cc()
+ assert cc is not None
+ if not is_py3:
+ if not reqs_are_installed(venv_dir, PY2_REQS_PATH):
+ # These are extra python2-only packages
+ LOG.info("Installing python2 packages into the virtualenv")
+ exec_pip_install(venv_dir, is_py3, ["-r", PY2_REQS_PATH], cc=cc)
+ mark_reqs_installed(venv_dir, PY2_REQS_PATH)
+ else:
+ if not reqs_are_installed(venv_dir, PY3_REQS_PATH):
+ # These are extra python3-only packages
+ LOG.info("Installing python3 packages into the virtualenv")
+ exec_pip_install(venv_dir, is_py3, ["-r", PY3_REQS_PATH], cc=cc)
+ mark_reqs_installed(venv_dir, PY3_REQS_PATH)
+
+
+def install_kudu_client_if_possible(venv_dir, is_py3):
'''Installs the Kudu python module if possible, which depends on the toolchain and
the compiled requirements in requirements.txt. If the toolchain isn't
available, nothing will be done.'''
- if reqs_are_installed(KUDU_REQS_PATH):
+ if reqs_are_installed(venv_dir, KUDU_REQS_PATH):
LOG.debug("Skipping Kudu: matching kudu-installed-requirements.txt found")
return
kudu_base_dir = os.environ["IMPALA_KUDU_HOME"]
@@ -291,11 +340,13 @@ def install_kudu_client_if_possible():
LOG.debug("Skipping Kudu: %s doesn't exist" % kudu_base_dir)
return
- LOG.info("Installing Kudu into the virtualenv")
+ py_str = "3" if is_py3 else "2"
+ LOG.info("Installing Kudu into the python{0} virtualenv".format(py_str))
# The installation requires that KUDU_HOME/build/latest exists. An empty directory
# structure will be made to satisfy that. The Kudu client headers and lib will be made
# available through GCC environment variables.
- fake_kudu_build_dir = os.path.join(tempfile.gettempdir(), "virtualenv-kudu")
+ fake_kudu_build_dir = os.path.join(tempfile.gettempdir(),
+ "virtualenv-kudu{0}".format(py_str))
try:
artifact_dir = os.path.join(fake_kudu_build_dir, "build", "latest")
if not os.path.exists(artifact_dir):
@@ -312,8 +363,8 @@ def install_kudu_client_if_possible():
env["CPLUS_INCLUDE_PATH"] = os.path.join(kudu_client_dir, "include")
env["LIBRARY_PATH"] = os.path.pathsep.join([os.path.join(kudu_client_dir, 'lib'),
os.path.join(kudu_client_dir, 'lib64')])
- exec_pip_install(["-r", KUDU_REQS_PATH], cc=cc, env=env)
- mark_reqs_installed(KUDU_REQS_PATH)
+ exec_pip_install(venv_dir, is_py3, ["-r", KUDU_REQS_PATH], cc=cc, env=env)
+ mark_reqs_installed(venv_dir, KUDU_REQS_PATH)
finally:
try:
shutil.rmtree(fake_kudu_build_dir)
@@ -353,17 +404,17 @@ def error_if_kudu_client_not_found(install_dir):
raise Exception("%s not found at %s" % (kudu_client_lib, lib_dir))
-def mark_reqs_installed(reqs_path):
+def mark_reqs_installed(venv_dir, reqs_path):
'''Mark that the requirements from the given file are installed by copying it into
the root directory of the virtualenv.'''
- installed_reqs_path = os.path.join(ENV_DIR, os.path.basename(reqs_path))
+ installed_reqs_path = os.path.join(venv_dir, os.path.basename(reqs_path))
shutil.copyfile(reqs_path, installed_reqs_path)
-def reqs_are_installed(reqs_path):
+def reqs_are_installed(venv_dir, reqs_path):
'''Check if the requirements from the given file are installed in the virtualenv by
looking for a matching requirements file in the root directory of the virtualenv.'''
- installed_reqs_path = os.path.join(ENV_DIR, os.path.basename(reqs_path))
+ installed_reqs_path = os.path.join(venv_dir, os.path.basename(reqs_path))
if not os.path.exists(installed_reqs_path):
return False
installed_reqs_file = open(installed_reqs_path)
@@ -381,11 +432,11 @@ def reqs_are_installed(reqs_path):
installed_reqs_file.close()
-def setup_virtualenv_if_not_exists():
- if not (reqs_are_installed(REQS_PATH)):
- delete_virtualenv_if_exist()
- create_virtualenv()
- install_deps()
+def setup_virtualenv_if_not_exists(venv_dir, is_py3):
+ if not (reqs_are_installed(venv_dir, REQS_PATH)):
+ delete_virtualenv_if_exist(venv_dir)
+ create_virtualenv(venv_dir, is_py3)
+ install_deps(venv_dir, is_py3)
LOG.debug("Virtualenv setup complete")
@@ -397,6 +448,8 @@ if __name__ == "__main__":
" the virtualenv even if it exists and appears to be completely up-to-date.")
parser.add_option("--print-ld-library-path", action="store_true", help="Print the"
" LD_LIBRARY_PATH that should be used when running python from the virtualenv.")
+ parser.add_option("--python3", action="store_true", help="Generate the python3"
+ " virtualenv")
options, args = parser.parse_args()
if options.print_ld_library_path:
@@ -411,10 +464,17 @@ if __name__ == "__main__":
sys.exit()
logging.basicConfig(level=getattr(logging, options.log_level))
+
+ if options.python3:
+ venv_dir = ENV_DIR_PY3
+ else:
+ venv_dir = ENV_DIR_PY2
+
if options.rebuild:
- delete_virtualenv_if_exist()
+ delete_virtualenv_if_exist(venv_dir)
# Complete as many bootstrap steps as possible (see file comment for the steps).
- setup_virtualenv_if_not_exists()
- install_kudu_client_if_possible()
- install_adls_deps()
+ setup_virtualenv_if_not_exists(venv_dir, options.python3)
+ install_kudu_client_if_possible(venv_dir, options.python3)
+ install_adls_deps(venv_dir, options.python3)
+ install_py_version_deps(venv_dir, options.python3)
diff --git a/infra/python/deps/pip_download.py b/infra/python/deps/pip_download.py
index 9c41135d8..03713f927 100755
--- a/infra/python/deps/pip_download.py
+++ b/infra/python/deps/pip_download.py
@@ -38,7 +38,8 @@ PYPI_MIRROR = os.environ.get('PYPI_MIRROR', 'https://pypi.python.org')
# The requirement files that list all of the required packages and versions.
REQUIREMENTS_FILES = ['requirements.txt', 'setuptools-requirements.txt',
- 'kudu-requirements.txt', 'adls-requirements.txt']
+ 'kudu-requirements.txt', 'adls-requirements.txt',
+ 'py2-requirements.txt', 'py3-requirements.txt']
def check_digest(filename, algorithm, expected_digest):
diff --git a/infra/python/deps/setuptools-requirements.txt b/infra/python/deps/py2-requirements.txt
similarity index 65%
copy from infra/python/deps/setuptools-requirements.txt
copy to infra/python/deps/py2-requirements.txt
index 071f9fc54..122b5ab68 100644
--- a/infra/python/deps/setuptools-requirements.txt
+++ b/infra/python/deps/py2-requirements.txt
@@ -15,7 +15,20 @@
# specific language governing permissions and limitations
# under the License.
-# Newer versions of setuptools don't support Python 2.7
-setuptools == 44.1.1
- wheel == 0.35.1
-setuptools-scm == 4.1.2
+# Python2-only requirements
+
+cm-api == 10.0.0
+ # Already available as part of python on Linux.
+ readline == 6.2.4.1; sys_platform == 'darwin'
+flake8 == 3.9.2
+ mccabe == 0.6.1
+ pycodestyle == 2.7.0
+ pyflakes == 2.3.1
+ enum34 == 1.1.10
+ typing == 3.10.0.0
+ configparser == 4.0.2
+ functools32 == 3.2.3-2
+ importlib-metadata == 2.1.3
+ contextlib2 == 0.6.0
+ pathlib2 == 2.3.7.post1
+ zipp == 1.2.0
diff --git a/infra/python/deps/setuptools-requirements.txt b/infra/python/deps/py3-requirements.txt
similarity index 70%
copy from infra/python/deps/setuptools-requirements.txt
copy to infra/python/deps/py3-requirements.txt
index 071f9fc54..d6195a1e8 100644
--- a/infra/python/deps/setuptools-requirements.txt
+++ b/infra/python/deps/py3-requirements.txt
@@ -15,7 +15,17 @@
# specific language governing permissions and limitations
# under the License.
-# Newer versions of setuptools don't support Python 2.7
-setuptools == 44.1.1
- wheel == 0.35.1
-setuptools-scm == 4.1.2
+# Python3-only requirements
+
+pylint == 2.10.2
+ astroid == 2.7.3
+ lazy-object-proxy == 1.6.0
+ wrapt == 1.12.1
+ typed-ast == 1.4.3
+ configparser == 4.0.2
+ isort == 4.3.21
+ futures == 3.3.0; python_version == "2.7"
+ singledispatch == 3.6.1
+ toml == 0.10.2
+ platformdirs == 2.4.1
+ typing-extensions == 3.10.0.2
diff --git a/infra/python/deps/requirements.txt b/infra/python/deps/requirements.txt
index 271ddeadc..fe61de14b 100644
--- a/infra/python/deps/requirements.txt
+++ b/infra/python/deps/requirements.txt
@@ -20,23 +20,8 @@
# Dependents are indented. Dependents that have multiple parents are not listed
# multiple times (though maybe they could be).
-allpairs == 2.0.1
+allpairspy == 2.5.0
argparse == 1.4.0
-cm-api == 10.0.0
- # Already available as part of python on Linux.
- readline == 6.2.4.1; sys_platform == 'darwin'
-flake8 == 3.9.2
- mccabe == 0.6.1
- pycodestyle == 2.7.0
- pyflakes == 2.3.1
- enum34 == 1.1.10
- typing == 3.10.0.0
- configparser == 4.0.2
- functools32 == 3.2.3-2
- importlib-metadata == 2.1.3
- contextlib2 == 0.6.0
- pathlib2 == 2.3.7.post1
- zipp == 1.2.0
future == 0.18.3
gcovr == 4.2
Jinja2 == 2.11.3
@@ -61,14 +46,13 @@ pyparsing == 2.0.3
pytest == 2.9.2
py == 1.4.32
pytest-forked == 0.2
- pytest-random == 0.02
pytest-runner == 4.2
pytest-xdist == 1.17.1
pytest-timeout == 1.2.1
python-magic == 0.4.11
pywebhdfs == 0.3.2
pbr == 3.1.1
-requests == 2.20.0
+requests == 2.21.0
chardet == 3.0.4
idna == 2.8
urllib3 == 1.24.2
diff --git a/infra/python/deps/setuptools-requirements.txt b/infra/python/deps/setuptools-requirements.txt
index 071f9fc54..713bfa0af 100644
--- a/infra/python/deps/setuptools-requirements.txt
+++ b/infra/python/deps/setuptools-requirements.txt
@@ -18,4 +18,4 @@
# Newer versions of setuptools don't support Python 2.7
setuptools == 44.1.1
wheel == 0.35.1
-setuptools-scm == 4.1.2
+setuptools-scm == 5.0.2
diff --git a/testdata/bin/generate-test-vectors.py b/testdata/bin/generate-test-vectors.py
index 4998a8caa..c7d288bd8 100755
--- a/testdata/bin/generate-test-vectors.py
+++ b/testdata/bin/generate-test-vectors.py
@@ -48,8 +48,7 @@ import os
import sys
from itertools import product
from optparse import OptionParser
-import metacomm.combinatorics.all_pairs2
-all_pairs = metacomm.combinatorics.all_pairs2.all_pairs2
+from allpairspy import AllPairs as all_pairs
parser = OptionParser()
parser.add_option("-w", "--workload", dest="workload",
diff --git a/tests/common/test_vector.py b/tests/common/test_vector.py
index 005c35adb..8fcac5a79 100644
--- a/tests/common/test_vector.py
+++ b/tests/common/test_vector.py
@@ -136,8 +136,8 @@ class ImpalaTestMatrix(object):
if self.is_valid(vec)]
def __generate_pairwise_combinations(self):
- import metacomm.combinatorics.all_pairs2
- all_pairs = metacomm.combinatorics.all_pairs2.all_pairs2
+ from allpairspy import AllPairs
+ all_pairs = AllPairs
# Pairwise fails if the number of inputs == 1. Use exhaustive in this case the
# results will be the same.
diff --git a/tests/custom_cluster/test_hs2_fault_injection.py b/tests/custom_cluster/test_hs2_fault_injection.py
index 0b3f8e7b4..27e536fdd 100644
--- a/tests/custom_cluster/test_hs2_fault_injection.py
+++ b/tests/custom_cluster/test_hs2_fault_injection.py
@@ -1,5 +1,3 @@
-#!/usr/bin/env python
-#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
diff --git a/tests/query_test/test_decimal_casting.py b/tests/query_test/test_decimal_casting.py
index 752e3ad64..f487e8bc3 100644
--- a/tests/query_test/test_decimal_casting.py
+++ b/tests/query_test/test_decimal_casting.py
@@ -19,7 +19,7 @@
#
import pytest
from decimal import Decimal, getcontext, ROUND_DOWN, ROUND_HALF_UP
-from metacomm.combinatorics.all_pairs2 import all_pairs2 as all_pairs
+from allpairspy import AllPairs as all_pairs
from random import randint
from tests.common.impala_test_suite import ImpalaTestSuite