You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ho...@apache.org on 2017/03/29 18:41:22 UTC
spark git commit: [SPARK-19955][PYSPARK] Jenkins Python Conda based
test.
Repository: spark
Updated Branches:
refs/heads/master c622a87c4 -> d6ddfdf60
[SPARK-19955][PYSPARK] Jenkins Python Conda based test.
## What changes were proposed in this pull request?
Allow Jenkins Python tests to use the installed conda to test Python 2.7 support & test pip installability.
## How was this patch tested?
Updated shell scripts, ran tests locally with installed conda, ran tests in Jenkins.
Author: Holden Karau <ho...@us.ibm.com>
Closes #17355 from holdenk/SPARK-19955-support-python-tests-with-conda.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d6ddfdf6
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d6ddfdf6
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d6ddfdf6
Branch: refs/heads/master
Commit: d6ddfdf60e77340256873b5acf08e85f95cf3bc2
Parents: c622a87
Author: Holden Karau <ho...@us.ibm.com>
Authored: Wed Mar 29 11:41:17 2017 -0700
Committer: Holden Karau <ho...@us.ibm.com>
Committed: Wed Mar 29 11:41:17 2017 -0700
----------------------------------------------------------------------
dev/run-pip-tests | 66 +++++++++++++++++++++++++++++-----------------
dev/run-tests-jenkins | 3 ++-
python/run-tests.py | 6 ++---
3 files changed, 47 insertions(+), 28 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/d6ddfdf6/dev/run-pip-tests
----------------------------------------------------------------------
diff --git a/dev/run-pip-tests b/dev/run-pip-tests
index af1b1fe..d51dde1 100755
--- a/dev/run-pip-tests
+++ b/dev/run-pip-tests
@@ -35,9 +35,28 @@ function delete_virtualenv() {
}
trap delete_virtualenv EXIT
+PYTHON_EXECS=()
# Some systems don't have pip or virtualenv - in those cases our tests won't work.
-if ! hash virtualenv 2>/dev/null; then
- echo "Missing virtualenv skipping pip installability tests."
+if hash virtualenv 2>/dev/null && [ ! -n "$USE_CONDA" ]; then
+ echo "virtualenv installed - using. Note if this is a conda virtual env you may wish to set USE_CONDA"
+ # Figure out which Python execs we should test pip installation with
+ if hash python2 2>/dev/null; then
+ # We do this since we are testing with virtualenv and the default virtual env python
+ # is in /usr/bin/python
+ PYTHON_EXECS+=('python2')
+ elif hash python 2>/dev/null; then
+ # If python2 isn't installed fallback to python if available
+ PYTHON_EXECS+=('python')
+ fi
+ if hash python3 2>/dev/null; then
+ PYTHON_EXECS+=('python3')
+ fi
+elif hash conda 2>/dev/null; then
+ echo "Using conda virtual environments"
+ PYTHON_EXECS=('3.5')
+ USE_CONDA=1
+else
+ echo "Missing virtualenv & conda, skipping pip installability tests"
exit 0
fi
if ! hash pip 2>/dev/null; then
@@ -45,22 +64,8 @@ if ! hash pip 2>/dev/null; then
exit 0
fi
-# Figure out which Python execs we should test pip installation with
-PYTHON_EXECS=()
-if hash python2 2>/dev/null; then
- # We do this since we are testing with virtualenv and the default virtual env python
- # is in /usr/bin/python
- PYTHON_EXECS+=('python2')
-elif hash python 2>/dev/null; then
- # If python2 isn't installed fallback to python if available
- PYTHON_EXECS+=('python')
-fi
-if hash python3 2>/dev/null; then
- PYTHON_EXECS+=('python3')
-fi
-
# Determine which version of PySpark we are building for archive name
-PYSPARK_VERSION=$(python -c "exec(open('python/pyspark/version.py').read());print __version__")
+PYSPARK_VERSION=$(python3 -c "exec(open('python/pyspark/version.py').read());print(__version__)")
PYSPARK_DIST="$FWDIR/python/dist/pyspark-$PYSPARK_VERSION.tar.gz"
# The pip install options we use for all the pip commands
PIP_OPTIONS="--upgrade --no-cache-dir --force-reinstall "
@@ -75,18 +80,24 @@ for python in "${PYTHON_EXECS[@]}"; do
echo "Using $VIRTUALENV_BASE for virtualenv"
VIRTUALENV_PATH="$VIRTUALENV_BASE"/$python
rm -rf "$VIRTUALENV_PATH"
- mkdir -p "$VIRTUALENV_PATH"
- virtualenv --python=$python "$VIRTUALENV_PATH"
- source "$VIRTUALENV_PATH"/bin/activate
- # Upgrade pip & friends
- pip install --upgrade pip pypandoc wheel
- pip install numpy # Needed so we can verify mllib imports
+ if [ -n "$USE_CONDA" ]; then
+ conda create -y -p "$VIRTUALENV_PATH" python=$python numpy pandas pip setuptools
+ source activate "$VIRTUALENV_PATH"
+ else
+ mkdir -p "$VIRTUALENV_PATH"
+ virtualenv --python=$python "$VIRTUALENV_PATH"
+ source "$VIRTUALENV_PATH"/bin/activate
+ fi
+ # Upgrade pip & friends if using virtual env
+ if [ ! -n "$USE_CONDA" ]; then
+ pip install --upgrade pip pypandoc wheel numpy
+ fi
echo "Creating pip installable source dist"
cd "$FWDIR"/python
# Delete the egg info file if it exists, this can cache the setup file.
rm -rf pyspark.egg-info || echo "No existing egg info file, skipping deletion"
- $python setup.py sdist
+ python setup.py sdist
echo "Installing dist into virtual env"
@@ -112,6 +123,13 @@ for python in "${PYTHON_EXECS[@]}"; do
cd "$FWDIR"
+ # conda / virtualenv environments need to be deactivated differently
+ if [ -n "$USE_CONDA" ]; then
+ source deactivate
+ else
+ deactivate
+ fi
+
done
done
http://git-wip-us.apache.org/repos/asf/spark/blob/d6ddfdf6/dev/run-tests-jenkins
----------------------------------------------------------------------
diff --git a/dev/run-tests-jenkins b/dev/run-tests-jenkins
index e79accf..f41f1ac 100755
--- a/dev/run-tests-jenkins
+++ b/dev/run-tests-jenkins
@@ -22,7 +22,8 @@
# Environment variables are populated by the code here:
#+ https://github.com/jenkinsci/ghprb-plugin/blob/master/src/main/java/org/jenkinsci/plugins/ghprb/GhprbTrigger.java#L139
-FWDIR="$(cd "`dirname $0`"/..; pwd)"
+FWDIR="$( cd "$( dirname "$0" )/.." && pwd )"
cd "$FWDIR"
+export PATH=/home/anaconda/bin:$PATH
exec python -u ./dev/run-tests-jenkins.py "$@"
http://git-wip-us.apache.org/repos/asf/spark/blob/d6ddfdf6/python/run-tests.py
----------------------------------------------------------------------
diff --git a/python/run-tests.py b/python/run-tests.py
index 53a0aef..b2e5043 100755
--- a/python/run-tests.py
+++ b/python/run-tests.py
@@ -111,9 +111,9 @@ def run_individual_python_test(test_name, pyspark_python):
def get_default_python_executables():
- python_execs = [x for x in ["python2.6", "python3.4", "pypy"] if which(x)]
- if "python2.6" not in python_execs:
- LOGGER.warning("Not testing against `python2.6` because it could not be found; falling"
+ python_execs = [x for x in ["python2.7", "python3.4", "pypy"] if which(x)]
+ if "python2.7" not in python_execs:
+ LOGGER.warning("Not testing against `python2.7` because it could not be found; falling"
" back to `python` instead")
python_execs.insert(0, "python")
return python_execs
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org